In [1]:
!pip install jaro-winkler
!pip install scikit-learn
!pip install torcheval



In [18]:
import torch
from torcheval.metrics.functional import binary_f1_score, binary_precision, binary_recall
import json
from jaro import jaro_winkler_metric
from sklearn.metrics import roc_curve, roc_auc_score

import numpy as np
import matplotlib.pyplot as plt

def to_binary(preds: list[tuple], gt: list[tuple]) -> tuple[list[int], list[int]]:
    gt = set(gt)
    output = []
    target = []
    for pred in preds:
        if pred in gt:
            # real triplet (true positive)
            gt.remove(pred)
            target.append(1)
        else:
            # fake triplet (false positive)
            target.append(0)

        # positive
        output.append(1)

    # unextracted triplets (false negatives)
    target.extend([1 for item in gt])
    output.extend([0 for item in gt])

    return output, target

def f1(preds: list[list[tuple[str]]], gt: list[list[tuple[str]]]) -> list:
    # N = len(gt)
    N = min(len(gt), len(preds))
    scores = []

    for i in range(N):
        # print("preds", preds[i])
        # print("gt", gt[i])
        output, target = to_binary(preds[i], gt[i])
        output, target = torch.tensor(output), torch.tensor(target)
        # print("out", output)
        # print("target", target)
        scores.append(float(binary_f1_score(output, target)))

    return scores

def precision(preds: list[list[tuple[str]]], gt: list[list[tuple[str]]]) -> list:
    # N = len(gt)
    N = min(len(gt), len(preds))
    scores = []

    for i in range(N):
        output, target = to_binary(preds[i], gt[i])
        output, target = torch.tensor(output), torch.tensor(target)
        scores.append(float(binary_precision(output, target)))

    return scores

def recall(preds: list[list[tuple[str]]], gt: list[list[tuple[str]]]) -> list:
    # N = len(gt)
    N = min(len(gt), len(preds))
    scores = []

    for i in range(N):
        output, target = to_binary(preds[i], gt[i])
        output, target = torch.tensor(output), torch.tensor(target)
        scores.append(float(binary_recall(output, target)))

    return scores

def jaro_winkler(preds: list[list[tuple[str]]], gt: list[list[tuple[str]]]) -> list:
    # N = len(gt)
    N = min(len(gt), len(preds))
    scores = []

    for i in range(N):
        output = json.dumps(preds[i])
        target = json.dumps(gt[i])
        scores.append(jaro_winkler_metric(output, target))

    return scores

In [25]:
import json
import statistics
from tqdm import tqdm
from json import JSONDecodeError
import re

def extract_tags(text):
    # Regular expression to match substrings enclosed within <>
    pattern = r'<([^<]*?)>(.*?)<\/\1>'

    # Find all tagged substrings
    tagged_substrings = re.findall(pattern, text)

    # Return each tag and content separately
    tagged_lists = [[tag, content] for tag, content in tagged_substrings]

    return tagged_lists

# def extract_tags(text):
#     # Regular expression pattern to match substrings enclosed within <>
#     pattern = r'<[^<]*?>.*?<\/[^<]*?>'

#     # Find all tagged substrings
#     tagged_substrings = re.findall(pattern, text)

#     # Split each tagged substring into individual tags and content
#     tagged_lists = []
#     for substring in tagged_substrings:
#         # Extract individual tags and content
#         tags_and_content = re.findall(r'<([^>]+)>(.*?)<\/\1>', substring)
#         tagged_lists.extend(tags_and_content)

#     return tagged_lists



successful_output = 0

# Create lists
f1_score_list = []
precision_list = []
recall_list = []
jw_score_list = []
successful_output = 0
failed_output = 0

test_set_path = 'C:/Users/navde/Desktop/Bioinformatics_401_Project/Data/Tagging_50_Test.jsonl'
truth_file_path = 'C:/Users/navde/Desktop/Bioinformatics_401_Project/Data/Tagging_Ground_Truth_Bias.jsonl'

with open(test_set_path, 'r',encoding="utf-8") as infile, open(truth_file_path, "r",encoding="utf-8") as outfile:
    for line, gt_line in tqdm(zip(infile, outfile)):
        successful_output += 1
        data = json.loads(line)
        datagt = json.loads(gt_line)
        abstract = data['input']
        ground_truth = datagt['input']

        # print(extract_tags(abstract))
        # print(extract_tags(ground_truth))

        # Get scores
        f1_score = f1(extract_tags(abstract), extract_tags(ground_truth))
        precision_value = precision(extract_tags(abstract), extract_tags(ground_truth))
        recall_val = recall(extract_tags(abstract), extract_tags(ground_truth))
        jw_score = jaro_winkler(extract_tags(abstract), extract_tags(ground_truth))

        #Increment variables
        f1_score_list.extend(f1_score)
        precision_list.extend(precision_value)
        recall_list.extend(recall_val)
        jw_score_list.extend(jw_score)

# Calculate statistics for each list
for name, score_list in [("F1 Score", f1_score_list), ("Precision", precision_list), ("Recall", recall_list), ("Jaro-Winkler Score", jw_score_list)]:
    avg_score = np.mean(score_list)
    min_score = np.min(score_list)
    max_score = np.max(score_list) 
    std_dev = np.std(score_list) # Std. dev. requires at least 2 data points

    # Print statistics
    print(f"{name}:")
    print(f"  Average: {avg_score}")
    print(f"  Minimum: {min_score}")
    print(f"  Maximum: {max_score}")
    print(f"  Standard Deviation: {std_dev}")

# Print other information
total_outputs = successful_output + failed_output
print(f"Successful Outputs: {successful_output}")
print(f"Failed Outputs: {failed_output}")
print(f"Total Outputs: {total_outputs}")

# Write results to a file
with open("Tagger/results_of_tagger_bias_2.txt", "w") as f:
    for name, score_list in [("F1 Score", f1_score_list), ("Precision", precision_list), ("Recall", recall_list), ("Jaro-Winkler Score", jw_score_list)]:
        avg_score = np.mean(score_list)
        min_score = np.min(score_list)
        max_score = np.max(score_list) 
        std_dev = np.std(score_list) #if len(score_list) > 1 else 0  # Std. dev. requires at least 2 data points

        # Write statistics to file
        f.write(f"{name}:\n")
        f.write(f"  Average: {avg_score}\n")
        f.write(f"  Minimum: {min_score}\n")
        f.write(f"  Maximum: {max_score}\n")
        f.write(f"  Standard Deviation: {std_dev}\n")

    # Write other information
    total_outputs = successful_output + failed_output
    f.write(f"Successful Outputs: {successful_output}\n")
    f.write(f"Failed Outputs: {failed_output}\n")
    f.write(f"Total Outputs: {total_outputs}\n")

print("Results have been written to results_of_tagger.txt.")


50it [00:00, 120.89it/s]

F1 Score:
  Average: 0.6889447236180904
  Minimum: 0.0
  Maximum: 1.0
  Standard Deviation: 0.4358073337157155
Precision:
  Average: 0.6889447236180904
  Minimum: 0.0
  Maximum: 1.0
  Standard Deviation: 0.4358073337157155
Recall:
  Average: 0.6889447236180904
  Minimum: 0.0
  Maximum: 1.0
  Standard Deviation: 0.4358073337157155
Jaro-Winkler Score:
  Average: 0.8726224245036122
  Minimum: 0.41516582153057063
  Maximum: 1.0
  Standard Deviation: 0.1832049838217127
Successful Outputs: 50
Failed Outputs: 0
Total Outputs: 50





FileNotFoundError: [Errno 2] No such file or directory: 'Tagger/results_of_tagger_bias_2.txt'