In [4]:
import os 
import pandas as pd 


root = '../results'

# folders = {
#     'phi3-4k-base-0shot-samples-finetuned-api': 'base-0shot',
#     'phi3-4k-snomedct-0shot-samples-finetuned-api': 'snomedct-0shot',
#     'phi3-4k-pmc-0shot-samples-finetuned-api': 'pmc-0shot',
#     'phi3-4k-wikiterms-0shot-samples-finetuned-api': 'wikiterms-0shot',
# }

folders = {
    'phi3-4k-base-0shot-samples-finetuned': '_base-0shot',
    'phi3-4k-snomedct-0shot-samples-finetuned': 'snomedct-0shot',
    'phi3-4k-pmc-0shot-samples-finetuned': 'pmc-0shot',
    'phi3-4k-wikiterms-0shot-samples-finetuned-api': 'wikiterms-0shot',
    'phi3-4k-drugbank-0shot-samples-finetuned': 'drugbank-0shot',
    'phi3-4k-knowchem-0shot-samples-finetuned': 'knowchem-0shot',
    'phi3-4k-medlineplus-0shot-samples-finetuned': 'medlineplus-0shot',
    'phi3-4k-nice-0shot-samples-finetuned': 'nice-0shot',
    'phi3-4k-pathbank-0shot-samples-finetuned': 'pathbank-0shot',
    'KnowMedPhi3-mini-0shot-samples-finetuned': 'KnowMedPhi3-mini-0shot',
}

folders = {
    'phi3-4k-base-0shot-samples-finetuned': 'Phi3-base',
    'phi3-4k-snomedct-0shot-samples-finetuned': 'Phi3-SNOMED',
    'phi3-4k-pmc-0shot-samples-finetuned': 'Phi3-PMC',
    'phi3-4k-wikiterms-0shot-samples-finetuned-api': 'Phi3-Wikiterms',
    'phi3-4k-drugbank-0shot-samples-finetuned': 'Phi3-Drugbank',
    'phi3-4k-knowchem-0shot-samples-finetuned': 'Phi3-Knowchem',
    'phi3-4k-medlineplus-0shot-samples-finetuned': 'Phi3-Medlineplus',
    'phi3-4k-nice-0shot-samples-finetuned': 'Phi3-NICE',
    'phi3-4k-pathbank-0shot-samples-finetuned': 'Phi3-Pathbank',
    'KnowMedPhi3-mini-0shot-samples-finetuned': 'KnowMedPhi3-mini',
}

In [5]:
for k, v in folders.items():
    print(k)

phi3-4k-base-0shot-samples-finetuned
phi3-4k-snomedct-0shot-samples-finetuned
phi3-4k-pmc-0shot-samples-finetuned
phi3-4k-wikiterms-0shot-samples-finetuned-api
phi3-4k-drugbank-0shot-samples-finetuned
phi3-4k-knowchem-0shot-samples-finetuned
phi3-4k-medlineplus-0shot-samples-finetuned
phi3-4k-nice-0shot-samples-finetuned
phi3-4k-pathbank-0shot-samples-finetuned
KnowMedPhi3-mini-0shot-samples-finetuned


In [6]:
for folder, dataset in folders.items():
    # find files ended with muc.csv
    muc_files = [os.path.join(root, folder, file) for file in os.listdir(os.path.join(root, folder)) if file.endswith('muc.csv')]

In [7]:
muc_files

['../results/KnowMedPhi3-mini-0shot-samples-finetuned/BC5CDR-chemical_test_muc.csv',
 '../results/KnowMedPhi3-mini-0shot-samples-finetuned/BC2GM_test_muc.csv',
 '../results/KnowMedPhi3-mini-0shot-samples-finetuned/NCBI-disease_test_muc.csv']

In [8]:
bc5cdr = pd.read_csv(muc_files[0])
bc2gm = pd.read_csv(muc_files[1])
ncbi = pd.read_csv(muc_files[2])

In [9]:
# get strict scores of all tree datasets
strict_score = pd.concat([bc5cdr.loc[0], bc2gm.loc[0], ncbi.loc[0]], axis=1)
strict_score

Unnamed: 0,0,0.1,0.2
Unnamed: 0,strict,strict,strict
precision,90.567,84.132,90.992
recall,89.008,84.622,90.858
f1_score,89.148,83.965,90.508
count,200,200,200


In [10]:
# transpose
strict_score = strict_score.T

In [11]:
# replace the first column with dataset names
strict_score = strict_score.rename(columns={'Unnamed: 0': 'type'})
strict_score['dataset'] = ['BC5CDR', 'BC2GM', 'NCBI']

# reset index
strict_score = strict_score.reset_index(drop=True)

strict_score

Unnamed: 0,type,precision,recall,f1_score,count,dataset
0,strict,90.567,89.008,89.148,200,BC5CDR
1,strict,84.132,84.622,83.965,200,BC2GM
2,strict,90.992,90.858,90.508,200,NCBI


In [12]:
def get_scores(muc_files, model, type='strict'):
    bc5cdr = pd.read_csv(muc_files[0])
    bc2gm = pd.read_csv(muc_files[1])
    ncbi = pd.read_csv(muc_files[2])

    if type == 'strict':
        row_idx = 0
    elif type == 'partial':
        row_idx = 2

    score = pd.concat([bc5cdr.loc[row_idx], bc2gm.loc[row_idx], ncbi.loc[row_idx]], axis=1)

    # transpose
    score = score.T

    # replace the first column with dataset names
    score = score.rename(columns={'Unnamed: 0': 'type'})
    score['dataset'] = ['BC5CDR', 'BC2GM', 'NCBI']

    # add column for model name
    score['model'] = model

    return score


In [13]:
get_scores(muc_files, 'phi3-4k-base-0shot-samples-finetuned-api', 'strict')

Unnamed: 0,type,precision,recall,f1_score,count,dataset,model
0,strict,90.567,89.008,89.148,200,BC5CDR,phi3-4k-base-0shot-samples-finetuned-api
0,strict,84.132,84.622,83.965,200,BC2GM,phi3-4k-base-0shot-samples-finetuned-api
0,strict,90.992,90.858,90.508,200,NCBI,phi3-4k-base-0shot-samples-finetuned-api


In [14]:
get_scores(muc_files, 'phi3-4k-base-0shot-samples-finetuned-api', 'partial')

Unnamed: 0,type,precision,recall,f1_score,count,dataset,model
2,partial,90.929,89.621,89.594,200,BC5CDR,phi3-4k-base-0shot-samples-finetuned-api
2,partial,88.915,89.05,88.473,200,BC2GM,phi3-4k-base-0shot-samples-finetuned-api
2,partial,92.042,92.087,91.622,200,NCBI,phi3-4k-base-0shot-samples-finetuned-api


In [15]:
# Put all together
df_out = pd.DataFrame()
for folder, dataset in folders.items():
    # find files ended with muc.csv
    muc_files = [os.path.join(root, folder, file) for file in os.listdir(os.path.join(root, folder)) if file.endswith('muc.csv')]

    strict_score = get_scores(muc_files, dataset, 'strict')

    partial_score = get_scores(muc_files, dataset, 'partial')

    df_out = pd.concat([df_out, strict_score, partial_score])


In [16]:
df_out


Unnamed: 0,type,precision,recall,f1_score,count,dataset,model
0,strict,88.733,88.158,87.821,200,BC5CDR,Phi3-base
0,strict,81.975,82.978,81.918,200,BC2GM,Phi3-base
0,strict,80.267,81.75,80.518,200,NCBI,Phi3-base
2,partial,90.171,89.804,89.317,200,BC5CDR,Phi3-base
2,partial,86.272,88.147,86.492,200,BC2GM,Phi3-base
2,partial,84.558,86.083,84.802,200,NCBI,Phi3-base
0,strict,90.083,88.492,88.689,200,BC5CDR,Phi3-SNOMED
0,strict,82.408,82.681,82.171,200,BC2GM,Phi3-SNOMED
0,strict,88.917,89.075,88.542,200,NCBI,Phi3-SNOMED
2,partial,90.604,89.304,89.31,200,BC5CDR,Phi3-SNOMED


In [17]:
# only strict
df_out[df_out['type'] == 'strict']

Unnamed: 0,type,precision,recall,f1_score,count,dataset,model
0,strict,88.733,88.158,87.821,200,BC5CDR,Phi3-base
0,strict,81.975,82.978,81.918,200,BC2GM,Phi3-base
0,strict,80.267,81.75,80.518,200,NCBI,Phi3-base
0,strict,90.083,88.492,88.689,200,BC5CDR,Phi3-SNOMED
0,strict,82.408,82.681,82.171,200,BC2GM,Phi3-SNOMED
0,strict,88.917,89.075,88.542,200,NCBI,Phi3-SNOMED
0,strict,90.208,88.767,88.842,200,BC5CDR,Phi3-PMC
0,strict,85.566,85.05,84.913,200,BC2GM,Phi3-PMC
0,strict,86.792,86.157,86.029,200,NCBI,Phi3-PMC
0,strict,89.958,71.769,77.79,200,BC5CDR,Phi3-Wikiterms


In [20]:
# sort by dataset
df_out[df_out['type'] == 'strict'].sort_values(by=['dataset', 'model']).reset_index(drop=True)

Unnamed: 0,type,precision,recall,f1_score,count,dataset,model
0,strict,84.132,84.622,83.965,200,BC2GM,KnowMedPhi3-mini
1,strict,84.449,84.533,84.203,200,BC2GM,Phi3-Drugbank
2,strict,82.799,82.978,82.51,200,BC2GM,Phi3-Knowchem
3,strict,85.891,85.381,85.322,200,BC2GM,Phi3-Medlineplus
4,strict,84.954,84.781,84.523,200,BC2GM,Phi3-NICE
5,strict,85.566,85.05,84.913,200,BC2GM,Phi3-PMC
6,strict,84.278,85.153,84.323,200,BC2GM,Phi3-Pathbank
7,strict,82.408,82.681,82.171,200,BC2GM,Phi3-SNOMED
8,strict,84.983,69.156,74.51,200,BC2GM,Phi3-Wikiterms
9,strict,81.975,82.978,81.918,200,BC2GM,Phi3-base


In [19]:
#  partial
df_out[df_out['type'] == 'partial'].sort_values(by=['dataset', 'model'])

Unnamed: 0,type,precision,recall,f1_score,count,dataset,model
2,partial,88.915,89.05,88.473,200,BC2GM,KnowMedPhi3-mini
2,partial,89.224,88.863,88.677,200,BC2GM,Phi3-Drugbank
2,partial,87.928,87.731,87.386,200,BC2GM,Phi3-Knowchem
2,partial,90.564,89.8,89.82,200,BC2GM,Phi3-Medlineplus
2,partial,89.632,89.238,89.026,200,BC2GM,Phi3-NICE
2,partial,89.589,88.794,88.745,200,BC2GM,Phi3-PMC
2,partial,88.42,89.119,88.329,200,BC2GM,Phi3-Pathbank
2,partial,87.237,87.312,86.799,200,BC2GM,Phi3-SNOMED
2,partial,86.158,70.212,75.612,200,BC2GM,Phi3-Wikiterms
2,partial,86.272,88.147,86.492,200,BC2GM,Phi3-base
