In [57]:
import os 
import pandas as pd 


root = '../results'

folders = {
    'phi3-4k-base-0shot-samples-finetuned-api': 'base-0shot',
    'phi3-4k-snomedct-0shot-samples-finetuned-api': 'snomedct-0shot',
    'phi3-4k-pmc-0shot-samples-finetuned-api': 'pmc-0shot',
    'phi3-4k-wikiterms-0shot-samples-finetuned-api': 'wikiterms-0shot',
}


In [60]:
for k, v in folders.items():
    print(k)

phi3-4k-base-0shot-samples-finetuned-api
phi3-4k-snomedct-0shot-samples-finetuned-api
phi3-4k-pmc-0shot-samples-finetuned-api
phi3-4k-wikiterms-0shot-samples-finetuned-api


In [61]:
for folder, dataset in folders.items():
    # find files ended with muc.csv
    muc_files = [os.path.join(root, folder, file) for file in os.listdir(os.path.join(root, folder)) if file.endswith('muc.csv')]

In [62]:
muc_files

['../results/phi3-4k-wikiterms-0shot-samples-finetuned-api/BC5CDR-chemical_test_muc.csv',
 '../results/phi3-4k-wikiterms-0shot-samples-finetuned-api/BC2GM_test_muc.csv',
 '../results/phi3-4k-wikiterms-0shot-samples-finetuned-api/NCBI-disease_test_muc.csv']

In [34]:
bc5cdr = pd.read_csv(muc_files[0])
bc2gm = pd.read_csv(muc_files[1])
ncbi = pd.read_csv(muc_files[2])

In [35]:
# get strict scores of all tree datasets
strict_score = pd.concat([bc5cdr.loc[0], bc2gm.loc[0], ncbi.loc[0]], axis=1)
strict_score

Unnamed: 0,0,0.1,0.2
Unnamed: 0,strict,strict,strict
precision,89.958,84.983,88.7
recall,71.769,69.156,73.312
f1_score,77.79,74.51,78.335
count,200,200,200


In [36]:
# transpose
strict_score = strict_score.T

In [37]:
# replace the first column with dataset names
strict_score = strict_score.rename(columns={'Unnamed: 0': 'type'})
strict_score['dataset'] = ['BC5CDR', 'BC2GM', 'NCBI']

# reset index
strict_score = strict_score.reset_index(drop=True)

strict_score

Unnamed: 0.1,Unnamed: 0,precision,recall,f1_score,count,dataset
0,strict,89.958,71.769,77.79,200,BC5CDR
1,strict,84.983,69.156,74.51,200,BC2GM
2,strict,88.7,73.312,78.335,200,NCBI


In [50]:
def get_scores(muc_files, model, type='strict'):
    bc5cdr = pd.read_csv(muc_files[0])
    bc2gm = pd.read_csv(muc_files[1])
    ncbi = pd.read_csv(muc_files[2])

    if type == 'strict':
        row_idx = 0
    elif type == 'partial':
        row_idx = 2

    score = pd.concat([bc5cdr.loc[row_idx], bc2gm.loc[row_idx], ncbi.loc[row_idx]], axis=1)

    # transpose
    score = score.T

    # replace the first column with dataset names
    score = score.rename(columns={'Unnamed: 0': 'type'})
    score['dataset'] = ['BC5CDR', 'BC2GM', 'NCBI']

    # add column for model name
    score['model'] = model

    return score


In [51]:
get_scores(muc_files, 'phi3-4k-base-0shot-samples-finetuned-api', 'strict')

Unnamed: 0,type,precision,recall,f1_score,count,dataset,model
0,strict,89.958,71.769,77.79,200,BC5CDR,phi3-4k-base-0shot-samples-finetuned-api
0,strict,84.983,69.156,74.51,200,BC2GM,phi3-4k-base-0shot-samples-finetuned-api
0,strict,88.7,73.312,78.335,200,NCBI,phi3-4k-base-0shot-samples-finetuned-api


In [52]:
get_scores(muc_files, 'phi3-4k-base-0shot-samples-finetuned-api', 'partial')

Unnamed: 0,type,precision,recall,f1_score,count,dataset,model
2,partial,90.021,71.819,77.846,200,BC5CDR,phi3-4k-base-0shot-samples-finetuned-api
2,partial,86.158,70.212,75.612,200,BC2GM,phi3-4k-base-0shot-samples-finetuned-api
2,partial,89.533,74.07,79.123,200,NCBI,phi3-4k-base-0shot-samples-finetuned-api


In [63]:
# Put all together
df_out = pd.DataFrame()
for folder, dataset in folders.items():
    # find files ended with muc.csv
    muc_files = [os.path.join(root, folder, file) for file in os.listdir(os.path.join(root, folder)) if file.endswith('muc.csv')]

    strict_score = get_scores(muc_files, dataset, 'strict')

    partial_score = get_scores(muc_files, dataset, 'partial')

    df_out = pd.concat([df_out, strict_score, partial_score])





In [64]:
df_out


Unnamed: 0,type,precision,recall,f1_score,count,dataset,model
0,strict,88.983,71.724,77.506,200,BC5CDR,base-0shot
0,strict,81.527,68.732,73.027,200,BC2GM,base-0shot
0,strict,80.35,68.43,72.413,200,NCBI,base-0shot
2,partial,89.879,72.578,78.377,200,BC5CDR,base-0shot
2,partial,82.444,69.548,73.883,200,BC2GM,base-0shot
2,partial,82.142,70.117,74.143,200,NCBI,base-0shot
0,strict,90.5,72.022,78.175,200,BC5CDR,snomedct-0shot
0,strict,81.771,67.391,72.333,200,BC2GM,snomedct-0shot
0,strict,89.583,74.105,79.213,200,NCBI,snomedct-0shot
2,partial,90.5,72.022,78.175,200,BC5CDR,snomedct-0shot


In [74]:
# only strict
df_out[df_out['type'] == 'strict']

Unnamed: 0,type,precision,recall,f1_score,count,dataset,model
0,strict,88.983,71.724,77.506,200,BC5CDR,base-0shot
0,strict,81.527,68.732,73.027,200,BC2GM,base-0shot
0,strict,80.35,68.43,72.413,200,NCBI,base-0shot
0,strict,90.5,72.022,78.175,200,BC5CDR,snomedct-0shot
0,strict,81.771,67.391,72.333,200,BC2GM,snomedct-0shot
0,strict,89.583,74.105,79.213,200,NCBI,snomedct-0shot
0,strict,89.958,71.769,77.79,200,BC5CDR,pmc-0shot
0,strict,84.983,69.156,74.51,200,BC2GM,pmc-0shot
0,strict,88.7,73.312,78.335,200,NCBI,pmc-0shot
0,strict,89.958,71.769,77.79,200,BC5CDR,wikiterms-0shot


In [75]:
# sort by dataset
df_out[df_out['type'] == 'strict'].sort_values(by='dataset')

Unnamed: 0,type,precision,recall,f1_score,count,dataset,model
0,strict,81.527,68.732,73.027,200,BC2GM,base-0shot
0,strict,81.771,67.391,72.333,200,BC2GM,snomedct-0shot
0,strict,84.983,69.156,74.51,200,BC2GM,pmc-0shot
0,strict,84.983,69.156,74.51,200,BC2GM,wikiterms-0shot
0,strict,88.983,71.724,77.506,200,BC5CDR,base-0shot
0,strict,90.5,72.022,78.175,200,BC5CDR,snomedct-0shot
0,strict,89.958,71.769,77.79,200,BC5CDR,pmc-0shot
0,strict,89.958,71.769,77.79,200,BC5CDR,wikiterms-0shot
0,strict,80.35,68.43,72.413,200,NCBI,base-0shot
0,strict,89.583,74.105,79.213,200,NCBI,snomedct-0shot


In [76]:
#  partial
df_out[df_out['type'] == 'partial'].sort_values(by='dataset')

Unnamed: 0,type,precision,recall,f1_score,count,dataset,model
2,partial,82.444,69.548,73.883,200,BC2GM,base-0shot
2,partial,82.84,68.375,73.348,200,BC2GM,snomedct-0shot
2,partial,86.158,70.212,75.612,200,BC2GM,pmc-0shot
2,partial,86.158,70.212,75.612,200,BC2GM,wikiterms-0shot
2,partial,89.879,72.578,78.377,200,BC5CDR,base-0shot
2,partial,90.5,72.022,78.175,200,BC5CDR,snomedct-0shot
2,partial,90.021,71.819,77.846,200,BC5CDR,pmc-0shot
2,partial,90.021,71.819,77.846,200,BC5CDR,wikiterms-0shot
2,partial,82.142,70.117,74.143,200,NCBI,base-0shot
2,partial,90.508,74.941,80.085,200,NCBI,snomedct-0shot
