In [1]:
import json
import pandas as pd

In [2]:
# function to assess precision, recall, fscore

# Columns that identify a span when comparing annotations with predictions.
_SPAN_KEYS = ['recipe_index', 'start', 'end', 'label']


def _spans_to_frame(span_lists):
    """Flatten a Series of span lists into one row per span.

    Each output row carries the index of the row it came from in
    ``recipe_index`` plus every key of the span dict as its own column.
    """
    records = []
    for recipe_index, spans in span_lists.items():
        # A record with no usable span list (e.g. NaN for a missing key)
        # contributes no rows instead of crashing the iteration.
        if not isinstance(spans, (list, tuple)):
            continue
        for span in spans:
            records.append({'recipe_index': recipe_index, **span})
    if not records:
        # Typed empty frame so later merges/concats see consistent dtypes.
        return pd.DataFrame({
            key: pd.Series(dtype='object' if key == 'label' else 'int64')
            for key in _SPAN_KEYS
        })
    return pd.DataFrame(records)


def _drop_matched(frame, matched):
    """Return rows of ``frame`` that are absent from ``matched``.

    Rows repeated inside ``frame`` itself are dropped as well, which mirrors
    ``drop_duplicates(keep=False)`` on the concatenation.
    """
    if matched.empty:
        return frame.drop_duplicates(keep=False)
    # ``matched`` is added twice so each of its rows is always a duplicate
    # and therefore never survives, whether or not it occurs in ``frame``.
    return pd.concat([frame, matched, matched], axis=0).drop_duplicates(keep=False)


def _pair_off(label_remain, predict_remain, keys):
    """Extract rows that agree on ``keys`` across the two remaining sets.

    Returns ``(shared, label_remain, predict_remain)`` where ``shared`` holds
    the extracted rows and the other two frames no longer contain them.
    """
    pool = pd.concat([label_remain, predict_remain])
    shared = pool[pool.duplicated(keys, keep=False)]
    return shared, _drop_matched(label_remain, shared), _drop_matched(predict_remain, shared)


def _safe_ratio(numerator, denominator):
    """Divide, returning 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


def assess_accuracy(label_jsonl, predict_jsonl):
    """Compute span-level precision, recall and F1 of predictions vs. annotations.

    Parameters
    ----------
    label_jsonl : path or file-like
        JSON-lines file with the annotated (ground-truth) records. Each record
        needs ``text``, ``answer`` and ``spans``.
    predict_jsonl : path or file-like
        JSON-lines file with the predicted records, same fields.

    Returns
    -------
    tuple
        ``(merged, label_words, predict_words, precision, recall, f1_score)``.
        ``merged`` is the inner join of both files on ``text`` restricted to
        rows whose annotated ``answer`` is ``'accept'``; ``label_words`` and
        ``predict_words`` hold one row per span.

    Notes
    -----
    Counting rules:

    * TP: spans equal on ``recipe_index``, ``start``, ``end`` and ``label``.
    * "Predicted wrong class" (``pwc``): leftover spans paired by, in order,
      same start and end, same start only, same end only. Each stage
      contributes half the number of rows it extracts.
    * FP: leftover predictions plus ``pwc``.
    * FN: leftover annotations after all pairing stages.

    NOTE(review): an annotation paired in a ``pwc`` stage is not counted as a
    false negative, so recall is more lenient than strict exact-match
    scoring. Kept as-is; confirm this is the intended metric.

    Precision, recall and F1 are reported as 0.0 when their denominator is 0.
    """
    # import and merge into one dataframe, dropping bad ground truth rows
    label = pd.read_json(label_jsonl, lines=True)
    predict = pd.read_json(predict_jsonl, lines=True)
    merged = pd.merge(predict, label, how='inner', on=['text'])
    merged = merged[merged.answer_y == 'accept']

    # one row per span; *_y columns come from the annotations, *_x from predictions
    label_words = _spans_to_frame(merged['spans_y'])
    label_words = label_words.drop(['token_start', 'token_end'], axis=1, errors='ignore')
    predict_words = _spans_to_frame(merged['spans_x'])

    # Match on the identifying columns only, so extra span attributes that
    # exist on just one side cannot prevent rows from being recognised as equal.
    label_keys = label_words[_SPAN_KEYS]
    predict_keys = predict_words[_SPAN_KEYS]

    # true positives, correctly predicted the right class
    TP_df = pd.merge(label_keys, predict_keys, on=_SPAN_KEYS, how='inner')
    TP = len(TP_df.index)

    label_remain = _drop_matched(label_keys, TP_df)
    predict_remain = _drop_matched(predict_keys, TP_df)

    # predicted wrong class = different class + same start but different end
    # + same end but different start (stages run from strictest to loosest)
    pwc = 0
    pairing_stages = (
        ['recipe_index', 'start', 'end'],
        ['recipe_index', 'start'],
        ['recipe_index', 'end'],
    )
    for keys in pairing_stages:
        shared, label_remain, predict_remain = _pair_off(label_remain, predict_remain, keys)
        pwc += len(shared.index) / 2

    # false positives, predicted a class where there is no class, or predicted the wrong class
    FP = len(predict_remain.index) + pwc

    # false negatives, did not predict anything when should have
    FN = len(label_remain.index)

    # final calculations
    precision = _safe_ratio(TP, TP + FP)
    recall = _safe_ratio(TP, TP + FN)
    f1_score = _safe_ratio(2 * (recall * precision), recall + precision)

    print('precision',precision)
    print('recall',recall)
    print('f1_score',f1_score)

    return merged, label_words, predict_words, precision, recall, f1_score

#### original seed words

In [3]:
# Annotated file, passed to assess_accuracy as the ground-truth labels.
annotated_test = '../KH_Data/test_2_epicurious_KH_annotated.jsonl'
# Prediction file for the original seed words.
original_test = '../KH_Data/test_2_epicurious_KH_original.jsonl'

# Score the predictions; the function also prints precision, recall and f1_score.
merged, label_words, predict_words, precision, recall, f1_score = assess_accuracy(
    annotated_test,
    original_test,
)

precision 0.814218009478673
recall 0.5241000610128127
f1_score 0.637713437268003


#### boosted seed words

In [4]:
# Annotated file, passed to assess_accuracy as the ground-truth labels.
annotated_test = '../KH_Data/test_2_epicurious_KH_annotated.jsonl'
# Prediction file for the boosted seed words. The variable is still called
# `original_test`, but here it points at the boosted-seed file.
original_test = '../KH_Data/test_2_epicurious_KP_original_annot2.jsonl'

# Score the predictions; the function also prints precision, recall and f1_score.
merged, label_words, predict_words, precision, recall, f1_score = assess_accuracy(
    annotated_test,
    original_test,
)

precision 0.7704081632653061
recall 0.5770700636942675
f1_score 0.6598689002184996
