In [1]:
import pandas as pd
import pickle
import annotations

from sklearn.metrics import classification_report

In [None]:
# Tab-separated file with the model predictions (loaded via pd.read_csv below).
PREDS_PATH = "test_pred_sample.tsv"
# Pickle file with the ground-truth relation labels (unpickled below).
ACTUALS_PATH = "test_rel_labels.pkl"

In [2]:
# Predictions are stored as a tab-separated table.
preds_df = pd.read_csv(PREDS_PATH, sep='\t')

# NOTE(security): pickle.load can execute arbitrary code while loading, so
# ACTUALS_PATH must only ever point at a file from a trusted source.
with open(ACTUALS_PATH, "rb") as openfile:
    actual_dicts = pickle.load(openfile)

# The prediction file only holds a sample, so keep just as many ground-truth
# records as there are predictions. Deriving the count from preds_df (rather
# than hard-coding it) keeps this cell correct when the sample size changes.
# Assumes the predictions correspond to the first records, in order -- TODO confirm.
actual_dicts = actual_dicts[:len(preds_df)]

In [3]:
def convert_actual_labels(actual_dicts):
    """Flatten the ground-truth relation records into a DataFrame.

    Parameters
    ----------
    actual_dicts : iterable of mapping
        Each item must provide a ``'label'`` entry and a ``'relation'`` entry
        whose value exposes a ``.name`` attribute.

    Returns
    -------
    pandas.DataFrame
        One row per input record with the columns ``index`` (position of the
        record within ``actual_dicts``), ``label`` and ``relation`` (the
        relation's ``.name``). An empty input yields an empty frame that still
        has these three columns.
    """
    # Collect plain records first and build the frame in a single step:
    # DataFrame.append was removed in pandas 2.0, and it copied the whole
    # frame on every call (quadratic in the number of records).
    records = [
        {
            "index": i,
            "label": actual_dict['label'],
            "relation": actual_dict['relation'].name,
        }
        for i, actual_dict in enumerate(actual_dicts)
    ]
    # Passing `columns` keeps the schema stable even when there are no records.
    return pd.DataFrame(records, columns=["index", "label", "relation"])

def gen_classification_reports(df):
    """Print an overall classification report, then one report per relation.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``label_actual``, ``label_predicted`` (both
        are cast to ``int`` before scoring) and ``relation``.

    Returns
    -------
    None
        Everything is written to stdout; sections are separated by a line of
        55 dashes, and a closing dashed line follows the last report.
    """
    divider = "-" * 55

    def _emit(title, frame):
        # One section: divider, heading, then the report for `frame`.
        print(divider)
        print(title)
        y_true = frame["label_actual"].astype(int)
        y_pred = frame["label_predicted"].astype(int)
        print(classification_report(y_true, y_pred))

    _emit("Overall Classification Report", df)

    # Relations are reported in order of first appearance in the frame.
    for relation_name in df["relation"].unique():
        _emit(
            f"Classification Report for {relation_name} Relation",
            df[df["relation"] == relation_name],
        )

    print(divider)

In [4]:
# Turn the ground-truth records into a DataFrame (index, label, relation).
actual_df = convert_actual_labels(actual_dicts)

In [5]:
# Left join on "index": every ground-truth row is kept. Non-key columns that
# exist in both frames get the "_actual" / "_predicted" suffixes.
# NOTE(review): a ground-truth row without a matching prediction would carry
# missing prediction values, which the later `.astype(int)` presumably cannot
# convert -- confirm every index has a prediction.
final_df = actual_df.merge(preds_df, how="left", on="index", suffixes=("_actual", "_predicted"))

In [6]:
# Print the overall report followed by one report per relation.
gen_classification_reports(final_df)

-------------------------------------------------------
Overall Classification Report
              precision    recall  f1-score   support

           0       0.75      0.75      0.75         4
           1       0.92      0.92      0.92        12

    accuracy                           0.88        16
   macro avg       0.83      0.83      0.83        16
weighted avg       0.88      0.88      0.88        16

-------------------------------------------------------
Classification Report for Dosage-Drug Relation
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       1.00      1.00      1.00         2

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3

-------------------------------------------------------
Classification Report for Form-Drug Relation
              precision    recall  f1-score   support

 

In [None]:
# def numerical_relations(relations):
#     relation_idx={relation: i for i, relation in enumerate(relations.unique(), 1)}
#     return relation_idx
#
# def generate_multilabels(df):
#     df["multi_label_actual"] = df['relation'].mask(
#         final_df['label_actual'].apply(lambda x: False if x==1 else True)
#     ).fillna(0)
#     df["multi_label_predicted"] = df['relation'].mask(
#         final_df['label_predicted'].apply(lambda x: False if x==1 else True)
#     ).fillna(0)
#     return df