In [203]:
import pandas as pd

class Score(object):
    """Precision, recall and F1 for one label, together with the raw counts.

    The constructor stores its arguments unchanged: ``p``, ``r`` and ``f1``
    become ``precision``, ``recall`` and ``f1``; ``tp``, ``fp`` and ``fn`` keep
    their names; ``class_label`` is the label the numbers belong to.
    """

    def __init__(self, class_label, p, r, f1, tp, fp, fn):
        self.class_label = class_label
        self.precision, self.recall, self.f1 = p, r, f1
        self.tp, self.fp, self.fn = tp, fp, fn

    def summary(self):
        """Print a tab-separated heading line, then the three metrics at two decimals."""
        heading = "{}\tPrecision\tRecall\tF1".format(self.class_label)
        metrics = (self.precision, self.recall, self.f1)
        print(heading)
        print("{:.2f}\t{:.2f}\t{:.2f}".format(*metrics))

class Evaluator(object):
    """Compute per-class, micro and macro P/R/F1 from a tab-separated results file.

    The file must have a header row with ``Gold`` and ``Predicted`` columns.
    ``neg_class`` is the label treated as "no relation": it still gets its own
    per-class row, but it is left out of the MICRO and MACRO aggregates.
    ``smoothing`` is added to every denominator so that empty classes score 0
    instead of raising ZeroDivisionError; it should be a small positive number.
    """

    def __init__(self, results_file, neg_class = "None", smoothing = 0.00001):
        self.results_file = results_file
        self.neg_class = neg_class
        self.smoothing = smoothing

    def _load_results(self):
        """Read the results file with the labels kept exactly as written."""
        # pandas' default NA markers include strings such as "NA", "null" and
        # (in recent releases) "None". Without keep_default_na=False the default
        # negative label would be loaded as NaN and never match `neg_class`.
        return pd.read_csv(self.results_file, delimiter="\t", keep_default_na=False)

    def _prf(self, tp, fp, fn):
        """Return smoothed (precision, recall, f1) for the given counts."""
        p = tp / (tp + fp + self.smoothing)
        r = tp / (tp + fn + self.smoothing)
        f1 = (2 * p * r) / (p + r + self.smoothing)
        return p, r, f1

    def _class_score(self, df, class_label):
        """Score one label against an already loaded results frame."""
        is_gold = df.Gold == class_label
        is_predicted = df.Predicted == class_label
        tp = int((is_gold & is_predicted).sum())
        fp = int((is_predicted & ~is_gold).sum())
        fn = int(is_gold.sum()) - tp
        p, r, f1 = self._prf(tp, fp, fn)
        return Score(class_label, p, r, f1, tp, fp, fn)

    def _micro_score(self, df):
        """Pool all positive predictions and score them as one class."""
        # Rows where the negative class was predicted never count as TP or FP.
        positive_prediction = df.Predicted != self.neg_class
        correct = df.Gold == df.Predicted
        tp = int((correct & positive_prediction).sum())
        fp = int((~correct & positive_prediction).sum())
        # Every gold-positive row that was not predicted correctly is a miss.
        fn = int((df.Gold != self.neg_class).sum()) - tp
        p, r, f1 = self._prf(tp, fp, fn)
        return Score("MICRO", p, r, f1, tp, fp, fn)

    def _macro_score(self, df):
        """Average the per-class metrics of the positive gold labels; sum their counts."""
        # unique() keeps first-appearance order, so the float sums below are
        # reproducible from run to run (iterating a set of strings is not).
        scores = [self._class_score(df, cl) for cl in df.Gold.unique() if cl != self.neg_class]
        if not scores:
            # No positive class in the gold column: nothing to average.
            return Score("MACRO", 0.0, 0.0, 0.0, 0, 0, 0)
        n = len(scores)
        return Score(
            "MACRO",
            sum(s.precision for s in scores) / n,
            sum(s.recall for s in scores) / n,
            sum(s.f1 for s in scores) / n,
            sum(s.tp for s in scores),
            sum(s.fp for s in scores),
            sum(s.fn for s in scores),
        )

    def compute_class_score(self, class_label):
        """Return the Score of a single label (the negative class is allowed)."""
        return self._class_score(self._load_results(), class_label)

    def compute_micro_score(self):
        """Return the micro-averaged Score over all non-negative predictions."""
        return self._micro_score(self._load_results())

    def compute_macro_score(self):
        """Return the macro-averaged Score over the positive gold labels.

        All metrics and counts are zero when the gold column contains no
        positive label.
        """
        return self._macro_score(self._load_results())

    def generate_scores_df(self):
        """Return a DataFrame with one row per gold label, then MACRO and MICRO rows.

        Columns are Class, P, R, F1, TP, FP, FN. Per-class rows are sorted by
        label. The results file is read once for the whole table.
        """
        df = self._load_results()
        header = ("Class", "P", "R", "F1", "TP", "FP", "FN")
        scores = [self._class_score(df, cl) for cl in sorted(set(df.Gold.values))]
        scores.append(self._macro_score(df))
        scores.append(self._micro_score(df))
        data = [(s.class_label, s.precision, s.recall, s.f1, s.tp, s.fp, s.fn) for s in scores]
        return pd.DataFrame(data, columns=list(header))

In [188]:
evaluator = Evaluator("results.tsv", "None")

# Aggregate scores over the positive classes; the headings are printed macro first, then micro.
macro = evaluator.compute_macro_score()
micro = evaluator.compute_micro_score()
for aggregate in (macro, micro):
    aggregate.summary()

MACRO	Precision	Recall	F1
0.26	0.10	0.14
MICRO	Precision	Recall	F1
0.51	0.17	0.26


In [189]:
# Per-class rows plus the MACRO and MICRO rows, rounded to two decimals for display.
results = evaluator.generate_scores_df().round(2)
tsv_text = results.to_csv(sep="\t", index=False)
print(tsv_text)

Class	P	R	F1	TP	FP	FN
E1 precedes E2	0.52	0.2	0.29	24	22	98
E2 precedes E1	0.0	0.0	0.0	0	1	16
None	0.84	0.96	0.9	618	114	23
MACRO	0.26	0.1	0.14	24	23	114
MICRO	0.51	0.17	0.26	24	23	114



In [209]:
import os

# os.listdir returns entries in arbitrary order; sorting keeps the report
# order stable from one run (and one machine) to the next.
for f in sorted(os.listdir(".")):
    # Result files are named "<prefix>-<model>.tsv"; any name containing "old" is skipped.
    if "-" in f and f.endswith(".tsv") and "old" not in f:
        # Drop only the trailing extension, then keep everything after the first "-".
        model_name = os.path.splitext(f)[0].split("-", 1)[-1]
        evaluator = Evaluator(f, "None")
        results = evaluator.generate_scores_df().round(2)
        print("Results for {}".format(model_name.upper()))
        print(results.to_csv(sep="\t", index=False))

Results for LIN-SVM-L1
Class	P	R	F1	TP	FP	FN
E1 precedes E2	0.49	0.39	0.43	65	68	101
E2 precedes E1	0.0	0.0	0.0	0	4	28
None	0.84	0.91	0.87	652	123	66
MACRO	0.24	0.2	0.22	65	72	129
MICRO	0.47	0.34	0.39	65	72	129

Results for LIN-SVM-L2
Class	P	R	F1	TP	FP	FN
E1 precedes E2	0.53	0.3	0.38	50	45	116
E2 precedes E1	0.25	0.04	0.06	1	3	27
None	0.83	0.94	0.88	674	139	44
MACRO	0.39	0.17	0.22	51	48	143
MICRO	0.52	0.26	0.35	51	48	143

Results for LR-L1
Class	P	R	F1	TP	FP	FN
E1 precedes E2	0.54	0.36	0.43	60	52	106
E2 precedes E1	0.0	0.0	0.0	0	1	28
None	0.84	0.93	0.88	670	129	48
MACRO	0.27	0.18	0.22	60	53	134
MICRO	0.53	0.31	0.39	60	53	134

Results for LR-L2
Class	P	R	F1	TP	FP	FN
E1 precedes E2	0.59	0.27	0.37	44	31	122
E2 precedes E1	0.0	0.0	0.0	0	2	28
None	0.83	0.96	0.89	689	146	29
MACRO	0.29	0.13	0.18	44	33	150
MICRO	0.57	0.23	0.32	44	33	150

Results for RF
Class	P	R	F1	TP	FP	FN
E1 precedes E2	0.57	0.23	0.33	38	29	128
E2 precedes E1	0.5	0.04	0.07	1	1	27
None	0.82	0.96	0.88	688	155	30
MACRO	0.53	0.