In [None]:
import numpy as np
import pandas as pd

In [None]:
# Predictions to evaluate: same format as a leaderboard submission.csv, but covering
# only essays from the training set. Replace this path with your own file.
sub = pd.read_csv('../input/essay2-submission/submission.csv')

# Ground truth. Inside a training loop this would simply be the validation subset of
# train.csv; here it is reconstructed by keeping the essays whose id appears in `sub`.
# NOTE(review): an essay with no predictions at all is absent from `sub`, so its
# ground-truth rows are dropped here and never counted as false negatives.
truth = pd.read_csv('../input/feedback-prize-2021/train.csv').rename(columns={'discourse_type':'class'})
truth = truth[['id', 'class', 'predictionstring']]
keeps = set(sub.id.values)
# .copy() makes `truth` an independent frame, so adding or dropping columns on it
# later does not raise SettingWithCopyWarning.
truth = truth[truth.id.isin(keeps)].copy()

In [None]:
def edge_words(predictionstring):
    """Return ``[first, last]`` word indices of a whitespace-separated index string.

    Only the first and last tokens are read; they are converted with ``int``.
    An input without any token raises ``IndexError``.
    """
    tokens = predictionstring.split()
    first_token, last_token = tokens[0], tokens[-1]
    return [int(first_token), int(last_token)]

def _spans_by_group(frame, column):
    """Collapse `frame` to one row per (id, class) holding the list of its spans.

    Each `predictionstring` is reduced to a ``[first, last]`` span by `edge_words`;
    the spans of a group are gathered into a list stored in the column named `column`.
    A new frame is built instead of mutating `frame`, so filtered inputs do not
    trigger SettingWithCopyWarning and the caller's data is left untouched.
    """
    return (
        frame[['id', 'class']]
        .assign(span=frame.predictionstring.apply(edge_words))
        .groupby(['id', 'class'])['span']
        .agg(list)
        .reset_index(name=column)
    )

truth = _spans_by_group(truth, 'truth')
sub = _spans_by_group(sub, 'sub')

# Outer merge keeps groups that exist on only one side; the missing side is NaN.
df = sub.merge(truth, on=["id", "class"], how="outer")
df.head()

In [None]:
def overlap(sub, truth):
    """Number of word positions shared by two inclusive ``[first, last]`` spans.

    Returns 0 when the spans do not intersect.
    """
    last_shared = min(sub[1], truth[1])
    first_shared = max(sub[0], truth[0])
    shared = last_shared - first_shared + 1
    return max(0, shared)

def score(sub, truth):
    """Count true positives, false positives and false negatives for one group.

    Parameters
    ----------
    sub : sequence of ``[first, last]`` predicted spans, or NaN / None when the
        group has no predictions.
    truth : sequence of ``[first, last]`` ground-truth spans, or NaN / None when
        the group has no ground truth.

    Returns
    -------
    tuple ``(TP, FP, FN)``. A ground-truth span is a true positive when some
    still-unmatched prediction overlaps it by at least half of the prediction's
    length and at least half of the ground-truth span's length. Overlap is
    measured on span edges via `overlap`. Each prediction can match at most one
    ground-truth span.
    """
    def _as_span_list(value):
        # An outer merge leaves a float NaN where one side has no rows for the
        # group; treat that (and None) as "no spans". Checked by value instead
        # of `is np.NaN`, an alias that was removed in NumPy 2.0.
        if value is None or (isinstance(value, float) and np.isnan(value)):
            return []
        return list(value)

    sub = _as_span_list(sub)
    truth = _as_span_list(truth)

    TP = 0
    if sub and truth:
        sub_lengths = np.array([s[1] - s[0] + 1 for s in sub])
        truth_lengths = [t[1] - t[0] + 1 for t in truth]
        # overlaps[t, s] = shared words between truth span t and prediction s.
        overlaps = np.array([[overlap(s, t) for s in sub] for t in truth])
        for t in range(len(truth)):
            row = overlaps[t]
            # Predictions satisfying both 50% criteria for this truth span
            # (2 * overlap >= length is overlap / length >= 0.5 without division).
            qualifies = (row > 0) & (2 * row >= sub_lengths) & (2 * row >= truth_lengths[t])
            if qualifies.any():
                # Best qualifying prediction = largest overlap (first one on ties).
                s = int(np.argmax(np.where(qualifies, row, 0)))
                # Consume the prediction only once it is actually matched, so a
                # failed comparison cannot hide it from later truth spans.
                overlaps[:, s] = 0
                TP += 1
    FP = len(sub) - TP
    FN = len(truth) - TP
    return TP, FP, FN

def row_score(row):
    """Store the TP / FP / FN counts of one row under the keys 'TP', 'FP', 'FN'.

    The counts come from `score(row['sub'], row['truth'])`; the same `row`
    object is updated and returned.
    """
    counts = score(row['sub'], row['truth'])
    true_pos, false_pos, false_neg = counts
    row['TP'] = true_pos
    row['FP'] = false_pos
    row['FN'] = false_neg
    return row

# Score every (id, class) row; row_score adds the TP / FP / FN columns.
df = df.apply(func=row_score, axis='columns')

In [None]:
# Pool the per-(id, class) counts into one row per class.
df = df.groupby('class')[['TP','FP','FN']].sum()
# A class with no predictions (TP + FP == 0) or no ground truth (TP + FN == 0)
# gives 0/0 = NaN; report 0 instead.
df['Precision'] = (df.TP / (df.TP + df.FP)).fillna(0)
df['Recall'] = (df.TP / (df.TP + df.FN)).fillna(0)
# F1 straight from the counts. It equals 2PR / (P + R) whenever that is defined,
# and is 0 rather than NaN when TP == 0, so the class is not silently skipped
# by a later mean over the F1 column.
df['F1'] = (2 * df.TP / (2 * df.TP + df.FP + df.FN)).fillna(0)
pd.set_option('display.float_format', '{:0.2f}'.format)
df

In [None]:
# Macro average: unweighted mean of the per-class F1 scores.
macro_f1 = df.F1.mean()
print(f'Final macro mean of F1 scores: {macro_f1:.3f}')