Collect predictions of submitted systems:

In [1]:
import pandas as pd
import json

In [2]:
# Read the ground truth: one JSON object per line carrying the pair's 'id'
# and its 'same' flag, which is stored here as an int.
pair_id, same = [], []
# The context manager closes the handle deterministically; JSON text is UTF-8,
# so do not rely on the platform's default encoding.
with open('datasets/pan20-authorship-verification-test/truth.jsonl', encoding='utf-8') as truth_file:
    for line in truth_file:
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        pair = json.loads(line)
        pair_id.append(pair['id'])
        same.append(int(pair['same']))

In [3]:
# Assemble the ground-truth table: one row per pair, with its id and 0/1 label.
df = pd.DataFrame({'id': pair_id, 'same': same}, columns=['id', 'same'])
df

Unnamed: 0,id,same
0,c04fdf1e-ddf5-5542-96e7-13ce18cae176,1
1,49dc4cae-3d32-5b4d-b240-a080a1dbb659,0
2,f326fe7c-fc10-566f-a70f-0f36e3f92399,0
3,16daa0d1-61b8-5650-b7ee-5e265bd40910,1
4,08b536a8-4fed-5f62-97bb-e57f79e841d2,0
...,...,...
14306,9756ec0d-7245-58fc-8d7c-a2ff3fc8d626,1
14307,467c9f79-419f-594a-9b87-0e7c43cd4933,1
14308,31928276-3e41-5802-8263-ff27b7052b64,1
14309,1f93b430-2263-5f39-bf6c-143c457d9e53,1


In [4]:
from glob import glob
import os

# Names of the systems whose predictions were added as columns of `df`,
# in the (sorted) order their answer files were read.
systems = []

for pred_file in sorted(glob('submissions/*/answers.jsonl')):
    print(pred_file)
    # Map pair id -> predicted value for this system.
    scores = {}
    # Close each answers file as soon as it has been read instead of leaking
    # one handle per submission; JSON text is UTF-8.
    with open(pred_file, encoding='utf-8') as answers_file:
        for line in answers_file:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            res = json.loads(line)
            val = res['value']
            # Some files wrap the value in a list; use its first element.
            if isinstance(val, list):
                val = val[0]
            scores[res['id']] = val
    # Align the predictions with the row order of the truth table.
    # A pair missing from the answers file raises KeyError here.
    predictions = [scores[i] for i in df['id']]
    # The system name is the directory that holds answers.jsonl. os.path is
    # used so this also works when glob returns backslash-separated paths.
    system = os.path.basename(os.path.dirname(pred_file))
    df[system] = predictions
    systems.append(system)

submissions/araujo20-large/answers.jsonl
submissions/araujo20-small/answers.jsonl
submissions/boenninghoff20-large/answers.jsonl
submissions/boenninghoff20-small/answers.jsonl
submissions/faber20-small/answers.jsonl
submissions/gagala20-small/answers.jsonl
submissions/halvani20-small/answers.jsonl
submissions/ikae20-small/answers.jsonl
submissions/kipnis20-small/answers.jsonl
submissions/niven20-small/answers.jsonl
submissions/ordonez20-large/answers.jsonl
submissions/weerasinghe20-large/answers.jsonl
submissions/weerasinghe20-small/answers.jsonl


In [5]:
# Preview the first rows of the table, now with one prediction column per system.
df.head()

Unnamed: 0,id,same,araujo20-large,araujo20-small,boenninghoff20-large,boenninghoff20-small,faber20-small,gagala20-small,halvani20-small,ikae20-small,kipnis20-small,niven20-small,ordonez20-large,weerasinghe20-large,weerasinghe20-small
0,c04fdf1e-ddf5-5542-96e7-13ce18cae176,1,0.959482,0.999483,0.998031,0.9935224,0.2333,1.0,0.61,0.77837,0.952,0.743432,0.995946,1.0,1.0
1,49dc4cae-3d32-5b4d-b240-a080a1dbb659,0,0.207092,0.75147,0.164877,0.5,0.2833,0.0,0.473,0.689979,0.194,0.256587,0.988152,0.004039,0.566177
2,f326fe7c-fc10-566f-a70f-0f36e3f92399,0,0.208751,0.819039,1e-06,1.582263e-08,0.4333,0.0,0.443,0.68872,0.382,0.256587,1.0,0.003807,0.003335
3,16daa0d1-61b8-5650-b7ee-5e265bd40910,1,0.995293,0.979941,0.716923,0.03127071,0.2167,1.0,0.526,0.728918,0.68,0.743432,1.0,1.0,0.995425
4,08b536a8-4fed-5f62-97bb-e57f79e841d2,0,0.970113,0.679099,0.5,0.8246948,0.4667,0.0,0.413,0.649772,0.164,0.256587,0.99999,0.081437,5e-05


In [6]:
# Save the combined table (pair ids, labels, per-system predictions) as an Excel file.
df.to_excel('predictions.xlsx')

## Evaluations

In [7]:
from pan20_verif_evaluator import *

In [8]:
# Metric name -> scoring function; each is called as fn(true_labels, predictions).
metrics = {'AUC': auc, 'c@1': c_at_1, 'F1': f1, 'F0.5u': f_05_u_score}

# One row per system, one column per metric, in the order of `metrics`.
evaluations = pd.DataFrame(
    [
        [score(df['same'], df[system]) for score in metrics.values()]
        for system in systems
    ],
    columns=list(metrics),
    index=systems,
)
evaluations

Unnamed: 0,AUC,c@1,F1,F0.5u
araujo20-large,0.858709,0.75131,0.799527,0.744638
araujo20-small,0.87392,0.770037,0.811372,0.762223
boenninghoff20-large,0.969237,0.928269,0.936349,0.906665
boenninghoff20-small,0.939986,0.889061,0.905762,0.853452
faber20-small,0.293359,0.331308,0.261599,0.313543
gagala20-small,0.786438,0.786458,0.80034,0.808773
halvani20-small,0.877568,0.796195,0.806912,0.818917
ikae20-small,0.840379,0.544756,0.70487,0.598996
kipnis20-small,0.86597,0.800979,0.808628,0.814546
niven20-small,0.794689,0.785619,0.778324,0.84151


In [9]:
# Overall score = unweighted mean of the individual metrics. The mean is taken
# over the metric columns only, so re-running this cell does not fold an
# already-present 'Overall' column (or any other added column) into it.
evaluations['Overall'] = evaluations[list(metrics)].mean(axis=1)
# Rank systems from best to worst overall score.
evaluations = evaluations.sort_values('Overall', ascending=False)
evaluations

Unnamed: 0,AUC,c@1,F1,F0.5u,Overall
boenninghoff20-large,0.969237,0.928269,0.936349,0.906665,0.93513
weerasinghe20-large,0.953372,0.879743,0.891481,0.881905,0.901625
boenninghoff20-small,0.939986,0.889061,0.905762,0.853452,0.897065
weerasinghe20-small,0.938713,0.832856,0.85997,0.816656,0.862049
halvani20-small,0.877568,0.796195,0.806912,0.818917,0.824898
kipnis20-small,0.86597,0.800979,0.808628,0.814546,0.822531
araujo20-small,0.87392,0.770037,0.811372,0.762223,0.804388
niven20-small,0.794689,0.785619,0.778324,0.84151,0.800036
gagala20-small,0.786438,0.786458,0.80034,0.808773,0.795502
araujo20-large,0.858709,0.75131,0.799527,0.744638,0.788546


In [10]:
# Save the metrics table (sorted by overall score) as an Excel file.
evaluations.to_excel('metrics.xlsx')