In [None]:
from pathlib import Path
from sklearn.metrics import f1_score, precision_score, recall_score
import numpy as np
import pandas as pd

In [None]:
# Root directory: each sub-directory holds the prediction CSVs of one model.
LOG_DIR = Path('/data/fire/logs/bigvul/')
# Datasets read for every model; each one contributes a `-safe` and a `-vuln` CSV.
DATASETS = ['codexglue', 'd2a', 'draper']
# A score strictly above this threshold counts as a "vulnerable" prediction.
PRED_THRESHOLD = 0.5


def _load_dataset(model_dir, dataset):
    """Load one dataset's scores for one model together with its labels.

    Reads `<dataset>-safe.csv` and `<dataset>-vuln.csv` from `model_dir` and
    stacks, in this order, the `before` and `after` columns of the vuln file
    and the `before` column of the safe file. Only the `before` entries of
    the vuln file are labelled 1; every other entry is labelled 0.

    Returns:
        (scores, labels): two 1-D arrays of equal length.

    Raises:
        AssertionError: if the stacked arrays do not have the expected sizes.
    """
    df_safe = pd.read_csv(model_dir.joinpath(f'{dataset}-safe.csv'))
    df_vuln = pd.read_csv(model_dir.joinpath(f'{dataset}-vuln.csv'))

    # Order matters: the labelling below marks the first len(df_vuln) entries.
    ds_preds = np.concatenate([df_vuln.before, df_vuln.after, df_safe.before])
    ds_truth = np.zeros(ds_preds.shape)
    ds_truth[:df_vuln.shape[0]] = 1

    assert ds_preds.shape[0] == (df_safe.shape[0] + 2 * df_vuln.shape[0]), 'Preds is the wrong shape'
    assert ds_truth.shape == ds_preds.shape, 'Truth is the wrong shape'
    assert (ds_truth == 0).sum() == df_safe.shape[0] + df_vuln.shape[0], 'Wrong number of safe values'
    assert (ds_truth == 1).sum() == df_vuln.shape[0], 'Wrong number of vuln values'

    return ds_preds, ds_truth


def _metric_row(name, truth, predicted):
    """Return one results row: model name, F1, precision and recall.

    `zero_division=0` is passed to all three metrics so that an undefined
    score is consistently reported as 0 instead of emitting a warning.
    """
    return {
        'model': name,
        'f1': f1_score(truth, predicted, zero_division=0),
        'precision': precision_score(truth, predicted, zero_division=0),
        'recall': recall_score(truth, predicted, zero_division=0),
    }


records = []
truth = None

# Only directories can hold the per-dataset CSVs, so stray files are skipped.
# Sorting makes the iteration order (and therefore which model supplies the
# baseline labels below) deterministic.
for model in sorted(path for path in LOG_DIR.iterdir() if path.is_dir()):
    per_dataset = [_load_dataset(model, dataset) for dataset in DATASETS]
    preds = np.concatenate([scores for scores, _ in per_dataset])
    truth = np.concatenate([labels for _, labels in per_dataset])

    # NOTE: `.stem` drops everything after the last dot in the directory name.
    records.append(_metric_row(model.stem, truth, preds > PRED_THRESHOLD))

if truth is None:
    raise ValueError(f'No model directories found in {LOG_DIR}')

# Trivial baselines, scored against the labels of the last model processed.
# NOTE(review): assumes every model was evaluated on the same samples - confirm.
records.append(_metric_row('safe baseline', truth, np.zeros(truth.shape)))
records.append(_metric_row('vuln baseline', truth, np.ones(truth.shape)))

data = pd.DataFrame(records).sort_values(by='f1', ascending=False)
data.shape

In [None]:
# Show the results table with every metric rendered to four decimals and the
# cells shaded by magnitude.
metric_formats = {column: '{:.4f}' for column in ('f1', 'precision', 'recall')}
data.style.format(metric_formats).background_gradient()

In [None]:
# Translate precision/recall into expected error counts for a hypothetical
# workload that contains `vuln` truly vulnerable samples.
vuln = 100

df = data.copy()
# recall = TP / (TP + FN) with TP + FN = vuln, hence TP = recall * vuln.
tp = df.recall * vuln
# precision = TP / (TP + FP)  =>  FP = TP * (1 / precision - 1).
# A precision of 0 leaves the count undefined, so those rows are NaN, not inf.
df['fp'] = tp * (1 / df.precision.where(df.precision > 0) - 1)
df['fn'] = vuln - tp
df[['model', 'fp', 'fn']].round()