In [None]:
from pathlib import Path
from sklearn.metrics import f1_score, precision_score, recall_score
import numpy as np
import pandas as pd

In [None]:
# Read the JSON-lines label file, discard the `func` column (unused in this
# notebook), expose the `target` column under the name `y_true`, and order
# the rows by `idx` so they line up with the per-model prediction files.
dvul = (
    pd.read_json('/data/fire/data/jsonl/diversevul/all.jsonl', lines=True)
    .drop(columns='func')
    .rename(columns={'target': 'y_true'})
    .sort_values(by='idx')
)
# Show the remaining column names as the cell output.
dvul.columns

In [None]:
# Number of rows per distinct `y_true` value (class balance of the labels).
dvul['y_true'].value_counts()

In [None]:
# Score every model's predictions against the `dvul` labels and add two
# trivial baselines (always predict 0 / always predict 1) for reference.
LOG_DIR = Path('/data/fire/logs/diversevul/')
THRESHOLD = 0.5  # y_pred values strictly above this count as the positive class


def _score_row(name, y_true, y_pred):
    """Build one row of the results table for an already-binarised prediction.

    Args:
        name: label stored in the 'model' column.
        y_true: 1-D array of reference labels.
        y_pred: 1-D array of binary predictions aligned with `y_true`.

    Returns:
        dict with keys 'model', 'f1', 'precision' and 'recall'.

    `zero_division=0` is passed to all three metrics so that an undefined
    score (e.g. a predictor that never predicts the positive class) is
    reported as 0 consistently and without an UndefinedMetricWarning.
    """
    return {
        'model': name,
        'f1': f1_score(y_true, y_pred, zero_division=0),
        'precision': precision_score(y_true, y_pred, zero_division=0),
        'recall': recall_score(y_true, y_pred, zero_division=0),
    }


# Only the join key and the label are needed from the reference frame.
labels = dvul[['idx', 'y_true']]

rows = []
truth = None  # labels of the most recently scored model; reused by the baselines

# sorted() makes the traversal order (and hence which model's labels feed the
# baselines) deterministic; iterdir() alone yields entries in arbitrary order.
for model in sorted(LOG_DIR.iterdir()):
    # Stray files and directories without any CSV would otherwise make
    # np.concatenate fail on an empty list, so skip them.
    csv_paths = sorted(model.glob('*.csv')) if model.is_dir() else []
    if not csv_paths:
        continue

    model_preds = []
    model_truth = []
    for dataset in csv_paths:
        df = pd.read_csv(dataset).merge(labels, on='idx', how='left')
        # A left join leaves NaN labels for unknown idx values; fail here with
        # the offending file name instead of deep inside scikit-learn.
        if df.y_true.isna().any():
            raise ValueError(f'{dataset}: contains idx values with no label in dvul')
        model_preds.append(df.y_pred.values)
        model_truth.append(df.y_true.values)

    preds = np.concatenate(model_preds)
    truth = np.concatenate(model_truth)

    # `.name` rather than `.stem`: these are directories, and `.stem` would cut
    # everything after the last dot (e.g. 'model-0.5' -> 'model-0').
    rows.append(_score_row(model.name, truth, preds > THRESHOLD))

# Baselines are scored on the labels of the last model evaluated above.
# NOTE(review): this is only comparable across rows if every model was
# evaluated on the same set of examples - confirm.
if truth is not None:
    rows.append(_score_row('safe baseline', truth, np.zeros(truth.shape)))
    rows.append(_score_row('vuln baseline', truth, np.ones(truth.shape)))

# Explicit columns keep the sort below valid even when nothing was scored.
data = pd.DataFrame(rows, columns=['model', 'f1', 'precision', 'recall'])
data = data.sort_values(by='f1', ascending=False)
data.shape

In [None]:
# Display the results table with the three metric columns rendered to four
# decimal places and a colour gradient applied as cell background.
metric_formats = {metric: '{:.4f}' for metric in ('f1', 'precision', 'recall')}
data.style.format(metric_formats).background_gradient()