# Metrics - SST2 (id)

## Setup

In [1]:
from sklearn.metrics import roc_auc_score, roc_curve
import numpy as np

def compute_auroc(id_pps, ood_pps, normalize=False, return_curve=False):
    y = np.concatenate((np.ones_like(ood_pps), np.zeros_like(id_pps)))
    scores = np.concatenate((ood_pps, id_pps))
    if normalize:
        scores = (scores - scores.min()) / (scores.max() - scores.min())
    if return_curve:
        return roc_curve(y, scores)
    else:
        return 100*roc_auc_score(y, scores)

def compute_far(id_pps, ood_pps, rate=5):
    incorrect = len(id_pps[id_pps > np.percentile(ood_pps, rate)])
    return 100*incorrect / len(id_pps)

In [24]:
import pandas as pd

columns = ['roberta_type', 'id_ood', 'AUROC', 'FAR']
df = pd.DataFrame(columns=columns)
df

Unnamed: 0,roberta_type,id_ood,AUROC,FAR


# IMDB (ood)

## Base

In [25]:
sst2_base = np.load('../runs/msp_best/base_sst2_msp.npy')
imdb_base = np.load('../runs/msp_best/base_imdb_msp.npy')

In [26]:
auroc_base = compute_auroc(sst2_base, imdb_base)
far_base = compute_far(sst2_base, imdb_base)

print('auroc:', auroc_base)
print('far:', far_base)

auroc: 35.33037249283667
far: 97.99426934097421


In [27]:
idx = 0
df.loc[idx]=['base', 'sst2_imdb', auroc_base, far_base]
df

Unnamed: 0,roberta_type,id_ood,AUROC,FAR
0,base,sst2_imdb,35.330372,97.994269


## Large

In [21]:
sst2_large = np.load('../runs/msp_best/large_sst2_msp.npy')
imdb_large = np.load('../runs/msp_best/large_imdb_msp.npy')

In [22]:
auroc_large = compute_auroc(sst2_large, imdb_large)
far_large = compute_far(sst2_large, imdb_large)

print('auroc:', auroc_large)
print('far:', far_large)

auroc: 35.716532951289395
far: 97.99426934097421


In [28]:
idx += 1
df.loc[idx]=['large', 'sst2_imdb', auroc_base, far_base]
df

Unnamed: 0,roberta_type,id_ood,AUROC,FAR
0,base,sst2_imdb,35.330372,97.994269
1,large,sst2_imdb,35.330372,97.994269


# Counterfactual-IMDB (ood)

## Base

In [31]:
sst2_base = np.load('../runs/msp_best/base_sst2_msp.npy')
counterfactual_imdb_base = np.load('../runs/msp_best/base_counterfactual-imdb_msp.npy')

In [32]:
auroc_base = compute_auroc(sst2_base, counterfactual_imdb_base)
far_base = compute_far(sst2_base, counterfactual_imdb_base)

print('auroc:', auroc_base)
print('far:', far_base)

auroc: 32.83142115646579
far: 97.70773638968481


In [33]:
idx += 1
df.loc[idx]=['base', 'sst2_counterfactual-imdb', auroc_base, far_base]
df

Unnamed: 0,roberta_type,id_ood,AUROC,FAR
0,base,sst2_imdb,35.330372,97.994269
1,large,sst2_imdb,35.330372,97.994269
2,base,sst2_counterfactual-imdb,32.831421,97.707736


## Large

In [34]:
sst2_large = np.load('../runs/msp_best/large_sst2_msp.npy')
counterfactual_imdb_large = np.load('../runs/msp_best/large_counterfactual-imdb_msp.npy')

In [35]:
auroc_large = compute_auroc(sst2_large, counterfactual_imdb_large)
far_large = compute_far(sst2_large, counterfactual_imdb_large)

print('auroc:', auroc_large)
print('far:', far_large)

auroc: 32.28407276058058
far: 97.70773638968481


In [36]:
idx += 1
df.loc[idx]=['large', 'sst2_counterfactual-imdb', auroc_base, far_base]
df

Unnamed: 0,roberta_type,id_ood,AUROC,FAR
0,base,sst2_imdb,35.330372,97.994269
1,large,sst2_imdb,35.330372,97.994269
2,base,sst2_counterfactual-imdb,32.831421,97.707736
3,large,sst2_counterfactual-imdb,32.831421,97.707736
