# Metrics - SST2 (id)

## Setup

In [20]:
from sklearn.metrics import roc_auc_score, roc_curve
import numpy as np

def compute_auroc(id_pps, ood_pps, normalize=False, return_curve=False):
    y = np.concatenate((np.ones_like(ood_pps), np.zeros_like(id_pps)))
    scores = np.concatenate((ood_pps, id_pps))
    if normalize:
        scores = (scores - scores.min()) / (scores.max() - scores.min())
    if return_curve:
        return roc_curve(y, scores)
    else:
        return 100*roc_auc_score(y, scores)

def compute_far(id_pps, ood_pps, rate=5):
    incorrect = len(id_pps[id_pps > np.percentile(ood_pps, rate)])
    return 100*incorrect / len(id_pps)

In [21]:
import pandas as pd

columns = ['roberta_type', 'id_ood', 'AUROC', 'FAR']
df = pd.DataFrame(columns=columns)
df

Unnamed: 0,roberta_type,id_ood,AUROC,FAR


# IMDB (id) vs SST2 (ood)

## Base

In [22]:
imdb_base = np.load('/scratch/apo249/nlp/ood-detection/roberta/msp/roberta-base_imdb_msp.npy')
sst2_base = np.load('/scratch/apo249/nlp/ood-detection/roberta/msp/roberta-base_sst2_msp.npy')

In [23]:
auroc_base = compute_auroc(imdb_base,sst2_base)
far_base = compute_far(imdb_base,sst2_base)

print('auroc:', auroc_base)
print('far:', far_base)

auroc: 29.820407110091747
far: 96.58


In [24]:
idx = 0
df.loc[idx]=['base', 'imdb_sst2', auroc_base, far_base]
df

Unnamed: 0,roberta_type,id_ood,AUROC,FAR
0,base,imdb_sst2,29.820407,96.58


## Large

In [15]:
imdb_large = np.load('/scratch/apo249/nlp/ood-detection/roberta/msp/roberta-large_imdb_msp.npy')
sst2_large = np.load('/scratch/apo249/nlp/ood-detection/roberta/msp/roberta-large_sst2_msp.npy')

In [16]:
auroc_large = compute_auroc(imdb_large, sst2_large)
far_large = compute_far(imdb_large, sst2_large)

print('auroc:', auroc_large)
print('far:', far_large)

auroc: 29.718110665137615
far: 97.475


In [17]:
idx += 1
df.loc[idx]=['large', 'imdb_sst2', auroc_base, far_base]
df

Unnamed: 0,roberta_type,id_ood,AUROC,FAR
0,base,imdb_sst2,29.820407,96.58
1,base,imdb_snli,6.375897,99.49
2,large,imdb_snli,6.375897,99.49
3,large,imdb_sst2,6.375897,99.49


# IMDB (id) vs SNLI (ood)

## Base

In [7]:
imdb_base = np.load('/scratch/apo249/nlp/ood-detection/roberta/msp/roberta-base_imdb_msp.npy')
snli_base = np.load('/scratch/apo249/nlp/ood-detection/roberta/msp/roberta-base_snli_msp.npy')

In [8]:
auroc_base = compute_auroc(imdb_base, snli_base)
far_base = compute_far(imdb_base, snli_base)

print('auroc:', auroc_base)
print('far:', far_base)

auroc: 6.3758975
far: 99.49


In [9]:
idx += 1
df.loc[idx]=['base', 'imdb_snli', auroc_base, far_base]
df

Unnamed: 0,roberta_type,id_ood,AUROC,FAR
0,base,imdb_sst2,29.820407,96.58
1,base,imdb_snli,6.375897,99.49


## Large

In [10]:
imdb_large = np.load('/scratch/apo249/nlp/ood-detection/roberta/msp/roberta-large_imdb_msp.npy')
snli_large = np.load('/scratch/apo249/nlp/ood-detection/roberta/msp/roberta-large_snli_msp.npy')

In [11]:
auroc_large = compute_auroc(imdb_large, snli_large)
far_large = compute_far(imdb_large, snli_large)

print('auroc:', auroc_large)
print('far:', far_large)

auroc: 5.58914425
far: 99.565


In [12]:
idx += 1
df.loc[idx]=['large', 'imdb_snli', auroc_base, far_base]
df

Unnamed: 0,roberta_type,id_ood,AUROC,FAR
0,base,imdb_sst2,29.820407,96.58
1,base,imdb_snli,6.375897,99.49
2,large,imdb_snli,6.375897,99.49
