# Metrics - SST2 (id)

## Setup

In [1]:
from sklearn.metrics import roc_auc_score, roc_curve
import numpy as np

def compute_auroc(id_pps, ood_pps, normalize=False, return_curve=False):
    y = np.concatenate((np.ones_like(ood_pps), np.zeros_like(id_pps)))
    scores = np.concatenate((ood_pps, id_pps))
    if normalize:
        scores = (scores - scores.min()) / (scores.max() - scores.min())
    if return_curve:
        return roc_curve(y, scores)
    else:
        return 100*roc_auc_score(y, scores)

def compute_far(id_pps, ood_pps, rate=5):
    incorrect = len(id_pps[id_pps > np.percentile(ood_pps, rate)])
    return 100*incorrect / len(id_pps)

In [2]:
import pandas as pd

columns = ['roberta_type', 'id_ood', 'AUROC', 'FAR']
df = pd.DataFrame(columns=columns)
df

Unnamed: 0,roberta_type,id_ood,AUROC,FAR


# SST2

In [6]:
sst2_base = np.load('msp/sst2_in_domain/base_sst2_msp.npy')
sst2_large = np.load('msp/sst2_in_domain/large_sst2_msp.npy')

# IMDB (ood)

## Base

In [7]:
imdb_base = np.load('msp/sst2_in_domain/base_imdb_msp.npy')

In [8]:
auroc_base = compute_auroc(sst2_base, imdb_base)
far_base = compute_far(sst2_base, imdb_base)

print('auroc:', auroc_base)
print('far:', far_base)

auroc: 35.33037249283667
far: 97.99426934097421


In [9]:
idx = 0
df.loc[idx]=['base', 'sst2_imdb', auroc_base, far_base]
df

Unnamed: 0,roberta_type,id_ood,AUROC,FAR
0,base,sst2_imdb,35.330372,97.994269


## Large

In [10]:
imdb_large = np.load('msp/sst2_in_domain/large_imdb_msp.npy')

In [11]:
auroc_large = compute_auroc(sst2_large, imdb_large)
far_large = compute_far(sst2_large, imdb_large)

print('auroc:', auroc_large)
print('far:', far_large)

auroc: 35.716532951289395
far: 97.99426934097421


In [12]:
idx += 1
df.loc[idx]=['large', 'sst2_imdb', auroc_base, far_base]
df

Unnamed: 0,roberta_type,id_ood,AUROC,FAR
0,base,sst2_imdb,35.330372,97.994269
1,large,sst2_imdb,35.330372,97.994269


# Counterfactual-IMDB (ood)

## Base

In [13]:
counterfactual_imdb_base = np.load('msp/sst2_in_domain/base_counterfactual-imdb_msp.npy')

In [14]:
auroc_base = compute_auroc(sst2_base, counterfactual_imdb_base)
far_base = compute_far(sst2_base, counterfactual_imdb_base)

print('auroc:', auroc_base)
print('far:', far_base)

auroc: 32.83142115646579
far: 97.70773638968481


In [15]:
idx += 1
df.loc[idx]=['base', 'sst2_counterfactual-imdb', auroc_base, far_base]
df

Unnamed: 0,roberta_type,id_ood,AUROC,FAR
0,base,sst2_imdb,35.330372,97.994269
1,large,sst2_imdb,35.330372,97.994269
2,base,sst2_counterfactual-imdb,32.831421,97.707736


## Large

In [16]:
counterfactual_imdb_large = np.load('msp/sst2_in_domain/large_counterfactual-imdb_msp.npy')

In [17]:
auroc_large = compute_auroc(sst2_large, counterfactual_imdb_large)
far_large = compute_far(sst2_large, counterfactual_imdb_large)

print('auroc:', auroc_large)
print('far:', far_large)

auroc: 32.28407276058058
far: 97.70773638968481


In [18]:
idx += 1
df.loc[idx]=['large', 'sst2_counterfactual-imdb', auroc_base, far_base]
df

Unnamed: 0,roberta_type,id_ood,AUROC,FAR
0,base,sst2_imdb,35.330372,97.994269
1,large,sst2_imdb,35.330372,97.994269
2,base,sst2_counterfactual-imdb,32.831421,97.707736
3,large,sst2_counterfactual-imdb,32.831421,97.707736


# SNLI (ood)

## Base

In [19]:
snli_base = np.load('msp/sst2_in_domain/base_snli_msp.npy')

In [20]:
auroc_base = compute_auroc(sst2_base, snli_base)
far_base = compute_far(sst2_base, snli_base)

print('auroc:', auroc_base)
print('far:', far_base)

auroc: 10.253760744985673
far: 99.14040114613181


In [21]:
idx += 1
df.loc[idx]=['base', 'sst2_snli', auroc_base, far_base]
df

Unnamed: 0,roberta_type,id_ood,AUROC,FAR
0,base,sst2_imdb,35.330372,97.994269
1,large,sst2_imdb,35.330372,97.994269
2,base,sst2_counterfactual-imdb,32.831421,97.707736
3,large,sst2_counterfactual-imdb,32.831421,97.707736
4,base,sst2_snli,10.253761,99.140401


## Large

In [22]:
snli_large = np.load('msp/sst2_in_domain/large_snli_msp.npy')

In [23]:
auroc_large = compute_auroc(sst2_large, snli_large)
far_large = compute_far(sst2_large, snli_large)

print('auroc:', auroc_large)
print('far:', far_large)

auroc: 9.339104584527218
far: 99.5702005730659


In [24]:
idx += 1
df.loc[idx]=['large', 'sst2_snli', auroc_base, far_base]
df

Unnamed: 0,roberta_type,id_ood,AUROC,FAR
0,base,sst2_imdb,35.330372,97.994269
1,large,sst2_imdb,35.330372,97.994269
2,base,sst2_counterfactual-imdb,32.831421,97.707736
3,large,sst2_counterfactual-imdb,32.831421,97.707736
4,base,sst2_snli,10.253761,99.140401
5,large,sst2_snli,10.253761,99.140401
