In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
cd '/content/drive/MyDrive/ClassAug Work/final code'

/content/drive/MyDrive/ClassAug Work/final code


In [4]:
!pip install ood-metrics

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting ood-metrics
  Downloading ood_metrics-0.2.9-py3-none-any.whl (5.5 kB)
Installing collected packages: ood-metrics
Successfully installed ood-metrics-0.2.9


In [5]:
import torch
import torch.nn as nn

import pickle
import tensorflow as tf
import numpy as np
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, classification_report, accuracy_score, roc_auc_score, average_precision_score, precision_recall_curve, auc, confusion_matrix

from sklearn.decomposition import PCA

import matplotlib.pyplot as plt

import ood_metrics
import json

In [6]:
image_datasets = ['cifar10', 'cifar100', 'fashion_mnist', 'mnist', 'svhn', 'tinyimagenet']
text_datasets = ['20ng', 'sms_spam', 'twitter_us_airline_sentiment']

dataset_classes = {
    'cifar10': 10,
    'cifar100': 100,
    'fashion_mnist': 10,
    'mnist': 10,
    'svhn': 10,
    '20ng': 20,
    'sms_spam': 2,
    'twitter_us_airline_sentiment': 3
}

drca_dataset_classes = {k: v+(v*(v-1)//2) for k, v in dataset_classes.items()}

paper_model_exts = {
    'drca': 'h5',
    'mixup': 't70_0',
    'oe': 'pt',
    'simple': 'h5'
}

In [14]:
cache = {}

In [15]:
def get_tf_model(typ, num_classes):
    model = None
    keras.backend.clear_session()
    if(typ == 'image'):
        model = keras.models.Sequential()
        model.add(keras.layers.Dense(1024, activity_regularizer='l2', activation='relu', input_shape=(2048,)))
        model.add(keras.layers.Dense(num_classes, activity_regularizer='l2', activation='relu'))
        model.add(keras.layers.Softmax())
    elif(typ == 'text'):
        model = keras.models.Sequential()
        model.add(keras.layers.Dense(512, activity_regularizer='l2', activation='relu', input_shape=(512,)))
        model.add(keras.layers.Dropout(0.3))
        model.add(keras.layers.Dense(num_classes, activity_regularizer='l2', activation='relu'))
        model.add(keras.layers.Softmax())
    return model

## UNCOMMENT FOR IMAGE ##
# class OEModel(nn.Module):
#     def __init__(self, num_classes):
#         super(OEModel, self).__init__()
#         self.l1 = nn.Linear(2048, 1024)
#         self.classifier = nn.Linear(1024, num_classes)
#         self.softmax = nn.Softmax()

#     def forward(self, x):
#         x = self.l1(x)
#         x = self.classifier(x)
#         x = self.softmax(x)
#         return x

## UNCOMMENT FOR TEXT ##
class OEModel(nn.Module):
    def __init__(self, num_classes):
        super(OEModel, self).__init__()
        self.l1 = nn.Linear(512, 512)
        self.d1 = nn.Dropout(0.3)
        self.classifier = nn.Linear(512, num_classes)
        self.softmax = nn.Softmax()

    def forward(self, x):
        x = self.l1(x)
        x = self.d1(x)
        x = self.classifier(x)
        x = self.softmax(x)
        return x

class MixupTextModel(nn.Module):
    def __init__(self, num_classes):
        super(MixupTextModel, self).__init__()

        self.l1 = nn.Linear(512, 512)
        self.d1 = nn.Dropout(0.3)
        self.classifier = nn.Linear(512, num_classes)

    def forward(self, x):
        x = self.l1(x)
        x = self.d1(x)
        x = self.classifier(x)
        return x

def get_model_path(paper, dataset):
    return f'./models/{paper}/{dataset}.{paper_model_exts[paper]}'

def get_models(dataset):
    typ = 'image' if dataset in image_datasets else 'text'
    simple_model = get_tf_model(typ, dataset_classes[dataset])
    simple_model.load_weights(get_model_path('simple', dataset))

    drca_model = get_tf_model(typ, drca_dataset_classes[dataset])
    drca_model.load_weights(get_model_path('drca', dataset))
    drca_model = keras.Model(inputs=drca_model.input, outputs=drca_model.layers[-2].output)

    oe_model = OEModel(dataset_classes[dataset])
    oe_model.load_state_dict(torch.load(get_model_path('oe', dataset)))

    mixup_model = torch.load(get_model_path('mixup', dataset))
    mixup_model = mixup_model['net']

    return simple_model, drca_model, oe_model, mixup_model

def eval(in_scores, out_scores, preds, labels):
    num_in = in_scores.shape[0]
    num_out = out_scores.shape[0]
    
    y_true = np.concatenate([np.zeros(num_in), np.ones(num_out)])
    y_score = np.concatenate([in_scores, out_scores])

    metrics = ood_metrics.calc_metrics(1. - y_score, y_true)

    return metrics

def eval_dataset(in_dataset, out_dataset):
    simple_model, drca_model, oe_model, mixup_model = get_models(in_dataset)

    embs_data = None

    if in_dataset not in cache:
        with open(f'./embeddings/{in_dataset}_embeddings.pkl', 'rb') as f:
            embs_data = pickle.load(f)

        embs = embs_data['embs']
        labels = embs_data['labels']

        cache[in_dataset] = {}
        cache[in_dataset]['simple'] = simple_model.predict(embs)
        cache[in_dataset]['drca'] = tf.nn.softmax(drca_model.predict(embs)[:,:dataset_classes[in_dataset]])
        cache[in_dataset]['oe'] = oe_model(torch.from_numpy(embs).float()).detach().cpu().numpy()
        # cache[in_dataset]['mixup'] = mixup_model(torch.from_numpy(embs).float()).detach().cpu().numpy() # FOR IMAGE
        cache[in_dataset]['mixup'] = tf.nn.softmax(mixup_model(torch.from_numpy(embs).float()).detach().cpu().numpy()) # FOR TEXT
        cache[in_dataset]['labels'] = np.squeeze(labels)

    if out_dataset not in cache:
        with open(f'./embeddings/{out_dataset}_embeddings.pkl', 'rb') as f:
            embs_data = pickle.load(f)

        embs = embs_data['embs']
        if 'labels' in embs_data:
            labels = embs_data['labels']

        cache[out_dataset] = {}

        cache[out_dataset]['simple'] = simple_model.predict(embs)
        cache[out_dataset]['drca'] = tf.nn.softmax(drca_model.predict(embs)[:,:dataset_classes[in_dataset]])
        cache[out_dataset]['oe'] = oe_model(torch.from_numpy(embs).float()).detach().cpu().numpy()
        # cache[out_dataset]['mixup'] = mixup_model(torch.from_numpy(embs).float()).detach().cpu().numpy() # FOR IMAGE
        cache[out_dataset]['mixup'] = tf.nn.softmax(mixup_model(torch.from_numpy(embs).float()).detach().cpu().numpy()) # FOR TEXT
        if 'labels' in embs_data:
          cache[out_dataset]['labels'] = np.squeeze(labels)

    simple_in_preds = cache[in_dataset]['simple']
    drca_in_preds = cache[in_dataset]['drca']
    oe_in_preds = cache[in_dataset]['oe']
    mixup_in_preds = cache[in_dataset]['mixup']
    labels = cache[in_dataset]['labels']

    simple_in_scores = np.max(simple_in_preds, axis=1)
    drca_in_scores = np.max(drca_in_preds, axis=1)
    oe_in_scores = np.max(oe_in_preds, axis=1)
    mixup_in_scores = np.max(mixup_in_preds, axis=1)

    simple_out_preds = cache[out_dataset]['simple']
    drca_out_preds = cache[out_dataset]['drca']
    oe_out_preds = cache[out_dataset]['oe']
    mixup_out_preds = cache[out_dataset]['mixup']

    simple_out_scores = np.max(simple_out_preds, axis=1)
    drca_out_scores = np.max(drca_out_preds, axis=1)
    oe_out_scores = np.max(oe_out_preds, axis=1)
    mixup_out_scores = np.max(mixup_out_preds, axis=1)

    return {
        "simple": eval(simple_in_scores, simple_out_scores, simple_in_preds, labels), 
        "drca": eval(drca_in_scores, drca_out_scores, drca_in_preds, labels),
        "oe": eval(oe_in_scores, oe_out_scores, oe_in_preds, labels),
        "mixup": eval(mixup_in_scores, mixup_out_scores, mixup_in_preds, labels),
    }

In [13]:
for in_dataset in ['cifar10']:
  # for out_dataset in image_datasets:
  for out_dataset in ['tinyimagenet']:
    if in_dataset != out_dataset:
      print(in_dataset + " - " + out_dataset)
      d = eval_dataset(in_dataset, out_dataset)
      print(json.dumps(d, indent=2, sort_keys=True))

cifar10 - tinyimagenet


RuntimeError: ignored

In [18]:
for in_dataset in text_datasets:
  for out_dataset in text_datasets:
    if in_dataset != out_dataset:
      print(in_dataset + " - " + out_dataset)
      d = eval_dataset(in_dataset, out_dataset)
      print(json.dumps(d, indent=2, sort_keys=True))

20ng - sms_spam
{
  "drca": {
    "aupr_in": 0.9675550569038263,
    "aupr_out": 0.7184776320335018,
    "auroc": 0.9166772037672025,
    "detection_error": 0.16549266934228846,
    "fpr_at_95_tpr": 0.19972407938023984
  },
  "mixup": {
    "aupr_in": 0.8876155055092506,
    "aupr_out": 0.3392735386001381,
    "auroc": 0.6972092263061797,
    "detection_error": 0.5956261774101074,
    "fpr_at_95_tpr": 0.7569776079804733
  },
  "oe": {
    "aupr_in": 0.8912884364623977,
    "aupr_out": 0.4252296039642216,
    "auroc": 0.7228463490189383,
    "detection_error": 0.608198869686297,
    "fpr_at_95_tpr": 0.7732675368778521
  },
  "simple": {
    "aupr_in": 0.9609362239621217,
    "aupr_out": 0.5089172073187243,
    "auroc": 0.8706750701781373,
    "detection_error": 0.19653534277991647,
    "fpr_at_95_tpr": 0.23989175421840178
  }
}
20ng - twitter_us_airline_sentiment
{
  "drca": {
    "aupr_in": 0.9501756074806521,
    "aupr_out": 0.87821299630969,
    "auroc": 0.9311696155755391,
    "dete

In [None]:
import json
d = {'simple': {'fpr_at_95_tpr': 0.9974921693238928, 'detection_error': 0.9084994830220791, 'auroc': 0.4250627879221264, 'aupr_out': 0.07429476615498992, 'aupr_in': 0.87460933489779}, 'drca': {'fpr_at_95_tpr': 0.1009477384201674, 'detection_error': 0.09583764148267437, 'auroc': 0.9811262813604729, 'aupr_out': 0.9458446057810337, 'aupr_in': 0.9974178724108781}, 'oe': {'fpr_at_95_tpr': 0.5545427993030446, 'detection_error': 0.5082121713987684, 'auroc': 0.7956671635327176, 'aupr_out': 0.23511691188805728, 'aupr_in': 0.9744944396527349}, 'mixup': {'fpr_at_95_tpr': 0.9549361366291074, 'detection_error': 0.9084994830220791, 'auroc': 0.4628955342485069, 'aupr_out': 0.07983833196786251, 'aupr_in': 0.9323017608952976}}
print(json.dumps(d, indent=2, sort_keys=True))

{
  "drca": {
    "aupr_in": 0.9974178724108781,
    "aupr_out": 0.9458446057810337,
    "auroc": 0.9811262813604729,
    "detection_error": 0.09583764148267437,
    "fpr_at_95_tpr": 0.1009477384201674
  },
  "mixup": {
    "aupr_in": 0.9323017608952976,
    "aupr_out": 0.07983833196786251,
    "auroc": 0.4628955342485069,
    "detection_error": 0.9084994830220791,
    "fpr_at_95_tpr": 0.9549361366291074
  },
  "oe": {
    "aupr_in": 0.9744944396527349,
    "aupr_out": 0.23511691188805728,
    "auroc": 0.7956671635327176,
    "detection_error": 0.5082121713987684,
    "fpr_at_95_tpr": 0.5545427993030446
  },
  "simple": {
    "aupr_in": 0.87460933489779,
    "aupr_out": 0.07429476615498992,
    "auroc": 0.4250627879221264,
    "detection_error": 0.9084994830220791,
    "fpr_at_95_tpr": 0.9974921693238928
  }
}
