In [None]:
%reload_ext autoreload
%autoreload 2

import numpy as np
import sys
import os
import pickle
import argparse
import math
import time
from bisect import bisect_left
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torchvision.transforms as trn
import torchvision.datasets as dset
import torch.nn.functional as F
from torch.autograd import Variable as V
import torchtext

from torchtext import data
from torchtext import datasets

import csv
csv.field_size_limit(sys.maxsize)

import tqdm
from tqdm import tqdm_notebook

In [None]:
# Fix NumPy's global RNG so runs are repeatable.
np.random.seed(1)

# Experiment configuration, exposed with attribute access (args.<name>).
args = argparse.Namespace(**{
    'in_dist_dataset': '20ng',  # 'sst' or 'trec' or '20ng'
    'oe_dataset': 'wikitext2',
    'epochs': 2,  # Fine-tune epochs
    'batch_size': 64,
    'learning_rate': 0.01,
    'momentum': 0.5,
    'test_bs': 256,
    'save': 'results',
    'load': 'results',
    'test': 'store_true',
    'mix': 'store_true',
    'ngpu': 1,
    'prefetch': 2,
    'lambda_1': 0.1,
    'lambda_2': 0.05,
})

In [None]:
def _build_vocabs(text_field, label_field, train_split, max_size=10000):
    """Build both vocabularies from the training split and report their sizes.

    Args:
        text_field: field whose token vocabulary is built.
        label_field: field whose label vocabulary is built.
        train_split: dataset the vocabularies are built from.
        max_size: `max_size` forwarded to both `build_vocab` calls.

    Returns:
        `len(label_field.vocab)`, i.e. the size of the label vocabulary.
    """
    text_field.build_vocab(train_split, max_size=max_size)
    label_field.build_vocab(train_split, max_size=max_size)
    print('vocab length (including special tokens):', len(text_field.vocab))
    print('num labels:', len(label_field.vocab))
    return len(label_field.vocab)


# Load the in-distribution dataset selected by args.in_dist_dataset and define
# TEXT, LABEL, train, test, num_classes, train_iter and test_iter for later cells.
# NOTE(review): num_classes is the full label-vocabulary size; later cells use
# num_classes - 1 and shift labels by -1, presumably to skip a special token.
if args.in_dist_dataset == 'sst':
    # set up fields
    TEXT = data.Field(pad_first=True)
    LABEL = data.Field(sequential=False)

    # make splits for data (coarse labels, 'neutral' examples removed)
    train, val, test = datasets.SST.splits(
        TEXT, LABEL, fine_grained=False, train_subtrees=False,
        filter_pred=lambda ex: ex.label != 'neutral')

    # build vocab
    num_classes = _build_vocabs(TEXT, LABEL, train)

    # create our own iterator, avoiding the calls to build_vocab in SST.iters
    train_iter, val_iter, test_iter = data.BucketIterator.splits(
        (train, val, test), batch_size=args.batch_size, repeat=False)

elif args.in_dist_dataset == '20ng':
    # every example is padded/truncated to fix_length=100 tokens
    TEXT = data.Field(pad_first=True, lower=True, fix_length=100)
    LABEL = data.Field(sequential=False)

    # both files are read as CSV with columns (label, text)
    train = data.TabularDataset(path='20ng-train.txt',
                                format='csv',
                                fields=[('label', LABEL), ('text', TEXT)])

    test = data.TabularDataset(path='20ng-test.txt',
                               format='csv',
                               fields=[('label', LABEL), ('text', TEXT)])

    # build vocab
    num_classes = _build_vocabs(TEXT, LABEL, train)

    train_iter = data.BucketIterator(train, batch_size=args.batch_size, repeat=False)
    test_iter = data.BucketIterator(test, batch_size=args.batch_size, repeat=False)

elif args.in_dist_dataset == 'trec':
    # set up fields
    TEXT = data.Field(pad_first=True, lower=True)
    LABEL = data.Field(sequential=False)

    # make splits for data
    train, test = datasets.TREC.splits(TEXT, LABEL, fine_grained=True)

    # build vocab
    num_classes = _build_vocabs(TEXT, LABEL, train)

    # make iterators
    train_iter, test_iter = data.BucketIterator.splits(
        (train, test), batch_size=args.batch_size, repeat=False)

else:
    # Fail here instead of with a NameError in a later cell.
    raise ValueError(
        f"Unknown in_dist_dataset {args.in_dist_dataset!r}; expected 'sst', '20ng' or 'trec'")


In [None]:
# Outlier-exposure (OE) corpora: dataset name -> path of its sentence file.
_OE_DATASET_PATHS = {
    'wikitext2': './wikitext_sentences',
    'wikitext103': './wikitext103_sentences',
    'gutenberg': './.data/gutenberg/gutenberg_sentences',
}

if args.oe_dataset not in _OE_DATASET_PATHS:
    # Fail here instead of with a NameError on train_iter_oe during training.
    raise ValueError(
        f"Unknown oe_dataset {args.oe_dataset!r}; expected one of {sorted(_OE_DATASET_PATHS)}")

TEXT_custom = data.Field(pad_first=True, lower=True)

# The OE file is read as CSV with a single 'text' column.
custom_data = data.TabularDataset(path=_OE_DATASET_PATHS[args.oe_dataset],
                                  format='csv',
                                  fields=[('text', TEXT_custom)])

# The vocabulary is built from the in-distribution training text (train.text),
# not from the OE corpus - presumably so OE token ids match the classifier's
# embedding table; confirm before changing.
TEXT_custom.build_vocab(train.text, max_size=10000)
print('vocab length (including special tokens):', len(TEXT_custom.vocab))

train_iter_oe = data.BucketIterator(custom_data, batch_size=args.batch_size, repeat=False)

In [None]:
class ClfGRU(nn.Module):
    """Two-layer unidirectional GRU text classifier.

    Token ids are embedded into 50 dimensions, passed through a 2-layer GRU
    with hidden size 128, and the final hidden state of the top layer is
    projected to `num_classes` logits.

    Args:
        num_classes: number of output logits.
        vocab_size: number of rows in the embedding table. When omitted it
            falls back to `len(TEXT.vocab)` from the notebook's global `TEXT`
            field, which is the original behaviour.
    """

    def __init__(self, num_classes, vocab_size=None):
        super().__init__()
        if vocab_size is None:
            # Backward-compatible default: size taken from the global field.
            vocab_size = len(TEXT.vocab)
        # Index 1 is treated as padding (its embedding gets no gradient).
        self.embedding = nn.Embedding(vocab_size, 50, padding_idx=1)
        self.gru = nn.GRU(input_size=50, hidden_size=128, num_layers=2,
                          bias=True, batch_first=True, bidirectional=False)
        self.linear = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return logits for a batch-first tensor of token ids `x`."""
        embeds = self.embedding(x)
        # nn.GRU returns (output, h_n); h_n holds one final state per layer,
        # and the last entry belongs to the 2nd (top) layer.
        _, h_n = self.gru(embeds)
        return self.linear(h_n[-1])

# Build the classifier on the GPU. The original if/else created the same model
# in both branches, so a single construction is equivalent.
# num_classes - 1 outputs: one fewer than the label-vocabulary size.
model = ClfGRU(num_classes-1).cuda()  # change to match dataset

# Start fine-tuning from the baseline checkpoint of the selected dataset.
model.load_state_dict(torch.load(f'./{args.load}/{args.in_dist_dataset}/baseline/model.dict'))

# Learning rate now comes from the config cell (args.learning_rate is 0.01,
# the value that used to be hard-coded here).
optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
# Cosine schedule over the fine-tuning epochs; train() steps it once per epoch.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.epochs)

In [None]:
# Training log, written by train() and the epoch loop below.
_results_path = os.path.join(args.save, args.in_dist_dataset+ f'/OECC/{args.oe_dataset}/OECC_training_results.txt')
# Create the output directory first so open() (and the torch.save into the same
# directory after training) does not fail on a fresh checkout.
os.makedirs(os.path.dirname(_results_path), exist_ok=True)
f = open(_results_path, 'w')

def train():
    """Run one OECC fine-tuning epoch over paired in-distribution / OE batches.

    Iterates `zip(train_iter, train_iter_oe)`, so the epoch ends when the
    shorter iterator is exhausted. The loss for each step is

        cross_entropy(in-dist)
        + lambda_1 * sum((max softmax prob on in-dist - A_tr)^2)
        + lambda_2 * sum(|softmax(OE) - 1 / n_outputs|)

    where A_tr is the training accuracy for the selected dataset. Progress
    (exponential moving averages of the two loss parts) is printed and written
    to the global log file `f`. The LR scheduler is stepped once at the end.

    Raises:
        ValueError: if `args.in_dist_dataset` has no known training accuracy.
    """
    # Training accuracy per dataset, used as the target confidence.
    train_accuracy = {'sst': 0.778, '20ng': 0.739, 'trec': 0.778}
    if args.in_dist_dataset not in train_accuracy:
        raise ValueError(
            f"No training accuracy (A_tr) known for dataset {args.in_dist_dataset!r}")
    A_tr = train_accuracy[args.in_dist_dataset]

    # Send batches to wherever the model lives (the GPU in this notebook).
    device = next(model.parameters()).device

    model.train()
    data_loss_ema = 0
    oe_loss_ema = 0

    for batch_idx, (batch, batch_oe) in enumerate(zip(train_iter, train_iter_oe)):
        # torchtext batches are (seq, batch); the model is batch-first.
        inputs = batch.text.t().to(device)
        # Shift label-vocabulary indices down by one to get class ids.
        labels = (batch.label - 1).to(device, non_blocking=True)
        logits = model(inputs)
        data_loss = F.cross_entropy(logits, labels)

        inputs_oe = batch_oe.text.t().to(device)
        logits_oe = model(inputs_oe)

        # OECC LOSS
        probabilities = F.softmax(logits, dim=1)        # in-distribution data
        probabilities_oe = F.softmax(logits_oe, dim=1)  # outliers
        max_probs = probabilities.max(dim=1).values

        # 1st regularization term: pull max confidence towards training accuracy.
        data_loss = data_loss + args.lambda_1 * torch.sum((max_probs - A_tr) ** 2)

        # 2nd regularization term: pull OE predictions towards uniform.
        # The class count is read from the logits rather than the global
        # num_classes, which a later cell rebinds.
        uniform_prob = 1 / logits_oe.shape[1]
        oe_loss = args.lambda_2 * torch.sum(torch.abs(probabilities_oe - uniform_prob))

        loss = data_loss + oe_loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # data_loss here already includes the 1st regularization term.
        data_loss_ema = data_loss_ema * 0.9 + data_loss.item() * 0.1
        oe_loss_ema = oe_loss_ema * 0.9 + oe_loss.item() * 0.1

        if (batch_idx % 200 == 0 or batch_idx < 10):
            print('iter: {} \t| data_loss_ema: {} \t| oe_loss_ema: {}'.format(
                batch_idx, data_loss_ema, oe_loss_ema))
            f.write(f'\niter: {batch_idx} | data_loss_ema: {data_loss_ema:.4f} | oe_loss_ema: {oe_loss_ema:.4f}')

    scheduler.step()


def evaluate():
    """Evaluate the global `model` on `test_iter`.

    Returns:
        (acc, loss, avg_conf) as Python floats, each averaged over all
        evaluated examples: accuracy, cross-entropy, and the maximum softmax
        probability (confidence).
    """
    model.eval()
    # Send batches to wherever the model lives (the GPU in this notebook).
    device = next(model.parameters()).device

    running_loss = 0.0
    num_examples = 0
    correct = 0
    conf_sum = 0.0

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for batch in test_iter:
            # torchtext batches are (seq, batch); the model is batch-first.
            inputs = batch.text.t().to(device)
            # Shift label-vocabulary indices down by one to get class ids.
            labels = (batch.label - 1).to(device, non_blocking=True)
            logits = model(inputs)

            # Summed loss per batch, normalised by the example count below.
            running_loss += F.cross_entropy(logits, labels, reduction='sum').item()

            pred = logits.max(1)[1]
            correct += pred.eq(labels).sum().item()

            num_examples += inputs.shape[0]

            # Accumulate the SUM of per-example confidences. The previous code
            # added per-batch means and then divided by the number of
            # examples, which under-reported confidence by roughly the batch size.
            probs = F.softmax(logits, dim=1)
            conf_sum += torch.max(probs, 1).values.sum().item()

    acc = correct / num_examples
    loss = running_loss / num_examples
    avg_conf = conf_sum / num_examples

    return acc, loss, avg_conf

# Fine-tune for args.epochs epochs, evaluating on the test iterator after each.
# try/finally ensures the log file is flushed and closed even if training or
# evaluation raises part-way through.
try:
    for epoch in range(args.epochs):
        print('Epoch', epoch)
        f.write(f'\nEpoch {epoch}')
        train()
        acc, loss, conf = evaluate()
        print(f'test acc: {acc:.3f} | test loss: {loss:.4f} | avg conf: {conf:.3f}\n')
        f.write(f'\ntest acc: {acc:.3f} | test loss: {loss:.4f}\n')
finally:
    f.close()

# Save the fine-tuned weights; only reached when every epoch completed.
torch.save(model.state_dict(), f'./{args.save}/{args.in_dist_dataset}/OECC/{args.oe_dataset}/model_finetune.dict')
print('Saved model.')

### Calibration

In [None]:
# ============================ SST ============================ #
# Dedicated fields for the calibration data, separate from TEXT / LABEL.
TEXT_sst = data.Field(pad_first=True)
LABEL_sst = data.Field(sequential=False)

# Coarse-grained SST without sub-tree examples; 'neutral' examples are dropped.
train_sst, val_sst, test_sst = datasets.SST.splits(
    TEXT_sst,
    LABEL_sst,
    filter_pred=lambda example: example.label != 'neutral',
    fine_grained=False,
    train_subtrees=False,
)

# Both vocabularies are built from the training split only.
LABEL_sst.build_vocab(train_sst, max_size=10000)
TEXT_sst.build_vocab(train_sst, max_size=10000)
print('vocab length for SST(including special tokens):', len(TEXT_sst.vocab))
# NOTE: this rebinds the global num_classes set by the dataset-loading cell.
num_classes = len(LABEL_sst.vocab)
print('num labels:', num_classes)

# Build the iterators directly, avoiding the calls to build_vocab in SST.iters.
train_iter_sst, val_iter_sst, test_iter_sst = data.BucketIterator.splits(
    (train_sst, val_sst, test_sst), repeat=False, batch_size=args.batch_size)


# OOD bookkeeping: one fifth of the SST test-set size, and the resulting
# fraction of OOD examples among (OOD + test) examples.
recall_level = 0.9
ood_num_examples = len(test_iter_sst.dataset) // 5
expected_ap = ood_num_examples / (len(test_iter_sst.dataset) + ood_num_examples)

In [None]:
class ClfGRU(nn.Module):
    """Two-layer unidirectional GRU text classifier used for calibration.

    Token ids are embedded into 50 dimensions, passed through a 2-layer GRU
    with hidden size 128, and the final hidden state of the top layer is
    projected to `num_classes` logits.

    Args:
        num_classes: number of output logits (also stored as `self.num_classes`).
        vocab_size: number of rows in the embedding table. When omitted it
            falls back to `len(TEXT_sst.vocab)` from the notebook's global
            `TEXT_sst` field, which is the original behaviour.
    """

    def __init__(self, num_classes, vocab_size=None):
        super().__init__()
        if vocab_size is None:
            # Backward-compatible default: size taken from the global SST field.
            vocab_size = len(TEXT_sst.vocab)
        # Index 1 is treated as padding (its embedding gets no gradient).
        self.embedding = nn.Embedding(vocab_size, 50, padding_idx=1)
        self.gru = nn.GRU(input_size=50, hidden_size=128, num_layers=2, bias=True, batch_first=True, bidirectional=False)
        self.linear = nn.Linear(128, num_classes)
        self.num_classes = num_classes

    def forward(self, x):
        """Return logits for a batch-first tensor of token ids `x`."""
        embeds = self.embedding(x)
        # nn.GRU returns (output, h_n); h_n holds one final state per layer,
        # and the last entry belongs to the 2nd (top) layer.
        _, h_n = self.gru(embeds)
        return self.linear(h_n[-1])


# The calibration model is sized from the SST vocabulary and SST labels, so
# only a checkpoint fine-tuned on SST can be loaded into it. Fail with a clear
# message rather than a size-mismatch error from load_state_dict.
if args.in_dist_dataset != 'sst':
    raise ValueError(
        "Calibration is set up for SST (TEXT_sst / LABEL_sst) but "
        f"args.in_dist_dataset is {args.in_dist_dataset!r}; set it to 'sst' "
        "or adapt the calibration cells to the dataset being evaluated.")

# Calibration runs on the CPU (no .cuda() here).
model = ClfGRU(num_classes-1)
# The checkpoint was saved from a CUDA model; map_location='cpu' makes the
# tensors load onto the CPU, so this also works without a GPU.
model.load_state_dict(torch.load(f'./{args.load}/{args.in_dist_dataset}/OECC/{args.oe_dataset}/model_finetune.dict',
                                 map_location='cpu'))  # change location as per our method
print('\nLoaded model.\n')

In [19]:
def evaluate():
    """Evaluate the global `model` on `test_iter_sst`, batch by batch (on CPU).

    Returns:
        A 4-tuple `(acc_bm, conf_bm, err, num_examples)`:
        - acc_bm: list with the accuracy of each mini-batch,
        - conf_bm: list with the mean max-softmax confidence of each mini-batch,
        - err: overall error rate, `1 - accuracy` over all examples,
        - num_examples: total number of evaluated examples.
    """
    model.eval()
    num_examples = 0
    correct = 0
    acc_bm = []
    conf_bm = []

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for batch in test_iter_sst:
            # torchtext batches are (seq, batch); the model is batch-first.
            inputs = batch.text.t()
            # Shift label-vocabulary indices down by one to get class ids.
            labels = batch.label - 1

            logits = model(inputs)
            batch_size = logits.shape[0]

            pred = logits.max(1)[1]
            batch_correct = pred.eq(labels).sum().item()
            correct += batch_correct
            num_examples += inputs.shape[0]

            # Sum of per-example confidences (max softmax probability).
            probs = F.softmax(logits, dim=1)
            batch_conf = torch.max(probs, dim=1).values.sum().item()

            acc_bm.append(batch_correct / batch_size)
            conf_bm.append(batch_conf / batch_size)

    acc = correct / num_examples

    return acc_bm, conf_bm, 1-acc, num_examples

In [None]:
# Run the calibration pass: per-batch accuracies, per-batch mean confidences,
# overall error rate (1 - accuracy) and the number of evaluated examples.
acc, conf, err, n = evaluate()

In [None]:
# Display the overall error rate (1 - accuracy) returned by evaluate().
err

In [None]:
# Expected calibration error with each mini-batch treated as one bin:
# sum over batches of (batch size / n) * |batch accuracy - batch confidence|.
# The batch size is read from the SST iterator that evaluate() actually used,
# not from the training-section test_iter.
bm = test_iter_sst.batch_size

# Weight each batch by its real size so the weights sum to 1.
# Assumes only the final batch can be smaller than batch_size - TODO confirm
# against the iterator's batch ordering.
bin_sizes = np.full(len(acc), bm, dtype=float)
bin_sizes[-1] = n - bm * (len(acc) - 1)

ece = (bin_sizes / n * np.abs(np.subtract(acc, conf))).sum()
ece

In [None]:
# Maximum calibration error: the largest |accuracy - confidence| gap over the
# per-batch values returned by evaluate().
mce = np.max(np.absolute(np.asarray(acc) - np.asarray(conf)))
mce