In [None]:
%reload_ext autoreload
%autoreload 2

import numpy as np
import sys
import os
import pickle
import argparse
import math
import time
from bisect import bisect_left
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torchvision.transforms as trn
import torchvision.datasets as dset
import torch.nn.functional as F
from torch.autograd import Variable as V

from torchtext import data
from torchtext import datasets

import csv
csv.field_size_limit(sys.maxsize)

import tqdm
from tqdm import tqdm_notebook

In [None]:
# Seed every RNG the notebook relies on. NumPy alone is not enough: the
# model weights are initialised through torch, so torch is seeded as well.
np.random.seed(1)
torch.manual_seed(1)

# Notebook-level configuration, kept in a Namespace so the rest of the
# notebook can read it as ``args.<name>``.
args = argparse.Namespace(
    in_dist_dataset='sst',  # 'sst', or 'trec', or '20ng'
    epochs=5,
    batch_size=64,
    learning_rate=0.01,
    momentum=0.5,
    test_bs=256,
    save='results',   # root directory for logs and checkpoints
    load='results',   # root directory checkpoints are read back from
    # NOTE(review): 'store_true' looks like an argparse action name carried
    # over from a CLI script; here it is just a (truthy) string value.
    test='store_true',
    mix='store_true',
    prefetch=2,
)

In [None]:
# root='NLP_classification'
# Build the torchtext fields, vocabularies and batch iterators for the
# in-distribution dataset selected by ``args.in_dist_dataset``.
# Every branch defines TEXT, LABEL, train, test, train_iter, test_iter and
# num_classes; only the 'sst' branch additionally defines val / val_iter.
# num_classes is the size of the LABEL vocabulary (the model cells below
# subtract 1 from it before building the classifier).
if args.in_dist_dataset == 'sst':
    # set up fields
    TEXT = data.Field(pad_first=True)
    LABEL = data.Field(sequential=False)

    # make splits for data (binary sentiment: 'neutral' examples are dropped)
    train, val, test = datasets.SST.splits(
        TEXT, LABEL, fine_grained=False, train_subtrees=False,
        filter_pred=lambda ex: ex.label != 'neutral')

    # build vocab
    TEXT.build_vocab(train, max_size=10000)
    LABEL.build_vocab(train, max_size=10000)
    print('vocab length (including special tokens):', len(TEXT.vocab))
    num_classes = len(LABEL.vocab)
    print('num labels:', len(LABEL.vocab))
    # create our own iterator, avoiding the calls to build_vocab in SST.iters
    train_iter, val_iter, test_iter = data.BucketIterator.splits(
        (train, val, test), batch_size=args.batch_size, repeat=False)

elif args.in_dist_dataset == '20ng':
    # Documents are lower-cased and padded/truncated to 100 tokens.
    TEXT = data.Field(pad_first=True, lower=True, fix_length=100)
    LABEL = data.Field(sequential=False)

    # CSV files with two columns: label first, then text.
    train = data.TabularDataset(path='20ng-train.txt',
                                format='csv',
                                fields=[('label', LABEL), ('text', TEXT)])

    test = data.TabularDataset(path='20ng-test.txt',
                               format='csv',
                               fields=[('label', LABEL), ('text', TEXT)])

    # build vocab
    TEXT.build_vocab(train, max_size=10000)
    LABEL.build_vocab(train, max_size=10000)
    print('vocab length (including special tokens):', len(TEXT.vocab))
    num_classes = len(LABEL.vocab)
    print('num labels:', len(LABEL.vocab))

    # make iterators
    train_iter = data.BucketIterator(train, batch_size=args.batch_size, repeat=False)
    test_iter = data.BucketIterator(test, batch_size=args.batch_size, repeat=False)

elif args.in_dist_dataset == 'trec':
    # set up fields
    TEXT = data.Field(pad_first=True, lower=True)
    LABEL = data.Field(sequential=False)

    # make splits for data
    train, test = datasets.TREC.splits(TEXT, LABEL, fine_grained=True)

    # build vocab
    TEXT.build_vocab(train, max_size=10000)
    LABEL.build_vocab(train, max_size=10000)
    print('vocab length (including special tokens):', len(TEXT.vocab))
    num_classes = len(LABEL.vocab)
    print('num labels:', len(LABEL.vocab))

    # make iterators
    train_iter, test_iter = data.BucketIterator.splits(
        (train, test), batch_size=args.batch_size, repeat=False)

else:
    # Fail here rather than with a NameError on num_classes / train_iter
    # in a later cell.
    raise ValueError(
        f"Unknown in_dist_dataset {args.in_dist_dataset!r}; "
        "expected 'sst', '20ng' or 'trec'")


In [9]:
class ClfGRU(nn.Module):
    """Two-layer unidirectional GRU text classifier.

    Token ids are embedded into 50 dimensions, passed through a 2-layer GRU
    with 128 hidden units, and the final hidden state of the last layer is
    projected to ``num_classes`` logits.

    Args:
        num_classes: Number of output logits.
        vocab_size: Number of rows in the embedding table. When ``None``
            (the default, matching the original behaviour) it is taken from
            the notebook-level ``TEXT.vocab``.
        padding_idx: Token id whose embedding is kept at zero. Defaults to 1
            (presumably the ``<pad>`` index of the torchtext vocab; confirm
            if a custom vocab is used).
    """

    def __init__(self, num_classes, vocab_size=None, padding_idx=1):
        super().__init__()
        if vocab_size is None:
            # Fall back to the vocabulary built in the data-loading cell.
            vocab_size = len(TEXT.vocab)
        self.embedding = nn.Embedding(vocab_size, 50, padding_idx=padding_idx)
        self.gru = nn.GRU(input_size=50, hidden_size=128, num_layers=2,
                          bias=True, batch_first=True, bidirectional=False)
        self.linear = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return logits of shape (batch, num_classes) for token ids ``x``.

        ``x`` must be batch-first, i.e. (batch, seq_len), because the GRU is
        built with ``batch_first=True``.
        """
        embeds = self.embedding(x)
        # The GRU returns (output, h_n); h_n is (num_layers, batch, hidden).
        _, h_n = self.gru(embeds)
        # Final hidden state of the top (2nd) layer.
        return self.linear(h_n[-1])
    
    

cudnn.benchmark = True  # fire on all cylinders

# The classifier gets one logit fewer than the LABEL vocabulary size
# (presumably because the vocab holds an extra '<unk>' entry at index 0;
# the training loop shifts labels by -1 to match).
model = ClfGRU(num_classes-1).cuda()

# Take the learning rate from the config cell instead of repeating the
# literal, so changing args.learning_rate actually takes effect.
optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
# Cosine decay over the full run; the scheduler is stepped once per epoch.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.epochs)

In [None]:
# Training log. Create the output directory first: open(..., 'w') does not
# create missing parent directories and would raise FileNotFoundError.
os.makedirs(os.path.join(args.save, args.in_dist_dataset, 'baseline'), exist_ok=True)
f = open(os.path.join(args.save, args.in_dist_dataset+ '/baseline/baseline_training_results.txt'), 'w')

def train():
    """Train the global ``model`` for one epoch over ``train_iter``.

    Uses the notebook-level ``optimizer``, ``scheduler`` and log file ``f``.
    Every 200 batches an exponential moving average of the loss is printed
    and written to the log. The LR scheduler is stepped once at the end of
    the epoch.

    Note: this definition rebinds the name ``train``, which the data-loading
    cell uses for the training dataset.
    """
    model.train()
    loss_ema = 0.0  # starts at 0, so early values are biased low

    for batch_idx, batch in enumerate(train_iter):
        # Transposed to feed the batch_first GRU
        # (assumes torchtext yields sequence-first batches).
        inputs = batch.text.t().cuda()
        # Shift labels down by one to match the model's num_classes-1 outputs.
        labels = (batch.label - 1).cuda()

        logits = model(inputs)
        loss = F.cross_entropy(logits, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # .item() returns a Python float without touching the deprecated
        # ``.data`` attribute.
        loss_ema = loss_ema * 0.9 + loss.item() * 0.1

        if batch_idx % 200 == 0:
            print(f'iter: {batch_idx} | loss_ema: {loss_ema}')
            f.write(f'\niter: {batch_idx} | loss_ema: {loss_ema:.4f} | ')

    scheduler.step()


def evaluate():
    """Evaluate the global ``model`` on ``test_iter``.

    Returns:
        (acc, loss): accuracy and mean cross-entropy per example over the
        whole test iterator, both as Python floats.
    """
    model.eval()
    running_loss = 0.0
    num_examples = 0
    correct = 0

    # No gradients are needed for evaluation; skipping the autograd graph
    # saves memory and time.
    with torch.no_grad():
        for batch in test_iter:
            inputs = batch.text.t().cuda()
            # Shift labels down by one to match the model's output indices.
            labels = (batch.label - 1).cuda()

            logits = model(inputs)

            # Sum (not mean) per batch so the final division by the example
            # count is exact even when the last batch is smaller.
            # ``reduction='sum'`` replaces the deprecated size_average=False.
            running_loss += F.cross_entropy(logits, labels, reduction='sum').item()

            pred = logits.max(1)[1]
            correct += pred.eq(labels).sum().item()

            num_examples += inputs.shape[0]

    acc = correct / num_examples
    loss = running_loss / num_examples

    return acc, loss

                       
# Main training loop: one train() pass and one test evaluation per epoch.
# try/finally guarantees the log file is flushed and closed even if training
# is interrupted or raises part-way through.
try:
    for epoch in range(args.epochs):
        print('Epoch', epoch)
        f.write(f'\nEpoch {epoch}')
        train()
        acc, loss = evaluate()
        print(f'test acc: {acc:.3f} | test loss: {loss:.4f}\n')
        f.write(f'test acc: {acc:.3f} | test loss: {loss:.4f}\n')
finally:
    f.close()

# Persist only the weights; the calibration section rebuilds the model and
# loads them back with load_state_dict.
torch.save(model.state_dict(), f'./{args.save}/{args.in_dist_dataset}/baseline/model.dict')
print('Saved model.')

### Calibration

In [10]:
class ClfGRU(nn.Module):
    """Two-layer unidirectional GRU text classifier.

    Repeats the architecture defined in the training section so that the
    calibration cells can rebuild the model and load the saved weights; the
    layer names (embedding / gru / linear) must stay the same for
    ``load_state_dict`` to succeed.

    Args:
        num_classes: Number of output logits.
        vocab_size: Number of rows in the embedding table. When ``None``
            (the default, matching the original behaviour) it is taken from
            the notebook-level ``TEXT.vocab``.
        padding_idx: Token id whose embedding is kept at zero. Defaults to 1
            (presumably the ``<pad>`` index of the torchtext vocab; confirm
            if a custom vocab is used).
    """

    def __init__(self, num_classes, vocab_size=None, padding_idx=1):
        super().__init__()
        if vocab_size is None:
            # Fall back to the vocabulary built in the data-loading cell.
            vocab_size = len(TEXT.vocab)
        self.embedding = nn.Embedding(vocab_size, 50, padding_idx=padding_idx)
        self.gru = nn.GRU(input_size=50, hidden_size=128, num_layers=2,
                          bias=True, batch_first=True, bidirectional=False)
        self.linear = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return logits of shape (batch, num_classes) for token ids ``x``.

        ``x`` must be batch-first, i.e. (batch, seq_len), because the GRU is
        built with ``batch_first=True``.
        """
        embeds = self.embedding(x)
        # The GRU returns (output, h_n); h_n is (num_layers, batch, hidden).
        _, h_n = self.gru(embeds)
        # Final hidden state of the top (2nd) layer.
        return self.linear(h_n[-1])


# Rebuild the classifier with the same output size used for training
# (num_classes comes from the LABEL vocabulary of the selected dataset, so
# no manual change is needed here) and restore the saved baseline weights.
model = ClfGRU(num_classes-1).cuda()

model.load_state_dict(torch.load(f'./{args.load}/{args.in_dist_dataset}/baseline/model.dict'))

# Optimizer / scheduler are recreated for parity with the training section;
# the calibration cells below only run inference. The learning rate is read
# from the config cell rather than repeated as a literal.
optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.epochs)

In [20]:
def evaluate():
    """Collect per-batch accuracy and confidence of ``model`` on ``test_iter``.

    Each test batch is summarised by its accuracy and by its mean maximum
    softmax probability (confidence), in iteration order.

    Returns:
        acc_bm: list with the accuracy of every batch.
        conf_bm: list with the mean max-softmax confidence of every batch.
        err: overall error rate, ``1 - accuracy``, over all test examples.
        num_examples: total number of test examples seen.
    """
    model.eval()
    num_examples = 0
    correct = 0
    acc_bm = []
    conf_bm = []

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        for batch in test_iter:
            inputs = batch.text.t().cuda()
            # Shift labels down by one to match the model's output indices.
            labels = (batch.label - 1).cuda()

            logits = model(inputs)
            batch_size = logits.shape[0]

            pred = logits.max(1)[1]
            batch_correct = pred.eq(labels).sum().item()
            correct += batch_correct
            num_examples += inputs.shape[0]

            # avg confidence: mean of the highest class probability
            probs = F.softmax(logits, dim=1)
            conf = torch.max(probs, dim=1).values.sum().item()

            acc_bm.append(batch_correct / batch_size)
            conf_bm.append(conf / batch_size)

    acc = correct / num_examples

    return acc_bm, conf_bm, 1-acc, num_examples

In [21]:
# Per-batch accuracies, per-batch mean confidences, overall error rate and
# total number of test examples, as returned by evaluate().
acc, conf, err, n = evaluate()

In [None]:
# Display the overall test error rate (1 - accuracy) returned by evaluate().
err

In [None]:
# Expected calibration error, using each test batch as one bin:
#   ECE = sum_b (|B_b| / n) * |acc_b - conf_b|
# Weighting every batch by bm/n over-weights a short final batch (the
# weights then sum to more than 1), so the remainder batch gets its true
# size instead.
# NOTE(review): this assumes the short batch is the last one yielded, which
# should hold for a non-shuffled test iterator -- confirm for iterators that
# shuffle their batches.
bm = test_iter.batch_size
num_bins = len(acc)
bin_sizes = [bm] * num_bins
if num_bins:
    bin_sizes[-1] = n - bm * (num_bins - 1)
ece = (np.abs(np.subtract(acc, conf)) * bin_sizes).sum() / n
ece

In [None]:
# Maximum calibration error: the largest |accuracy - confidence| gap found
# in any single batch.
mce = max(np.abs(np.asarray(acc) - np.asarray(conf)))
mce