In [1]:
# Torch, Sklearn imports
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader

In [2]:
# AllenNLP
import allennlp
from allennlp.modules.elmo import Elmo, batch_to_ids
from pytorch_pretrained_bert import BertTokenizer, BertModel, BertForMaskedLM
from allennlp.modules.token_embedders import BertEmbedder

# Report library versions and, when a GPU is visible, describe the active CUDA device.
print("PyTorch: {}".format(torch.__version__))
print("AllenNLP: {}".format(allennlp.__version__))
if torch.cuda.is_available():
    # Index of the currently selected CUDA device (an int, not a torch.device).
    device = torch.cuda.current_device()
    print("Using CUDA device: {}".format(device))
    print("- # GPU device: {}".format(torch.cuda.device_count()))
    print("- Device Name: {}".format(torch.cuda.get_device_name(device)))
    # Fixed the misspelled "Proprierties" label in the printed message.
    print("- Device Properties: {}".format(torch.cuda.get_device_properties(device)))

PyTorch: 1.0.0.dev20190206
AllenNLP: 0.8.1
Using CUDA device: 0
- # GPU device: 1
- Device Name: GeForce GTX 1080 Ti
- Device Proprierties: _CudaDeviceProperties(name='GeForce GTX 1080 Ti', major=6, minor=1, total_memory=11175MB, multi_processor_count=28)


In [8]:
## General libs
import numpy as np
import pandas as pd
from string import punctuation
import os, re, sys, json, requests, pickle

## Sklearn
from sklearn.model_selection import train_test_split

In [5]:
# Load the pickled (train_elmos, train_labels) pair for the training split.
# NOTE(review): pickle.load can execute arbitrary code; only use it on trusted files.
with open("dataset/train_elmos.pkl","rb") as f:
    train_elmos, train_labels = pickle.load(f)

In [6]:
# Load the pickled (test_elmos, test_labels) pair for the test split.
# NOTE(review): pickle.load can execute arbitrary code; only use it on trusted files.
with open("dataset/test_elmos.pkl","rb") as f:
    test_elmos, test_labels = pickle.load(f)

In [7]:
# Display the shapes of the loaded feature arrays and label vectors as a sanity check.
train_elmos.shape, train_labels.shape, test_elmos.shape, test_labels.shape

((68916, 1024), (68916,), (2140, 1024), (2140,))

## Train/Valid Split

In [9]:
# Hold out 20% of the training data for validation; random_state pins the shuffle so the split is repeatable.
x_train, x_valid, y_train, y_valid = train_test_split(train_elmos, train_labels, test_size=0.20, random_state=42)
# Display the shapes of the training portion of the split.
x_train.shape, y_train.shape

((55132, 1024), (55132,))

In [10]:
class IntentsPrecomp(Dataset):
    """Dataset over precomputed feature vectors and their integer labels.

    Items are converted lazily: each lookup builds a ``float32`` tensor from
    ``X[index]`` and an ``int64`` tensor from ``Y[index]``.
    """

    def __init__(self, X, Y):
        """Store the feature container ``X`` and the label container ``Y``."""
        # The dataset size is fixed from X at construction time.
        self.len = len(X)
        self.data, self.label = X, Y

    def __len__(self):
        """Return the number of samples recorded at construction."""
        return self.len

    def __getitem__(self, index):
        """Return the ``index``-th sample as a ``(features, target)`` tensor pair."""
        features = torch.tensor(self.data[index], dtype=torch.float32)
        target = torch.tensor(self.label[index], dtype=torch.int64)
        return features, target

In [11]:
# Wrap the training and validation splits in IntentsPrecomp datasets.
training_set = IntentsPrecomp(x_train, y_train)
validing_set = IntentsPrecomp(x_valid, y_valid)

## Simple MLP Classifier

In [12]:
class SimpleMLP(nn.Module):
    """One-hidden-layer feed-forward classifier.

    The network is ``Linear -> Dropout -> ReLU -> Linear`` and returns raw,
    unnormalised class scores (logits); no softmax is applied here.
    """

    def __init__(self, inputdim,
                        nclasses,
                        nhidden,
                        dropout = 0):
        """
        PARAMETERS:
        -inputdim:   size of each input feature vector
        -nclasses:   number of output classes (length of the returned score vector)
        -nhidden:    width of the hidden layer
        -dropout:    dropout probability applied after the first linear layer
        """
        # The docstring must be the first statement to be picked up as __doc__;
        # it previously sat after this call and was a dead string expression.
        super(SimpleMLP, self).__init__()

        self.inputdim = inputdim
        self.hidden_dim = nhidden
        self.dropout = dropout
        self.nclasses = nclasses
        self.model = nn.Sequential(
            nn.Linear(self.inputdim, nhidden),
            nn.Dropout(p=self.dropout),
            nn.ReLU(),
            nn.Linear(nhidden, self.nclasses),
            )
        # Kept for backward compatibility: the layers are moved to the GPU at
        # construction time whenever CUDA is available.
        if torch.cuda.is_available():
            self.model = self.model.cuda()

    def forward(self, x):
        """Return the class logits for ``x`` (the output of the final Linear layer)."""
        logits = self.model(x)
        return logits

In [19]:
# Model hyperparameters.
INP_DIM = train_elmos.shape[1]  # width of each precomputed feature vector
# Size the output layer from the labels the model is trained on rather than the
# test labels: CrossEntropyLoss needs every training target to be a valid class
# index, and the test split is not guaranteed to contain every class.
NUM_LABELS = len(set(train_labels))
NHIDDEN = 2048
DROPOUT = 0

In [20]:
# Build the classifier from the hyperparameters defined above.
model = SimpleMLP(inputdim = INP_DIM ,
              nhidden = NHIDDEN,
              nclasses = NUM_LABELS,
              dropout = DROPOUT)

# Always define `device`, so the name also exists on CPU-only machines.
# "cuda" without an index means the current CUDA device, which is where the
# previous trailing `model.cuda()` call left the model.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# One move is enough; the former `.to(device)` followed by `.cuda()` was redundant.
model = model.to(device)

In [21]:
# Smoke test: push one training sample through the untrained model.
# Place the sample on whatever device the model's parameters live on instead of
# calling .cuda() unconditionally, which fails on machines without a GPU.
samp = training_set[0][0].to(next(model.parameters()).device)
# Call the module itself rather than .forward() so registered hooks are honoured.
model(samp)

tensor([ 0.0058,  0.0202,  0.0230,  0.0220, -0.0164], device='cuda:0',
       grad_fn=<AddBackward0>)

### Training

In [22]:
# Dataloaders Parameters
params = {'batch_size': 64,
          'shuffle': True,
          'num_workers': 0}
max_epochs = 30

train_loader = DataLoader(training_set, **params)
# Accuracy does not depend on sample order, so iterate the validation set in a
# fixed order instead of reshuffling it on every evaluation pass.
valid_loader = DataLoader(validing_set, **dict(params, shuffle=False))
# Hyperparams
loss_function = nn.CrossEntropyLoss()  # expects raw logits and integer class targets
learning_rate = 0.0001
optimizer = optim.Adam(params =  model.parameters(), lr=learning_rate)

In [23]:
# Train with mini-batch updates. On every 1000th batch of an epoch (batch 0
# included) report the loss of the current training batch and the accuracy
# over the whole validation loader.
model_device = next(model.parameters()).device  # keep batches on the model's device
for epoch in range(max_epochs):
    print("EPOCH -- {}".format(epoch))
    model.train()
    for i, (sent, label) in enumerate(train_loader):
        optimizer.zero_grad()
        sent = sent.to(model_device)
        label = label.to(model_device)
        # Call the module (not .forward) so registered hooks are honoured.
        output = model(sent)
        loss = loss_function(output, label)
        loss.backward()
        optimizer.step()
        if i % 1000 == 0:
            correct = 0
            total = 0
            model.eval()  # disable dropout while measuring accuracy
            with torch.no_grad():  # evaluation needs no autograd graph
                # Distinct names so the training batch variables are not overwritten.
                for valid_sent, valid_label in valid_loader:
                    valid_sent = valid_sent.to(model_device)
                    valid_label = valid_label.to(model_device)
                    valid_output = model(valid_sent)
                    _, predicted = torch.max(valid_output, 1)
                    total += valid_label.size(0)
                    correct += (predicted == valid_label).sum().item()
            model.train()  # back to training mode for the remaining batches
            accuracy = 100.00 * correct / total
            # .item() yields a plain Python float regardless of device or torch version.
            print('LOSS: {}. VALID ACCURACY: {}%'.format(loss.item(), accuracy))

EPOCH -- 0
LOSS: 1.6059281826019287. VALID ACCURACY: 28.605629715612306%
EPOCH -- 1
LOSS: 1.1538652181625366. VALID ACCURACY: 50.68195008705746%
EPOCH -- 2
LOSS: 1.1874431371688843. VALID ACCURACY: 53.1485780615206%
EPOCH -- 3
LOSS: 1.083764910697937. VALID ACCURACY: 53.525827045850264%
EPOCH -- 4
LOSS: 0.9412069320678711. VALID ACCURACY: 54.432675565873474%
EPOCH -- 5
LOSS: 0.9073969721794128. VALID ACCURACY: 54.4181659895531%
EPOCH -- 6
LOSS: 1.0376415252685547. VALID ACCURACY: 54.62130005803831%
EPOCH -- 7
LOSS: 1.0445117950439453. VALID ACCURACY: 54.904236796285545%
EPOCH -- 8
LOSS: 1.0572164058685303. VALID ACCURACY: 55.383052814857805%
EPOCH -- 9
LOSS: 1.002320408821106. VALID ACCURACY: 55.390307603017995%
EPOCH -- 10
LOSS: 1.0465998649597168. VALID ACCURACY: 55.46285548461985%
EPOCH -- 11
LOSS: 1.1100976467132568. VALID ACCURACY: 55.724027858386535%
EPOCH -- 12
LOSS: 0.8420049548149109. VALID ACCURACY: 55.62971561230412%
EPOCH -- 13
LOSS: 0.8491542935371399. VALID ACCURACY: 56.0

### Checking Test Accuracy

In [24]:
# Score the test set with a single batched forward pass.
model.eval()  # make sure dropout is disabled for evaluation
# Follow the model's device instead of calling .cuda() unconditionally,
# which fails on machines without a GPU.
model_device = next(model.parameters()).device
X = torch.tensor(np.asarray(test_elmos), dtype=torch.float32).to(model_device)
y = torch.tensor(np.asarray(test_labels), dtype=torch.int64)
with torch.no_grad():  # inference only; skip building the autograd graph
    logits_out = model(X)
# Softmax is monotonic, so the arg-max of the logits is already the predicted class.
_, pred_label = torch.max(logits_out.cpu(), 1)
total = len(y)
correct = (pred_label == y).sum().item()
test_accuracy = 100.00 * correct / total
print("TEST ACCURACY  -- {}".format(test_accuracy))

TEST ACCURACY  -- 46.02803738317757


## Implementing Augmentation Class

In [25]:
%matplotlib inline
from torch.nn.functional import interpolate
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

In [None]:
def get_linear_interpolation(label, num_interp_samples, return_all_points=False):
    """Linearly interpolate between the ELMo vectors of one label's sentences.

    PARAMETERS:
    -label:               label value used to select sentences
    -num_interp_samples:  number of points to produce along the interpolated axis
    -return_all_points:   when False, return one randomly chosen interpolated
                          point; otherwise return all of them

    RETURNS:
    numpy array holding the transposed interpolation result (one row per
    interpolated point), or a single row of it when return_all_points is False.

    Relies on the notebook-level names train_pruned, train_dataset, elmo and
    get_elmo being defined before the call.
    """
    # NOTE(review): the text comes from train_pruned but the mask is built from
    # train_dataset -- confirm both frames share the same index.
    texts = train_pruned.clean_text
    utterances = list(texts[train_dataset.label == label])

    # One column per sentence, one row per embedding dimension.
    embedding_matrix = np.zeros((elmo.get_output_dim(), len(utterances)))
    for col, utterance in enumerate(utterances):
        embedding_matrix[:, col] = get_elmo(utterance).detach().cpu().clone().numpy()

    # Add a leading axis of size 1 so interpolate receives a 3-D input.
    batched = torch.tensor(embedding_matrix).unsqueeze(dim=0)

    ## Random selector for which interpolated phrase to pick
    chosen = np.random.randint(num_interp_samples, size = 1)

    ## Interpolate phrases
    resampled = interpolate(batched, size=num_interp_samples, mode='linear', align_corners=True)
    all_points = resampled.squeeze(0).numpy().T

    if return_all_points == False:
        ## Pick Randomly 1 point sample
        return all_points[chosen].squeeze(0)
    return all_points