In [1]:
# Torch, Sklearn imports
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
print("PyTorch: {}".format(torch.__version__))
if torch.cuda.is_available():
    device = torch.cuda.current_device()
    print("Using CUDA device: {}".format(device))
    print("- # GPU device: {}".format(torch.cuda.device_count()))
    print("- Device Name: {}".format(torch.cuda.get_device_name(device)))
    print("- Device Proprierties: {}".format(torch.cuda.get_device_properties(device)))

PyTorch: 1.0.0.dev20190206
Using CUDA device: 0
- # GPU device: 1
- Device Name: GeForce GTX 1080 Ti
- Device Proprierties: _CudaDeviceProperties(name='GeForce GTX 1080 Ti', major=6, minor=1, total_memory=11175MB, multi_processor_count=28)


In [2]:
## General libs
import numpy as np
import pandas as pd
from string import punctuation
import os, re, sys, json, requests, pickle

## Sklearn
from sklearn.model_selection import train_test_split

In [3]:
with open("dataset/train_elmos.pkl","rb") as f:
    train_elmos, train_labels = pickle.load(f)

In [4]:
with open("dataset/test_elmos.pkl","rb") as f:
    test_elmos, test_labels = pickle.load(f)

In [5]:
train_elmos.shape, train_labels.shape, test_elmos.shape, test_labels.shape

((68916, 1024), (68916,), (2140, 1024), (2140,))

## Train/Valid Split

In [6]:
x_train, x_valid, y_train, y_valid = train_test_split(train_elmos, train_labels, test_size=0.20, random_state=42)
x_train.shape, y_train.shape

((55132, 1024), (55132,))

In [7]:
class IntentsPrecomp(Dataset):
    def __init__(self, data_x, data_y):
        self.len = len(data_x)
        self.data = data_x
        self.label = data_y
        
    def __getitem__(self, index):
        X = torch.tensor(self.data[index], dtype=torch.float32)
        y = torch.tensor(self.label[index], dtype=torch.int64)
        return X, y
    
    def __len__(self):
        return self.len

In [8]:
training_set = IntentsPrecomp(x_train, y_train)
validing_set = IntentsPrecomp(x_valid, y_valid)

In [9]:
idx = 0
a,b = training_set.__getitem__(idx)

In [10]:
a, b

(tensor([-0.0111,  0.0017,  0.0153,  ...,  0.0377,  0.0142, -0.0148]),
 tensor(3))

## Simple MLP Classifier

In [11]:
class SimpleMLP(nn.Module):
    def __init__(self, inputdim, 
                        nclasses, 
                        nhidden, 
                        dropout = 0):
        super(SimpleMLP, self).__init__()
        """
        PARAMETERS:
        -dropout:    dropout for MLP
        """
        
        self.inputdim = inputdim
        self.hidden_dim = nhidden
        self.dropout = dropout
        self.nclasses = nclasses
        self.model = nn.Sequential(
            nn.Linear(self.inputdim, nhidden),
            nn.Dropout(p=self.dropout),
            nn.ReLU(),
            nn.Linear(nhidden, self.nclasses),
            )
        if torch.cuda.is_available():
            self.model = self.model.cuda()
    def forward(self, x):
        log_probs = self.model(x)
        return log_probs

In [12]:
INP_DIM = train_elmos.shape[1]
NUM_LABELS = len((set(test_labels)))
NHIDDEN = 1024
DROPOUT = 0.2

In [None]:
model = SimpleMLP(inputdim = INP_DIM ,
              nhidden = NHIDDEN,
              nclasses = NUM_LABELS,
              dropout = DROPOUT)

if torch.cuda.is_available():
    device = torch.device("cuda:0")
    model = model.to(device)
    model.cuda()

In [None]:
samp = training_set.__getitem__(0)[0].cuda()
model.forward(samp)

### Training

In [174]:
# Dataloaders Parameters
params = {'batch_size': 32,
          'shuffle': True,
          'num_workers': 0}
max_epochs = 45
learning_rate = 0.0001 

In [173]:
train_loader = DataLoader(training_set, **params)
valid_loader = DataLoader(validing_set, **params)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(params =  model.parameters(), lr=learning_rate)

NameError: name 'model' is not defined

In [None]:
for epoch in range(max_epochs):
    print("EPOCH -- {}".format(epoch))
    for i, (sent, label) in enumerate(train_loader):
        optimizer.zero_grad() 
        if torch.cuda.is_available():
            sent = sent.cuda()
            label = label.cuda()
        output = model.forward(sent)
        loss = loss_function(output, label)
        loss.backward()
        optimizer.step()
        if i%1000 == 0:      
            correct = 0
            total = 0
            for sent, label in valid_loader:      
                if torch.cuda.is_available():
                    sent = sent.cuda()
                    label = label.cuda()
                output = model.forward(sent)
                _, predicted = torch.max(output.data, 1)
                total += label.size(0)
                correct += (predicted.cpu() == label.cpu()).sum()
            accuracy = 100.00 * correct.numpy() / total
            print('LOSS: {}. VALID ACCURACY: {}%'.format(loss.data, accuracy))

### Checking Test Error

In [None]:
correct = 0
total = 0
for i, utt in enumerate(test_elmos):
    X = torch.tensor(utt, dtype=torch.float32).cuda()
    y = torch.tensor(test_labels[i], dtype=torch.int64)
    logits_out = model(X)
    softmax_out = F.softmax(logits_out, dim=0).cpu()
    _, pred_label = torch.max(softmax_out.data, 0)
    total +=1
    if pred_label == y:
        correct += 1
test_accuracy = 100.00 * correct / total
print("TEST ACCURACY  -- {}".format(test_accuracy))

## Implementing Augmentation Class

In [175]:
%matplotlib inline
from torch.nn.functional import interpolate
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

In [176]:
def get_linear_interpolation(interp_label, 
                             dataset_data, 
                             dataset_labels, 
                             num_interp_points=30,        # number of points used to interpolate
                             num_interp_samples=1,
                             return_all_points = False):       # number of outpout interpolated points
    idx = np.where(dataset_labels==interp_label)[0]
    interp_idx = np.random.choice(idx, num_interp_points)
    interp_points = torch.tensor(dataset_data[interp_idx].T).unsqueeze(dim=0) 
    #print(interp_points.shape)
    
    ## Random selector for which interpolated phrase to pick
    rand_phrase = np.random.randint(num_interp_samples, size = 1)
    interp = interpolate(interp_points, size=(num_interp_samples), mode='linear', align_corners=True).squeeze(0).numpy().T
    #print(interp.shape)
    
    if return_all_points == False:
        interp =interp[rand_phrase]      ## Pick Randomly 1 point sample
    return interp.squeeze(0).T

In [177]:
get_linear_interpolation(4, train_elmos, train_labels, 
                         num_interp_points = 40, 
                         num_interp_samples= 1,
                         return_all_points = False).shape

(1024,)

In [178]:
class AugmentedIntents(Dataset):
    def __init__(self, data_x, data_y , num_interp_points, num_samples, alpha, augmentation = True):
        self.len = len(data_x)
        self.data = data_x
        self.label = data_y
        self.augmentation = augmentation
        self.num_interp_points = num_interp_points,  ## number of points used in interpolation
        self.num_samples = num_samples               ## number of samples drawn from interpolation
        self.alpha = alpha                           ## probabilty to get a real phrase vs interpolated phrase
        
    def __getitem__(self, index):
        y = torch.tensor(self.label[index], dtype=torch.int64)
        if self.augmentation:
            proba = np.random.binomial(1, self.alpha )
            if proba == 1:
                X = torch.tensor(self.data[index], dtype=torch.float32)
            elif proba == 0:
                #print("Interpolada")
                X =  torch.tensor(get_linear_interpolation(y.cpu().numpy(), 
                                              self.data, 
                                              self.label, 
                                              self.num_interp_points, 
                                              self.num_samples,
                                              return_all_points = False), dtype=torch.float32)
        else:
            X = torch.tensor(self.data[index], dtype=torch.float32)
        return X, y
    
    def __len__(self):
        return self.len

In [179]:
num_samples = 1
num_interp_points = 2000
alpha = 0.5
training_set = AugmentedIntents(x_train, y_train, num_interp_points, num_samples, alpha, augmentation = True)
validing_set = AugmentedIntents(x_valid, y_valid, num_interp_points, num_samples, alpha, augmentation = False)

In [180]:
idx = 0
a,b = training_set.__getitem__(idx)

In [181]:
model_2 = SimpleMLP(inputdim = INP_DIM ,
              nhidden = NHIDDEN,
              nclasses = NUM_LABELS,
              dropout = DROPOUT)

if torch.cuda.is_available():
    device = torch.device("cuda:0")
    model_2 = model_2.to(device)
    model_2.cuda()

In [182]:
samp = training_set.__getitem__(0)[0].cuda()
model_2.forward(samp)

tensor([ 0.0135,  0.0135,  0.0056, -0.0461, -0.0104], device='cuda:0',
       grad_fn=<AddBackward0>)

In [183]:
train_loader = DataLoader(training_set, **params)
valid_loader = DataLoader(validing_set, **params)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(params =  model_2.parameters(), lr=learning_rate)

In [None]:
for epoch in range(max_epochs):
    print("EPOCH -- {}".format(epoch))
    for i, (sent, label) in enumerate(train_loader):
        optimizer.zero_grad() 
        if torch.cuda.is_available():
            sent = sent.cuda()
            label = label.cuda()
        output = model_2.forward(sent)
        loss = loss_function(output, label)
        loss.backward()
        optimizer.step()
        if i%1000 == 0:      
            correct = 0
            total = 0
            for sent, label in valid_loader:      
                if torch.cuda.is_available():
                    sent = sent.cuda()
                    label = label.cuda()
                output = model_2.forward(sent)
                _, predicted = torch.max(output.data, 1)
                total += label.size(0)
                correct += (predicted.cpu() == label.cpu()).sum()
            accuracy = 100.00 * correct.numpy() / total
            print('LOSS: {}. VALID ACCURACY: {}%'.format(loss.data, accuracy))

EPOCH -- 0
LOSS: 1.6198681592941284. VALID ACCURACY: 11.302959953569356%
LOSS: 1.309521198272705. VALID ACCURACY: 49.6445153801509%
EPOCH -- 1
LOSS: 1.033869981765747. VALID ACCURACY: 51.574289030760305%
LOSS: 0.985174834728241. VALID ACCURACY: 52.15467208357516%
EPOCH -- 2
LOSS: 1.0202715396881104. VALID ACCURACY: 52.55368543238537%
LOSS: 1.0046827793121338. VALID ACCURACY: 53.199361578641906%
EPOCH -- 3
LOSS: 0.9870170950889587. VALID ACCURACY: 53.279164248403944%
LOSS: 1.3163374662399292. VALID ACCURACY: 53.49680789320952%
EPOCH -- 4
LOSS: 1.0523302555084229. VALID ACCURACY: 54.106210098665116%
LOSS: 1.2577253580093384. VALID ACCURACY: 53.81601857225769%
EPOCH -- 5
LOSS: 1.267742395401001. VALID ACCURACY: 54.07719094602437%
LOSS: 1.1621816158294678. VALID ACCURACY: 54.14248403946605%
EPOCH -- 6
LOSS: 1.0908269882202148. VALID ACCURACY: 54.30934416715032%
LOSS: 1.178288459777832. VALID ACCURACY: 54.40365641323273%
EPOCH -- 7
LOSS: 0.9422082901000977. VALID ACCURACY: 54.28032501450958

In [171]:
correct = 0
total = 0
for i, utt in enumerate(test_elmos):
    X = torch.tensor(utt, dtype=torch.float32).cuda()
    y = torch.tensor(test_labels[i], dtype=torch.int64)
    logits_out = model_2(X)
    softmax_out = F.softmax(logits_out, dim=0).cpu()
    _, pred_label = torch.max(softmax_out.data, 0)
    total +=1
    if pred_label == y:
        correct += 1
test_accuracy = 100.00 * correct / total
print("TEST ACCURACY  -- {}".format(test_accuracy))

TEST ACCURACY  -- 47.47663551401869


| Alpha (Real/Sintetic) | Num Samples Interpolated | Valid Accuracy % | Test Accuracy % | Epochs | Mini-Batch Size |
| --- | --- | --- | --- | -- | -- | 
| <font color='red'> 1 (no augmentation) </font> |  -  | <font color='red'> 58.31  </font>| <font color='red'> 46.02  </font> |  30  | 32 |
| 0.5 | 10  | 57.1 | 46.8 |  30  | 32 |
| 0.7 | 10  | 57.57 | 45.88 |  30  | 32 |
| 0.9 | 10  | 57.52 | 46.40 |  30  | 32 |
| 0.5 | 30  | 57.41 | 45.74 |  30  | 32 |
| 0.7 | 30  | 58.26 | 46.12 |  30  | 32 |
| 0.9 | 30  | 57.63 | 45.65 |  30  | 32 |
| 0.5 | 50  | 58.17 | 46.12 |  30  | 32 |
| 0.7 | 50  | 57.98 | 46.02 |  30  | 32 |
| 0.9 | 50  | 57.61 | 47.24 |  30  | 32 |
| 0.5 | 100  | 57.50 | 46.26 |  30  | 32 |
| 0.7 | 100  | 57.70 | 47.24 |  30  | 32 |
| 0.9 | 100  | 57.61 | 46.30 |  30  | 32 |
| 0.5 | 300  | 57.65 | 46.54 | 30 | 32 |
| 0.7 | 300  | 57.85 | 47.00 | 30  | 32 |
| 0.9 | 300  |  57.63 | **47.57** | 30 | 32 |
| 0.5 | 500  | 57.55 | 47.10 | 30 | 32 |
| 0.7 | 500  | 57.88 | 47.05 | 30 | 32 |
| 0.9 | 500  | 57.34 | 47.47 | 30  | 32 |
| 0.5 | 2000  |  |  | 45  | 32 |
| 0.7 | 2000  |  |  | 45  | 32 |
| 0.9 | 2000  |  |  | 45  | 32 |