In [61]:
# Load the libraries
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import seaborn as sns
from scipy.io import loadmat
from sklearn import metrics
from pylab import *
import sklearn.preprocessing


import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

max_len = 400
embed_size = 24
hidden_size = 11
batch_size = 24
device = torch.device('cpu') # can be changed to cuda if available

In [62]:
# load data
# Load dev data
d = loadmat('data_ivector_embeddings.mat') # load the data
data2 = d['devIVs']

# define X 
inputs = data2.transpose()

# define Y
labels = d['labels']
labels = labels[:,0]

In [63]:
# create dataframe which contains the data with the labels
df = pd.DataFrame(list(zip(inputs,labels)),columns = ["data","label"])
df.head()
# print head of dataframe
df.shape

(192, 2)

In [64]:
# Define X and Y variables
X_data = list(df['data'])
Y_data = df['label']

In [65]:
X = torch.zeros(192, 400)
Y = torch.zeros(192,)

X = torch.LongTensor(X_data)

# Create a tensor for Y
Y = torch.LongTensor(Y_data)

# Provided
X = X.to(device)
Y = Y.to(device)

In [66]:
X_train = X[:96] 
X_valid = X[-96:]

Y_train = Y[:96]
Y_valid = Y[-96:]

In [67]:
from torch.utils.data import TensorDataset, DataLoader

train_set = TensorDataset(X_train, Y_train)
valid_set = TensorDataset(X_valid, Y_valid)

train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_set, batch_size=batch_size)

In [68]:
class RNN(nn.Module):
  
        
    def __init__(self):
        super().__init__()
        
        # Here we define the network layers
        
        # An embedding layer projecting vectors of size vocab_size into embeddings of size embed_size
        # Assigns to each word in the vocabulary an embedding of size embed_size
        self.embed = nn.Embedding(11, embed_size)
        
        # A recurrent (GRU) layer to process each input token (represented by its embedding)
        # The GRU network takes as input the embedding (of size embed_size) of the current word 
        # and the previous hidden state (of size hidden_size)
        self.rnn = nn.GRU(embed_size, hidden_size, num_layers=1, bidirectional=False, batch_first=True)
        
        # Drop out layer for regularisation
        self.dropout = nn.Dropout(0.3)
        
        # Fully connected layer mapping 
        # the last layer maps a hidden state to a vector of size the number of classes
        self.decision = nn.Linear(hidden_size, len(set(Y_data)))
         
    def forward(self, x):
        # Here we say how the layers are connected 
       
        #  for each token in the input, retrieve the corresponding embeddings 
        
        # x = [batch size, input size]
        embed = self.embed(x)
        # embed = [batch size, sent len, emb dim]       
       
        # Run the RNN on the input embeddings
        # output is the sequence of hidden states produced by the RNN
        # hidden is the last hidden state produced
        output, hidden = self.rnn(embed)
        
        # output = [sent len, batch size, hidden size]
        # hidden = [num_layers * num_directions, batch size, hidden_size ]
        
        # Apply dropout (for regularisation)
        drop = self.dropout(hidden)
        
        # drop = [num_layers * num_directions, batch size, hidden_size]   
        
        # Apply the fully connected layer to the output of the dropout
        # Expected input size: [batch_size, input_size]
        # We transpose [num_layers * num_directions, batch size, hidden_size ]
        # to: [batch size, num_layers * num_directions, hidden_size ]
        # And apply view to create an input of the form [batch size, input_size ]
        # (x.size(0) = batch size)
        return self.decision(drop.transpose(0, 1).view(x.size(0), -1))
    
rnn_model = RNN()
rnn_model.to(device)

RNN(
  (embed): Embedding(11, 24)
  (rnn): GRU(24, 11, batch_first=True)
  (dropout): Dropout(p=0.3)
  (decision): Linear(in_features=11, out_features=11, bias=True)
)

In [69]:
def perf(model, loader):
    # define the loss
    criterion = nn.CrossEntropyLoss()
    # No drop out
    model.eval()
    total_loss = correct = num = 0
    for x, y in loader:
    # No gradient computation, weights remain unchanged
      with torch.no_grad():
        # Compute the scores for the instances in the input batch
        y_scores = model(x)
        # Compute the loss
        loss = criterion(y_scores, y)
        # Compute the predictions
        y_pred = torch.max(y_scores, 1)[1]
        # Compares the predictions witht the expected values
        correct += torch.sum(y_pred.data == y)
        # Update the batch loss
        total_loss += loss.item()
        num += len(y)
    return total_loss / num, correct.item() / num

In [70]:
def fit(model, epochs):
    # Define the loss function to be used (Cross Entropy)
    criterion = nn.CrossEntropyLoss()
    # Define the optimiser (Adam)
    # add gradients to all parameters (required by pytorch for training)
    optimizer = optim.Adam(filter(lambda param: param.requires_grad, model.parameters()))
    # Iterate over epochs (i.e., slices of the data)
    for epoch in range(epochs):
        # Set the module in training mode 
        model.train()
        # Initialise the loss to 0
        total_loss = num = 0
        # Iterate over batches of (x,y) pairs in the training data
        for x, y in train_loader:
            # null the gradients 
            optimizer.zero_grad()
            # predict labels for the batch
            y_scores = model(x)
            # calculate the loss
            loss = criterion(y_scores, y)
            # Back propagate
            loss.backward()
            # Adjust the weights
            optimizer.step()
            # Update the total loss
            total_loss += loss.item()
            num += len(y)
        # For each epoch print out the nb of the epoch, the average loss on the training and the valid data 
        # and the task score
        print(epoch, total_loss / num, *perf(model, valid_loader))

In [72]:
rnn_model = RNN()
fit(rnn_model, 1)

RuntimeError: index out of range at /opt/conda/conda-bld/pytorch-cpu_1556653114183/work/aten/src/TH/generic/THTensorEvenMoreMath.cpp:193