First I import the packages I need



In [None]:
import json
import torch
import pandas as pd
import numpy as np
import sklearn

In [None]:
# Use the GPU when PyTorch reports that CUDA is available, otherwise the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader

Here I define my dependent variables Y, i.e. the valence and arousal labels the model has to predict (see https://machinelearningmastery.com/multi-label-classification-with-deep-learning/)

In [None]:
def load_dataframe(filename):
    """Read the JSON file at ``filename`` and return it as a transposed DataFrame.

    With pandas' default orientation the top-level JSON keys become columns,
    so the transpose turns every top-level entry into one row.
    """
    return pd.read_json(filename).T

In [None]:
def loadgroundtruth(dataframe):
    """Build the list of target tensors from the label columns of ``dataframe``.

    Args:
        dataframe: frame with a "valence" and an "activation" column.

    Returns:
        One float32 tensor ``[valence, activation]`` per row, created on the
        notebook-level ``device``.
    """
    # torch.tensor (the factory function) is used instead of the legacy
    # torch.Tensor constructor: the legacy constructor only builds CPU tensors
    # and raises when it is handed a CUDA device.
    return [
        torch.tensor(pair, dtype=torch.float32, device=device)
        for pair in zip(dataframe["valence"], dataframe["activation"])
    ]

In [None]:
def load_features(dataframe):
    """Build the list of input tensors from the "features" column of ``dataframe``.

    Args:
        dataframe: frame whose "features" column holds one (nested) numeric
            sequence per row.

    Returns:
        One float32 tensor per row, created on the notebook-level ``device``.
    """
    # torch.tensor (the factory function) is used instead of the legacy
    # torch.Tensor constructor: the legacy constructor only builds CPU tensors
    # and raises when it is handed a CUDA device.
    return [
        torch.tensor(datapoint, dtype=torch.float32, device=device)
        for datapoint in dataframe["features"]
    ]

In [None]:
# Training split: input features plus the (valence, activation) targets.
train_df = load_dataframe('/content/drive/MyDrive/DeepLProject/train.json')
train_X, train_Y = load_features(train_df), loadgroundtruth(train_df)

# Dev split: only the input features are extracted here.
dev_df = load_dataframe('/content/drive/MyDrive/DeepLProject/dev.json')
dev_X = load_features(dev_df)

Next I split X and Y into training and testing sets with sklearn

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
#max_length = max(len(row) for row in X)
#max_cols = max([len(row) for batch in X for row in batch])
#max_rows = max([len(batch) for batch in X])
#padded = [batch + [[0] * (max_cols)] * (max_rows - len(batch)) for batch in X]
#padded = torch.tensor([row + [0] * (max_cols - len(row)) for batch in padded for row in batch])
#padded = padded.view(-1, max_rows, max_cols)
#after this padding every matrix has the same length (1707) as the longest one

In [None]:
# X and Y are built from the tensors loaded from the training file. Reading
# them from X/Y themselves fails with a NameError on a fresh run, because
# neither name is assigned before this cell.
# as_tensor avoids the copy-construct warning that torch.tensor(tensor) emits,
# and float32 matches the dtype of the model's parameters and outputs.
X = [torch.as_tensor(sample, dtype=torch.float32) for sample in train_X]
Y = [torch.as_tensor(target, dtype=torch.float32) for target in train_Y]

In [None]:
# X is a list of tensors: show the first one, then its shape on the next line.
print(X[0], X[0].shape, sep="\n")

tensor([[5.5028, 5.3896, 5.8908,  ..., 8.2201, 6.8112, 6.4579],
        [4.6536, 6.8838, 5.5860,  ..., 7.7556, 6.9207, 6.1452],
        [5.0233, 4.1029, 6.8853,  ..., 7.4144, 7.2947, 7.0197],
        ...,
        [5.7946, 4.0957, 5.4560,  ..., 8.0678, 6.3310, 6.3012],
        [6.0561, 6.9045, 7.7520,  ..., 8.1527, 6.9558, 6.7917],
        [2.3150, 1.9837, 1.4823,  ..., 2.9754, 1.7627, 2.2844]])
torch.Size([147, 26])


In [None]:
# Show the first target tensor; presumably the (valence, activation) pair of the
# first sample -- verify against how Y is built.
print(Y[0])

tensor([0., 1.], dtype=torch.float64)


In [None]:
X_train, X_dev, Y_train, Y_dev = train_test_split(X, Y, test_size=0.33, random_state=0)
#random_state seeds the shuffling that happens before the split. With it fixed to zero we get
#the same train and test sets across different executions.
#In this case the held-out "test" part (test_size=0.33) is used as the dev set.

In [None]:
#https://colab.research.google.com/drive/1DtQzLzwg9oXm_TCZeG9FJsOxdzE2wM3T?usp=sharing


In [None]:
#I create a class
class RNN(nn.Module):
    """LSTM followed by a linear layer applied to the output of the last time step.

    Args:
        input_size: number of features per time step.
        hidden_size: size of the LSTM hidden state.
        num_layers: number of stacked LSTM layers.
        num_classes: number of scores produced per sequence.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes) -> None:
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # batch_first=False: time is the leading dimension of the input.
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=False)
        # Fully connected layer, i.e. the linear transformation xW + b.
        # (It would take hidden_size * 2 inputs for a bidirectional LSTM.)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Run the sequence(s) in ``x`` through the LSTM and score the last step.

        Args:
            x: a single sequence of shape (seq_len, input_size), or a
                time-major batch of shape (seq_len, batch, input_size).

        Returns:
            Scores of shape (num_classes,) for a single sequence, or
            (batch, num_classes) for a batch.
        """
        # The shape of x is deliberately not unpacked here: unpacking it into
        # two names made batched (3-D) input fail before reaching the LSTM.
        out, _ = self.lstm(x)
        # out[-1] is the output of the last time step (time is the first axis).
        return self.fc(out[-1])

An LSTM consumes one entry of our sequence at a time,
so conceptually we iterate over the first dimension of the data matrix
and pass each entry to the LSTM while updating the freshly generated hidden and cell states.
One application of the LSTM gives something like (out, (h1, c1)) = self.lstm(x[0], (h0, c0)). Note that nn.LSTM runs this loop internally when it is given the whole sequence, which is what forward() above does.

sequence_length, n_features = x.shape # the shape gives us the size of each dimension of the matrix

The schedule your learning rate follows is a major hyperparameter that you want to tune. PyTorch supports learning-rate scheduling through its torch.optim.lr_scheduler module, which provides a variety of learning rate schedules. The following example demonstrates one of them.

In [None]:
#Instantiate the network, move it to the selected device and display its architecture
model = RNN(input_size=26, hidden_size=64, num_layers=1, num_classes=2)
model = model.to(device)
model

RNN(
  (lstm): LSTM(26, 64)
  (fc): Linear(in_features=64, out_features=2, bias=True)
)

In [None]:
# The targets hold two 0/1 labels per sample (valence, activation), i.e. this is
# a multi-label problem. CrossEntropyLoss would read each target as one
# probability distribution over two exclusive classes: an all-zero target then
# gives exactly zero loss (no learning signal) and an all-one target is not a
# valid distribution. BCEWithLogitsLoss applies an independent sigmoid +
# binary cross-entropy term to every output logit instead.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr = 0.003)

In [None]:
#Training the network
model.train()  # make sure the model is in training mode before optimizing
for epoch in range(1000):
    running_loss = 0.0
    num_steps = 0
    for x, y in zip(X_train, Y_train):

        #Get data to cuda
        x = x.to(device=device)
        y = y.to(device=device)

        #forward
        scores = model(x)
        # Floating-point targets are cast to the dtype of the scores so the
        # loss is not silently promoted to float64.
        if y.is_floating_point():
            y = y.to(dtype=scores.dtype)
        loss = criterion(scores, y)

        #backward
        optimizer.zero_grad()
        loss.backward()

        #gradient descent
        optimizer.step()

        running_loss += loss.item()
        num_steps += 1

    # One summary line per epoch instead of one printed tensor per sample.
    print(f"epoch {epoch + 1}: mean loss {running_loss / max(num_steps, 1):.4f}")

tensor(1.4456, dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(0.4983, dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(-0., dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(0.5247, dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(1.4152, dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(-0., dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(0.5562, dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(0.5611, dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(0.8458, dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(-0., dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(1.4034, dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(0.5762, dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(0.5757, dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(0.5706, dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(1.3868, dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(0.5609, dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(-0., dtype=torch.float64, grad_fn=<DivBack

KeyboardInterrupt: ignored

In [None]:

def check_accuracy(loader, model):
    """Print and return the accuracy of ``model`` on the ``(x, y)`` pairs of ``loader``.

    Two target layouts are supported:

    * class indices -- ``y`` does not have the same shape as the scores; the
      prediction is the arg-max over the last score dimension;
    * multi-hot labels -- ``y`` has the same shape as the scores; every score
      is treated as a logit thresholded at 0 and every label counts as one
      sample.

    Args:
        loader: iterable yielding ``(x, y)`` tensor pairs.
        model: the ``nn.Module`` to evaluate.

    Returns:
        Fraction of correct predictions in [0, 1]; 0.0 for an empty loader.
    """
    num_correct = 0
    num_samples = 0
    # Inputs are sent to wherever the model's weights live; a model without
    # parameters leaves them where they are.
    first_param = next(model.parameters(), None)
    target_device = None if first_param is None else first_param.device

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for x, y in loader:
                if target_device is not None:
                    x = x.to(device=target_device)
                    y = y.to(device=target_device)
                # Drop a singleton dimension 1 when there is one; 1-D inputs
                # have no dimension 1 to squeeze.
                if x.dim() > 1:
                    x = x.squeeze(1)

                scores = model(x)
                if y.shape == scores.shape:
                    # Multi-hot targets: logit > 0 <=> sigmoid(logit) > 0.5.
                    predictions = scores > 0
                    num_correct += (predictions == (y > 0.5)).sum().item()
                else:
                    predictions = scores.argmax(dim=-1)
                    num_correct += (predictions == y).sum().item()
                num_samples += predictions.numel()
    finally:
        # Restore whichever mode the model was in before the evaluation.
        model.train(was_training)

    # An empty loader reports 0.0 instead of raising ZeroDivisionError.
    accuracy = num_correct / num_samples if num_samples else 0.0
    print(f'Got {num_correct} / {num_samples} with accuracy {accuracy*100:.2f}')
    return accuracy
    

# NOTE(review): train_loader and test_loader are not defined anywhere in the
# visible notebook (only X_train/Y_train and X_dev/Y_dev are) -- confirm where
# they come from before running this cell.
check_accuracy(train_loader, model)
check_accuracy(test_loader, model)