First, connect to my own drive. This has the data.py file in it.

In [41]:
from google.colab import drive
drive.mount('/content/drive')
import sys
sys.path.insert(0,'/content/drive/My Drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Import some libraries.

In [42]:
#libraries
import plotly.graph_objects as go 
from plotly.subplots import make_subplots
from plotly.offline import iplot,init_notebook_mode
import torch #cpu only version
import torchvision
import torchvision.transforms as transforms
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import torch
from copy import deepcopy


Install extra dependency.

In [43]:
!pip install wget
from data import *




Loading in the imdb data set.


In [44]:
(x_train, y_train), (x_val, y_val) , (i2w, w2i), numcls =  load_imdb(final = False, char = False)

Some statistics about the data set.

In [45]:
def batch_max(sequences: list): 
    """
    Determines the max length of the lists in the list.
    """
    return max(len(sequence) for sequence in sequences)

max_ = batch_max(x_train)
print(max_) #2514

2514


Code below adds '.start', '.end' and '.pad' to the batches. Each batch contains approximately 3,500 tokens. This is causing batches to be variable. Since the maximum number of tokens in one instance is 2514, the minimal number of tokens in a batch is 2514. 


In [46]:
(x_train, y_train), (x_val, y_val) , (i2w, w2i), numcls =  load_imdb(final = False, char = False)
MAXIMUM_TOKENS =  15000

def add_start(sequence: list): sequence.insert(0, 1)
def add_end(sequence: list): sequence.append(2)
def delete_first(sequence: list): del sequence[0]
def add_zero(sequence: list): sequence.append(0)

def add_pad(sequence: list, padding: int):  
    for _ in range(padding): sequence.append(0)

def pre_process_sequences(sequences : list, targets : list):
    batches, batch_targets = [],[]
    number_of_sequences = len(sequences)
    start = 0
    for _ in range(number_of_sequences):
        end =  start
        number_of_tokens = 0
        #determine batch size
        while (number_of_tokens + len(sequences[end-1]) < (MAXIMUM_TOKENS )):
            if end < number_of_sequences : number_of_tokens += len(sequences[end])
            else: break
            end += 1

        #make batch
        b, b_targets = sequences[start:end], targets[start:end]
        
        #add special tokens
        max_seq_length = len(b[-1])
        for sequence in b:
            add_start(sequence)
            add_end(sequence)
            padding =  max_seq_length + 2 - len(sequence)
            if padding > 0: add_pad(sequence, padding = padding )
            #if len(sequence) != max_seq_length
        b =  torch.tensor(b, dtype=torch.long)
        b_targets =  torch.tensor(b_targets, dtype=torch.long)
        

        batches.append(b)
        batch_targets.append(b_targets)

        start = end
        if end >= number_of_sequences: break

    return batches, batch_targets

In [48]:
batches, batch_targets = pre_process_sequences(x_train, y_train)
batches_val, batches_targets = pre_process_sequences(x_val, y_val)

Quick check. Seems to be in order.

In [50]:
print('Number of batches:', len(batches))
print('Number of batches (targets):', len(batch_targets))
print('')
print('Length of first batch:', len(batches[0]))
print('Length of first batch:', len(batch_targets[0]))
print('')
print('Length of last batch:', len(batches[-1]))
print('Length of last batch:', len(batch_targets[-1]))
print('')
print('Example sentence: ', ' '.join([i2w[word] for word in list(batches[0][3])]))
print('Shape of batch: ', batch_targets[-1].size())

Number of batches: 325
Number of batches (targets): 325

Length of first batch: 368
Length of first batch: 368

Length of last batch: 8
Length of last batch: 8

Example sentence:  .start long boring blasphemous never have i been so glad to see ending credits roll .end .pad .pad .pad .pad .pad .pad .pad .pad .pad .pad .pad .pad .pad .pad .pad .pad .pad .pad .pad .pad .pad .pad .pad .pad .pad .pad .pad .pad .pad .pad .pad .pad .pad .pad .pad
Shape of batch:  torch.Size([8])


Batches have now variable size (and padding). One disadvantage is that it requires some RAM to have them all loaded in. On the other side, it is possible to easily shuffle the batches during training. The function below does that.

In [55]:
from random import shuffle
def shuffle_lists(list1: list, list2: list):
  """
  Shuffels two lists in the same order.
  In our case the two lists containing torch tensors (data and targets).
  Based on: https://stackoverflow.com/questions/23289547/shuffle-two-list-at-once-with-same-order.
  This function should be called upon each beginning, or end, of each epoch.
  """
  list1, list2 =  deepcopy(list1), deepcopy(list2)
  original = list(zip(list1, list2))
  shuffle(original)
  list1, list2 = zip(*original)
  return list1, list2

Next, GPU for training.


In [51]:
if torch.cuda.is_available(): device = torch.device("cuda")
else: device = torch.device("cpu")

In [52]:
torch.version.cuda

'10.1'

Linear model



In [53]:
vocab_size = len(i2w)
class EmbedLin(nn.Module):
    def __init__(self, emb_dim = 300, vocab_size = 20, hidden_size = 300, classes = 2 ): 
        super(EmbedLin, self).__init__()
        self.emb_dim =  emb_dim
        self.vocab_size =  vocab_size
        self.hidden_size = hidden_size
        self.classes = classes

        self.emb = nn.Embedding(self.vocab_size, self.emb_dim)
        self.fc1 = nn.Linear(self.emb_dim, self.hidden_size)
        self.fc2 = nn.Linear(self.hidden_size, self.classes)

    def forward(self, x):
        x = self.emb(x)
        x = F.relu(self.fc1(x))
        x = torch.max(x, 1)[0]
        x = self.fc2(x)
        return x

Elman RNN

In [54]:
class Elman(nn.Module):
  def __init__ ( self , insize= 300 , outsize= 300 , hsize= 300 ):
    super(). __init__ ()

    self.lin1 = nn.Linear(insize + hsize, hsize)
    self.lin2 = nn.Linear(hsize, outsize)

  def forward( self , x, hidden= None):
    b, t, e = x.size()

    if hidden is None :
      hidden = torch.zeros(b, e, dtype =torch.float)

    outs = []
    x = x.to(device)
    hidden = hidden.to(device)

    for i in range (t):

      inp = torch.cat([x[:, i, :], hidden], dim = 1 )
      hidden = torch.sigmoid(self.lin1(inp))
      out = self.lin2(hidden)

      outs.append(out[:, None , :])

    return torch.cat(outs, dim = 1 ), hidden

class ElmanRNN(nn.Module):
    def __init__(self, emb_dim = 300, vocab_size = 20, 
                 hidden_size = 300, classes = 2 ): 
        super(ElmanRNN, self).__init__()
        self.emb_dim =  emb_dim
        self.vocab_size =  vocab_size
        self.hidden_size = hidden_size
        self.classes = classes

        self.emb = nn.Embedding(self.vocab_size, self.emb_dim)
        self.elman = Elman()
        self.fc2 = nn.Linear(self.hidden_size, self.classes)

    def forward(self, x, hidden = None):
        x = self.emb(x)
        x = self.elman(x, hidden)[0]
        x = F.relu(x)
        x = torch.max(x, 1)[0]
        x = self.fc2(x)
        return x



RNN, LSTM, Bi-LSTM. Not the best implementation but it does the job.


In [None]:
class RecNet(nn.Module):
    def __init__(self, emb_dim = 300, vocab_size = 20, 
                 hidden_size = 300, classes = 2, model = 'elman'): 
        super(RecNet, self).__init__()
        self.model = model
        self.emb_dim =  emb_dim
        self.vocab_size =  vocab_size
        self.hidden_size = hidden_size
        self.classes = classes

        if self.model == 'elman':
          self.emb = nn.Embedding(self.vocab_size, self.emb_dim)
          self.unit = nn.RNN(self.emb_dim, self.hidden_size, nonlinearity='relu')
          self.fc2 = nn.Linear(self.hidden_size, self.classes)
        
        if self.model == 'lstm':
          self.emb = nn.Embedding(self.vocab_size, self.emb_dim)
          self.unit = nn.LSTM(self.emb_dim, self.hidden_size)
          self.fc2 = nn.Linear(self.hidden_size, self.classes)
        
        if self.model == 'bilstm':
          self.emb = nn.Embedding(self.vocab_size, self.emb_dim)
          self.unit = nn.LSTM(self.emb_dim, self.hidden_size, bidirectional = True )
          self.fc2 = nn.Linear(self.hidden_size * 2, self.classes)

    def forward(self, x):
      if self.model == 'elman':
        x = self.emb(x)
        x = self.unit(x)[0]
        x = torch.max(x, 1)[0]
        x = self.fc2(x)

      if self.model == 'lstm':
        x = self.emb(x)
        x = F.relu(self.unit(x)[0])
        x = torch.max(x, 1)[0]
        x = self.fc2(x)

      if self.model == 'bilstm':
        x = self.emb(x)
        x = F.relu(self.unit(x)[0])
        x = torch.max(x, 1)[0]
        x = self.fc2(x)

      return x

Training loop

In [67]:
def accuracy(net_, x:list, targets: list):
    correct = 0
    total = 0
    with torch.no_grad():
      i = 0
      for values, labels in zip(x, targets):
        # print(len(values), len(labels),i)
        values = values.to(device)
        labels = labels.to(device)
        outputs = net_(values)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        i+=1
    accuracy = correct/total
    return accuracy

def calculate_loss(net_,  x :list, targets: list):
    losses = 0.0
    for values, labels in zip(x, targets):
        # get the inputs; data is a list of [inputs, labels]
        values = values.to(device)
        labels = labels.to(device)
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net_(values)
        loss = criterion(outputs, labels)
        losses += loss
    return losses / len(x)
        
def train(net_, X_train, y_train, criterion, scheduler, 
          optimizer, X_val = None, y_val = None ,epochs = 5):
    #lists for results
    losses_train, losses_val = [],[]
    accuracy_train, accuracy_val = [],[]
    running_loss,accuracy_batch = [], []

    for epoch in range(epochs):  # loop over the dataset multiple times
        print("Epoch: ", epoch)
        epoch_loss = []
        #print accuracies
        acc= accuracy(net_, X_train, y_train)
        accuracy_train.append(acc)
        #shuffling lists (to have longer and shorter sequences random)
        X_train, y_train = shuffle_lists(X_train, y_train)

        if X_val is not None: #if not final run
            acc_val = accuracy(net_, X_val, y_val)
            accuracy_val.append(acc_val)

        for inputs, labels in zip(X_train, y_train):
            #inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            # zero the parameter gradients
            net_.zero_grad()
            optimizer.zero_grad()

            # forward + backward + optimize
 
            outputs = net_(inputs)

            #batch accuracy
            _, predicted = torch.max(outputs.data, 1)
            total = labels.size(0)
            correct = (predicted == labels).sum().item()
            accuracy_batch.append(correct/total)

            #loss
            loss = criterion(outputs, labels)
            epoch_loss.append(loss.detach().cpu().numpy())
            loss.backward()
            

            #optimizer
            optimizer.step()

        #scheduler step
        scheduler.step()    
        running_loss.append(np.mean(epoch_loss))
        #final run has no validation set, so printing will be different
        if X_val is not None:
            print("Accuracy train: {} --- Accuracy validation: {} --- Mean running loss: {}\n ".format(round(acc,2), round(acc_val,2), running_loss[-1]))
        else: print("Accuracy train: {}\n ".format(round(acc,2)))
    
    # evaluate last epoch
    acc= accuracy(net_, X_train, y_train)
    accuracy_train.append(acc)

    if X_val is not None: #if not final run
        acc_val = accuracy(net_, X_val, y_val)
        accuracy_val.append(acc_val)
    if X_val is not None:
          print("Accuracy train: {} --- Accuracy validation: {} --- Mean running loss: {}\n ".format(round(acc,2), round(acc_val,2), running_loss[-1]))
    else: print("Accuracy train: {}\n ".format(round(acc,2)))
    print('Finished Training')

    if X_val is not None:
        return net_, accuracy_train, accuracy_val, running_loss, accuracy_batch

    else: return net_, accuracy_train, running_loss, accuracy_batch

Running the different models below


In [68]:
criterion =   nn.CrossEntropyLoss()
net = EmbedLin(vocab_size=vocab_size)
net.cuda()
optimizer = optim.Adam(net.parameters(), lr = 0.0001)
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95, last_epoch=-1, verbose=False)
net, acc_train, acc_val, running_loss, accuracy_batch = train(net, batches, batch_targets, criterion, 
                                                              scheduler, optimizer, batches_val, batches_targets)

Epoch:  0
Accuracy train: 0.5 --- Accuracy validation: 0.5 --- Mean running loss: 0.6059263944625854
 
Epoch:  1
Accuracy train: 0.81 --- Accuracy validation: 0.78 --- Mean running loss: 0.44229763746261597
 
Epoch:  2
Accuracy train: 0.85 --- Accuracy validation: 0.8 --- Mean running loss: 0.36199748516082764
 
Epoch:  3
Accuracy train: 0.87 --- Accuracy validation: 0.83 --- Mean running loss: 0.31871864199638367
 
Epoch:  4
Accuracy train: 0.88 --- Accuracy validation: 0.84 --- Mean running loss: 0.29047954082489014
 
Accuracy train: 0.89 --- Accuracy validation: 0.85 --- Mean running loss: 0.29047954082489014
 
Finished Training


In [69]:
net2 = ElmanRNN(vocab_size=vocab_size)
net2.cuda()
optimizer2 = optim.Adam(net2.parameters(), lr = 0.005)
scheduler2 = optim.lr_scheduler.ExponentialLR(optimizer2, gamma=0.95, last_epoch=-1, verbose=False)
net2, acc_train2, acc_val2, running_loss2, accuracy_batch2 = train(net2, batches, batch_targets, criterion, 
                                                              scheduler2, optimizer2, batches_val, batches_targets)


Epoch:  0
Accuracy train: 0.5 --- Accuracy validation: 0.5 --- Mean running loss: 0.7024572491645813
 
Epoch:  1
Accuracy train: 0.63 --- Accuracy validation: 0.62 --- Mean running loss: 0.601559042930603
 
Epoch:  2
Accuracy train: 0.72 --- Accuracy validation: 0.7 --- Mean running loss: 0.5346124172210693
 
Epoch:  3
Accuracy train: 0.76 --- Accuracy validation: 0.72 --- Mean running loss: 0.46462148427963257
 
Epoch:  4
Accuracy train: 0.82 --- Accuracy validation: 0.76 --- Mean running loss: 0.387845516204834
 
Accuracy train: 0.84 --- Accuracy validation: 0.75 --- Mean running loss: 0.387845516204834
 
Finished Training


In [70]:
net3 = RecNet(vocab_size=vocab_size, model = 'elman')
net3.cuda()
optimizer3 = optim.Adam(net3.parameters(), lr = 0.0005)
scheduler3 = optim.lr_scheduler.ExponentialLR(optimizer3, gamma=0.999, last_epoch=-1, verbose=False)
net3, acc_train3, acc_val3, running_loss3, accuracy_batch3 = train(net3, batches, batch_targets, criterion, 
                                                              scheduler3, optimizer3, batches_val, batches_targets)

Epoch:  0
Accuracy train: 0.5 --- Accuracy validation: 0.51 --- Mean running loss: 0.45167025923728943
 
Epoch:  1
Accuracy train: 0.9 --- Accuracy validation: 0.86 --- Mean running loss: 0.2534712255001068
 
Epoch:  2
Accuracy train: 0.92 --- Accuracy validation: 0.88 --- Mean running loss: 0.1831967830657959
 
Epoch:  3
Accuracy train: 0.96 --- Accuracy validation: 0.9 --- Mean running loss: 0.11715751141309738
 
Epoch:  4
Accuracy train: 0.96 --- Accuracy validation: 0.9 --- Mean running loss: 0.07012201100587845
 
Accuracy train: 0.99 --- Accuracy validation: 0.91 --- Mean running loss: 0.07012201100587845
 
Finished Training


In [71]:
net4 = RecNet(vocab_size=vocab_size, model = 'lstm')
net4.cuda()
optimizer4 = optim.Adam(net4.parameters(), lr = 0.0001)
scheduler4 = optim.lr_scheduler.ExponentialLR(optimizer4, gamma=0.95, last_epoch=-1, verbose=False)
net4, acc_train4, acc_val4, running_loss4, accuracy_batch4 = train(net4, batches, batch_targets, criterion, 
                                                              scheduler4, optimizer4, batches_val, batches_targets)

Epoch:  0
Accuracy train: 0.5 --- Accuracy validation: 0.5 --- Mean running loss: 0.6089031100273132
 
Epoch:  1
Accuracy train: 0.79 --- Accuracy validation: 0.71 --- Mean running loss: 0.5050760507583618
 
Epoch:  2
Accuracy train: 0.83 --- Accuracy validation: 0.73 --- Mean running loss: 0.44978103041648865
 
Epoch:  3
Accuracy train: 0.84 --- Accuracy validation: 0.78 --- Mean running loss: 0.40538644790649414
 
Epoch:  4
Accuracy train: 0.87 --- Accuracy validation: 0.75 --- Mean running loss: 0.36929625272750854
 
Accuracy train: 0.89 --- Accuracy validation: 0.81 --- Mean running loss: 0.36929625272750854
 
Finished Training


In [72]:
net5 = RecNet(vocab_size=vocab_size, model = 'bilstm')
net5.cuda()
optimizer5 = optim.Adam(net5.parameters(), lr = 0.0001)
scheduler5 = optim.lr_scheduler.ExponentialLR(optimizer5, gamma=0.95, last_epoch=-1, verbose=False)
net5, acc_train5, acc_val5, running_loss5, accuracy_batch5 = train(net5, batches, batch_targets, criterion, 
                                                              scheduler5, optimizer5, batches_val, batches_targets)

Epoch:  0
Accuracy train: 0.5 --- Accuracy validation: 0.5 --- Mean running loss: 0.587306022644043
 
Epoch:  1
Accuracy train: 0.85 --- Accuracy validation: 0.86 --- Mean running loss: 0.43278372287750244
 
Epoch:  2
Accuracy train: 0.88 --- Accuracy validation: 0.86 --- Mean running loss: 0.3489980399608612
 
Epoch:  3
Accuracy train: 0.91 --- Accuracy validation: 0.87 --- Mean running loss: 0.289218008518219
 
Epoch:  4
Accuracy train: 0.91 --- Accuracy validation: 0.89 --- Mean running loss: 0.24656076729297638
 
Accuracy train: 0.94 --- Accuracy validation: 0.86 --- Mean running loss: 0.24656076729297638
 
Finished Training


All models got good scores. Next, I checked the number of parameters per model.

In [73]:
print("Model 1: ",[p.numel() for p in net.parameters() if p.requires_grad])
print("Model 2: ",[p.numel() for p in net2.parameters() if p.requires_grad])
print("Model 3: ", [p.numel() for p in net3.parameters() if p.requires_grad])
print("Model 4: ", [p.numel() for p in net4.parameters() if p.requires_grad])
print("Model 5: ", [p.numel() for p in net4.parameters() if p.requires_grad])

Model 1:  [29829000, 90000, 300, 600, 2]
Model 2:  [29829000, 180000, 300, 90000, 300, 600, 2]
Model 3:  [29829000, 90000, 90000, 300, 300, 600, 2]
Model 4:  [29829000, 360000, 360000, 1200, 1200, 600, 2]


Graphs the running loss (batches) and epoch accuracy.

In [76]:
#graph
# plotting different learning curves
x = list(np.arange(0,6))
x_loss = list(np.arange(1, 6))
fig = make_subplots(1,3, horizontal_spacing=0.12, subplot_titles= ("Validation Accuracy", "Training Accuracy", "Mean running Loss"))
#validation
fig.add_trace(go.Scatter(x = x ,
                         y = acc_val,
                         name = "Linear",
                        connectgaps = True, 
                        line_color = 'olive'),
                        row=1, col=1)
fig.add_trace(go.Scatter(x = x ,
                         y = acc_val2,
                         name = "Elman (implemented)",
                        connectgaps = True,
                        line_color = 'red'),
                        row=1, col=1)
fig.add_trace(go.Scatter(x = x ,
                         y = acc_val3,
                         name = "Elman (PyTorch)",
                        connectgaps = True,
                        line_color = 'green'),
                        row=1, col=1)
fig.add_trace(go.Scatter(x = x ,
                         y = acc_val4,
                         name = "LSTM",
                        connectgaps = True,
                        line_color = 'blue'),
                        row=1, col=1)

fig.add_trace(go.Scatter(x = x ,
                         y = acc_val5,
                         name = "Bi-LSTM",
                        connectgaps = True,
                        line_color = 'black'),
                        row=1, col=1)
#training
fig.add_trace(go.Scatter(x = x ,
                         y = acc_train,
                         name = "Linear",
                        connectgaps = True,
                        showlegend=False,
                        line_color = 'olive'),
                        row=1, col=2)
fig.add_trace(go.Scatter(x = x ,
                         y = acc_train2,
                         name = "Elman (implemented)",
                        connectgaps = True,
                        showlegend=False,
                        line_color = 'red'),
                        row=1, col=2)
fig.add_trace(go.Scatter(x = x ,
                         y = acc_train3,
                           name = "Elman (PyTorch)",
                        connectgaps = True,
                        showlegend=False,
                        line_color = 'green'),
                        row=1, col=2)
fig.add_trace(go.Scatter(x = x ,
                         y = acc_train4,
                        name = "LSTM",
                        connectgaps = True,
                        showlegend=False,
                        line_color = 'blue'),
                        row=1, col=2)
fig.add_trace(go.Scatter(x = x ,
                         y = acc_train5,
                        name = "Bi-LSTM",
                        connectgaps = True,
                        showlegend=False,
                        line_color = 'black'),
                        row=1, col=2)
#batch
fig.add_trace(go.Scatter(x = x_loss ,
                         y = running_loss,
                        name = "Linear",
                        connectgaps = True,
                         showlegend=False,
                        line_color = 'olive'),
                        row=1, col=3)
fig.add_trace(go.Scatter(x = x_loss ,
                         y = running_loss2,
                         name = "Elman (implemented)",
                        connectgaps = True,
                        showlegend=False,
                        line_color = 'red'),
                        row=1, col=3)
fig.add_trace(go.Scatter(x = x_loss ,
                         y = running_loss3,
                         name = "Elman (PyTorch)",
                        connectgaps = True,
                        showlegend=False,
                        line_color = 'green'),
                        row=1, col=3)
fig.add_trace(go.Scatter(x = x_loss ,
                         y = running_loss4,
                         name = "LSTM",
                        connectgaps = True,
                        showlegend=False,
                        line_color = 'blue'),
                        row=1, col=3)
fig.add_trace(go.Scatter(x = x_loss ,
                         y = running_loss5,
                         name = "Bi-LSTM",
                        connectgaps = True,
                        showlegend=False,
                        line_color = 'black'),
                        row=1, col=3)
fig.update_xaxes(title_text = "Epoch", row = 1, col = 1)
fig.update_yaxes(title_text = "Accuracy", range=[0.0, 1.0], row =1, col =1)

fig.update_xaxes(title_text = "Epoch",row = 1, col = 2)
fig.update_yaxes(title_text = "Accuracy", range=[0.0, 1.0],row =1, col =2)

fig.update_xaxes(title_text = "Epoch" ,row = 1, col = 3)
fig.update_yaxes(title_text = "Mean Running Loss",row =1, col =3)


fig.update_xaxes(nticks = 6,row = 1, col = 3)
fig.update_xaxes(nticks = 6,row = 1, col = 2)
fig.update_xaxes(nticks = 6,row = 1, col = 1)
fig.update_layout(
    title = "The validation/training accuracy and mean running loss on the IMDB data",
    title_x=0.5,
    height=450, width=1200,
    font=dict(
        family="Courier New, monospace",
        size=12,
        color="black"
    ),
    legend=dict(
    orientation="h",
    yanchor="bottom",
    y=-0.45,
    xanchor="center",
    x=0.5
)
)


Next, the final runs. It can be seen above that the models overfit after the third epoch, therefore the final runs will be done with 2 epochs


In [35]:
(x_train_full, y_train_full), (x_test, y_test) , (i2w, w2i), numcls =  load_imdb(final = True, char = False)
batches_full, batch_targets_full = pre_process_sequences(x_train_full, y_train_full)
batches_test, batches_targets_test = pre_process_sequences(x_test, y_test) # due to time i have not made a function that makes just pads to the same length

The full model is trained.

In [65]:
net_final_elman = RecNet(vocab_size=vocab_size, model = 'elman')
net_final_elman.cuda()
optimizer_final_elman = optim.Adam(net_final_elman.parameters(), lr = 0.0005)
scheduler_final_elman = optim.lr_scheduler.ExponentialLR(optimizer_final_elman, gamma=0.999, last_epoch=-1, verbose=False)
net_final_elman, acc_train_final_elman, running_loss_final_elman, accuracy_batch_final_elman = train(net_final_elman, batches_full, batch_targets_full, criterion, 
                                                              scheduler_final_elman, optimizer_final_elman, epochs=2)

Epoch:  0
Accuracy train: 0.5
 
Epoch:  1
Accuracy train: 0.86
 
Accuracy train: 0.94
 
Finished Training


In [57]:
net_final_lstm = RecNet(vocab_size=vocab_size, model = 'lstm')
net_final_lstm.cuda()
optimizer_final_lstm  = optim.Adam(net_final_lstm .parameters(), lr = 0.0001)
scheduler_final_lstm  = optim.lr_scheduler.ExponentialLR(optimizer_final_lstm , gamma=0.95, last_epoch=-1, verbose=False)
net_final_lstm , acc_train_final_lstm ,  running_loss_final_lstm , accuracy_batch_final_lstm  = train(net_final_lstm ,  batches_full, batch_targets_full,criterion, 
                                                              scheduler_final_lstm , optimizer_final_lstm  , epochs=2)

Epoch:  0
Accuracy train: 0.5
 
Epoch:  1
Accuracy train: 0.8
 
Accuracy train: 0.83
 
Finished Training


In [61]:
net_final_bilstm = RecNet(vocab_size=vocab_size, model = 'bilstm')
net_final_bilstm.cuda()
optimizer_final_bilstm  = optim.Adam(net_final_bilstm .parameters(), lr = 0.0001)
scheduler_final_bilstm  = optim.lr_scheduler.ExponentialLR(optimizer_final_bilstm , gamma=0.95, last_epoch=-1, verbose=False)
net_final_bilstm , acc_train_final_bilstm , running_loss_final_bilstm , accuracy_batch_final_bilstm  = train(net_final_bilstm , batches_full, batch_targets_full, criterion, 
                                                              scheduler_final_bilstm , optimizer_final_bilstm  , epochs=2)

Epoch:  0
Accuracy train: 0.5
 
Epoch:  1
Accuracy train: 0.83
 
Accuracy train: 0.87
 
Finished Training


In [62]:
net_final_linear = EmbedLin(vocab_size=vocab_size)
net_final_linear.cuda()
optimizer_final_linear = optim.Adam(net_final_linear.parameters(), lr = 0.0001)
scheduler_final_linear = optim.lr_scheduler.ExponentialLR(optimizer_final_linear, gamma=0.95, last_epoch=-1, verbose=False)
net_final_linear, acc_train_final_linear,  running_loss_final_linear, accuracy_batch_final_linear = train(net_final_linear, batches_full, batch_targets_full, criterion, 
                                                              scheduler_final_linear, optimizer_final_linear, epochs=2)

Epoch:  0
Accuracy train: 0.5
 
Epoch:  1
Accuracy train: 0.82
 
Accuracy train: 0.85
 
Finished Training


In [77]:
print("Linear")
print("The training accuracy: ", accuracy(net_final_linear, batches_full, batch_targets_full))
print("The test accuracy: ", accuracy(net_final_linear, batches_test, batches_targets_test))
print("")
print("Elman")
print("The training accuracy: ", accuracy(net_final_elman, batches_full, batch_targets_full))
print("The test accuracy: ", accuracy(net_final_elman, batches_test, batches_targets_test))
print("")
print("LSTM")
print("The training accuracy: ", accuracy(net_final_lstm, batches_full, batch_targets_full))
print("The test accuracy: ", accuracy(net_final_lstm, batches_test, batches_targets_test))
print("")
print("Bi-LSTM")
print("The training accuracy: ", accuracy(net_final_bilstm, batches_full, batch_targets_full))
print("The test accuracy: ", accuracy(net_final_bilstm, batches_test, batches_targets_test))

Linear
The training accuracy:  0.8526
The test accuracy:  0.84356

Elman
The training accuracy:  0.9378
The test accuracy:  0.90908

LSTM
The training accuracy:  0.827
The test accuracy:  0.82108

Bi-LSTM
The training accuracy:  0.87372
The test accuracy:  0.86852
