In [37]:
#!pip install tensorflow==2.8.0

# install gpu 
#!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

# install cpu version
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu

Defaulting to user installation because normal site-packages is not writeable
Looking in indexes: https://download.pytorch.org/whl/cpu


In [38]:
%matplotlib notebook
import matplotlib.pyplot as plt
import numpy as np
from time import sleep
import random
import sys
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
random.seed( 10 ) # set the random seed (for reproducibility)

In [39]:
def getSample(seqL,k,testFlag=False):
    #returns a random sequence of integers of length = seqL
    kthInt=0
    x =  torch.zeros(seqL,20)
    for i in range(0,seqL):
        randomIntegerNumber = random.randint(0,9)
        if i==k-1:
            kthInt=randomIntegerNumber
        if testFlag:
            sys.stdout.write(str(randomIntegerNumber) + ' ')
        x[i,randomIntegerNumber-1] = 1

    if testFlag:
            sys.stdout.write('--> ' + str(kthInt) + '\n')
    x=x.unsqueeze(1) #extra dimension for Batch
    y=torch.tensor([kthInt]) #target is the number at kth position in the sequence 

    return x,y

In [40]:
class Memorize (nn.Module):
    def __init__(self,stateDim):
        super(Memorize, self).__init__()
        self.stateDim = stateDim
        self.inputDim = 20  # integer is represented as 1 hot vector of dimension=10
        self.outputDim = 20  # 10 nodes for 10 classes
        # currently the model uses the 'LSTM' cell. You could try
        # others like: tanh, GRU. See: https://github.com/pytorch/examples/blob/master/word_language_model/model.py#L11
        self.lstm = nn.LSTM(self.inputDim, self.stateDim )
        self.outputLayer = nn.Linear(self.stateDim, self.outputDim)
        self.softmax = nn.Softmax()
        

    def forward(self, x):
        """
        X: [L,B,inputDim(=10)] dimensional input tensor
            L: Sequence length
            B: is the "batch" dimension. As we are training on 
               single examples, B = 1 for us.
        """
        lstmOut,_ = self.lstm(x)
        L,B,D  = lstmOut.size(0),lstmOut.size(1),lstmOut.size(2) # L is seq len, B is batch size and D is feature dimension
        #lstmOut holds the outputs at all timesteps but we require  only the output at last time step (L-1)
        lstmOut_lastTimeStep = lstmOut[L-1,0,:]
        #print (lstmOut_lastTimeStep.size())
        
        #lstmOut = lstmOut.view(L*B,D)
        outputLayerActivations = self.outputLayer(lstmOut_lastTimeStep)
        #outputSoftMax=self.softmax(outputLayerActivations)
        # project lstm states to "output"
        
    
        return outputLayerActivations.unsqueeze(0)

In [None]:
# set here the size of the RNN state:
stateSize = 40
# set here the size of the binary strings to be used for training:
k=2 # we want the RNN to remember the number at 2nd position
minSeqLength = 6
maxSeqLength = 16

## sequenceLengths would be in range in range minSeqLength - maxSeqLength


# create the model:
model = Memorize(stateSize)
print ('Model initialized')

# create the loss-function:
lossFunction = nn.CrossEntropyLoss() # or nn.CrossEntropyLoss() -- see question #2 below

# uncomment below to change the optimizers:
#optimizer = optim.SGD(model.parameters(), lr=3e-2, momentum=0.8)
optimizer = optim.Adam(model.parameters(),lr=0.01)
iterations = 1000
min_epochs = 20
num_epochs,totalLoss = 0,float("inf")
lossList = []
while num_epochs < min_epochs:
    print("[epoch %d/%d] Avg. Loss for last 500 samples = %lf"%(num_epochs+1,min_epochs,totalLoss))
    num_epochs += 1
    totalLoss = 0
    for i in range(0,iterations):
        # get a new random training sample:
        sequenceLength = random.randint(minSeqLength,maxSeqLength)
        x,y = getSample(sequenceLength,k)
 
        model.zero_grad()

        pred = model(x)

        # compute the loss:
        loss = lossFunction(pred,y)
        totalLoss += loss.data
        optimizer.zero_grad()
        # perform the backward pass:
        loss.backward()
        # update the weights:
        optimizer.step()
    totalLoss=totalLoss/iterations
    lossList.append(int(totalLoss))
print('Training finished!')
epochs =  np.arange(1,21)
# plot the loss over epcohs:
plt.plot(epochs,lossList)
plt.xlabel('epochs'); plt.ylabel('loss'); plt.xticks(epochs,epochs)
plt.ylim([0,5]); 

Model initialized
[epoch 1/20] Avg. Loss for last 500 samples = inf
[epoch 2/20] Avg. Loss for last 500 samples = 2.362351
[epoch 3/20] Avg. Loss for last 500 samples = 2.327593
[epoch 4/20] Avg. Loss for last 500 samples = 2.312582
[epoch 5/20] Avg. Loss for last 500 samples = 2.318265
[epoch 6/20] Avg. Loss for last 500 samples = 2.308613
[epoch 7/20] Avg. Loss for last 500 samples = 2.317080
[epoch 8/20] Avg. Loss for last 500 samples = 2.291474
[epoch 9/20] Avg. Loss for last 500 samples = 2.257941
[epoch 10/20] Avg. Loss for last 500 samples = 2.237626
[epoch 11/20] Avg. Loss for last 500 samples = 2.240568
[epoch 12/20] Avg. Loss for last 500 samples = 2.156755


In [None]:
testSeqL = 6
x,y = getSample(testSeqL,k,testFlag=True)

pred = model(x)
ind=  torch.argmax(pred)
print ( 'number at kth position is ',int(ind))