In [1]:
# Imports
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import os
import numpy as np

import torchvision
import torchvision.transforms as transforms

import matplotlib.pyplot as plt
%matplotlib inline
import sklearn.metrics
import seaborn as sns
import random

# To display youtube videos
from IPython.display import YouTubeVideo


def set_seed(seed = 1234):
    '''Sets the seed of the entire notebook so results are the same every time we run.
    This is for REPRODUCIBILITY.'''
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.deterministic = True
    # Set a fixed value for the hash seed
    os.environ['PYTHONHASHSEED'] = str(seed)
    
set_seed()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Device available now:', device)

Device available now: cpu


In [12]:
# The Neural Network
class RNNVanilla(nn.Module):
    # __init__: the function where we create the architecture
    def __init__(self, n_inputs, n_neurons):
        super(RNNVanilla, self).__init__()
        
        # Weights are random at first
        # U contains connection weights for the inputs of the current time step
        self.U = torch.randn(n_inputs, n_neurons) # for 1 neuron: size = 4 rows and 1 column
        
        # W contains connection weights for the outputs of the previous time step
        self.W = torch.randn(n_neurons, n_neurons) # for 1 neuron: size = 1 row and 1 column
        
        # The bias
        self.b = torch.zeros(1, n_neurons) # for 1 neuron: size = 1 row and 1 column
    
    # forward: function where we apply the architecture to the input
    def forward(self, input0, input1):
        # Computes two outputs, one for each time step (two overall).
        self.output0 = torch.tanh(torch.mm(input0, self.U) + self.b)
        
        self.output1 = torch.tanh(torch.mm(self.output0, self.W) + torch.mm(input1, self.U) + self.b)
        
        return self.output0, self.output1

In [13]:
n_inputs=3
n_neurons=5

input0 = torch.tensor([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]], dtype = torch.float)
print('input time_0 shape:', input0.shape)

input1 = torch.tensor([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]], dtype = torch.float)
print('input time_1 shape:', input1.shape)

input time_0 shape: torch.Size([4, 3])
input time_1 shape: torch.Size([4, 3])


In [14]:
# Creating the model
rnn_n_neurons = RNNVanilla(n_inputs, n_neurons)

# Checking the output
output0, output1 = rnn_n_neurons(input0, input1)
print('output0:', output0, '\n')
print('output1:', output1)

output0: tensor([[ 0.7501, -0.9918, -0.5887, -0.6949,  0.8775],
        [ 0.7263, -1.0000, -0.9999, -0.9998,  0.8766],
        [ 0.7006, -1.0000, -1.0000, -1.0000,  0.8757],
        [-0.9999, -1.0000, -0.8574, -1.0000, -0.9348]]) 

output1: tensor([[-0.9755, -1.0000, -1.0000, -1.0000, -0.8716],
        [-0.9380,  0.8805,  0.7666, -0.0397, -0.1628],
        [-0.9924, -1.0000, -1.0000, -1.0000, -0.9083],
        [-0.9998, -1.0000, -1.0000,  0.8486,  0.4386]])


In [15]:
# Parameters
print('U:', rnn_n_neurons.U)
print('W:', rnn_n_neurons.W)
print('bias:', rnn_n_neurons.b)

U: tensor([[-0.5909, -1.1263, -0.2108, -0.8226, -0.3061],
        [ 0.1735,  1.5392, -1.9031,  0.0428, -0.7550],
        [ 0.3999, -2.1429,  0.6137, -0.4501,  1.0599]])
W: tensor([[-0.5510,  2.6617,  2.1479, -0.8555, -0.7208],
        [ 1.3755,  0.9988,  0.1549, -0.9053, -1.1421],
        [ 2.0506, -0.1523, -0.7381, -0.7189,  0.7937],
        [-0.6129,  0.2229,  0.5868, -0.2878, -0.3234],
        [ 1.7023,  0.5865, -0.6212, -1.5174, -0.3566]])
bias: tensor([[0., 0., 0., 0., 0.]])


## MNIST Classification

In [17]:
trans=transforms.Compose([transforms.ToTensor()])

mnist_train=torchvision.datasets.MNIST('data',train=True,download=True,transform=trans)
mnist_test=torchvision.datasets.MNIST('data',train=False,download=True,transform=trans)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw



In [28]:
class vanillaRNN_MNIST(nn.Module):
    
    def __init__(self,batch_size,input_size,hidden_size,output_size):
        super(vanillaRNN_MNIST,self).__init__()
        
        self.batch_size=batch_size
        self.input_size=input_size
        self.hidden_size=hidden_size
        self.output_size=output_size
        
        self.rnn_layer=nn.RNN(input_size,hidden_size)
        self.layer=nn.Linear(hidden_size,self.output_size)
        
    def forward(self,images,prints=False):
        if prints:
            print('original image shape',images.shape)
        images=images.permute(1,0,2)
        if prints:
            print('Permutes images shape',images.shape)
        
        hidden_state=torch.zeros(1,self.batch_size,self.hidden_size)
        if prints:
            print('intial hidden state shape',hidden_state.shape)
            
        hidden_outputs,hidden_state=self.rnn_layer(images,hidden_state)
        
        out=self.layer(hidden_state)
        if prints:
            print('----hidden_outputs shape:', hidden_outputs.shape, '\n' +
                  '----final hidden state:', hidden_state.shape, '\n' +
                  '----out shape:', out.shape)
            
        out=out.view(-1,self.output_size)
        if prints:
            print('Out final shape')

        return out
    
        
        
        
        
        

In [29]:
batch_size = 64        # how many images to be trained in one iteration
input_size = 28        # image 28 by 28
hidden_size = 150      # can be changed to any number: neurons
output_size = 10       # 10 different digits
# =================


In [30]:
train_loader = torch.utils.data.DataLoader(mnist_train, batch_size=64)

# Select one full batch from the data
images_example, labels_example = next(iter(train_loader))
print('original images shape:', images_example.shape)

# Reshape
images_example = images_example.view(-1, 28, 28)
print('changed images shape:', images_example.shape)
print('labels shape:', labels_example.shape, '\n')

# Creating the model
model_example = vanillaRNN_MNIST(batch_size, input_size, hidden_size, output_size)


out = model_example(images_example, prints=True)

original images shape: torch.Size([64, 1, 28, 28])
changed images shape: torch.Size([64, 28, 28])
labels shape: torch.Size([64]) 

original image shape torch.Size([64, 28, 28])
Permutes images shape torch.Size([28, 64, 28])
intial hidden state shape torch.Size([1, 64, 150])
----hidden_outputs shape: torch.Size([28, 64, 150]) 
----final hidden state: torch.Size([1, 64, 150]) 
----out shape: torch.Size([1, 64, 10])
Out final shape


In [31]:
# Understand Model Parameters
print('Len parameters:', len(list(model_example.parameters())), '\n' +
      'Parameters 0 - U:', list(model_example.parameters())[0].shape, '\n' +
      'Parameters 1 - W:', list(model_example.parameters())[1].shape, '\n' +
      'Parameters 2 - Bias:', list(model_example.parameters())[2].shape, '\n' +
      'Parameters 3 - Bias:', list(model_example.parameters())[3].shape, '\n' +
      'Parameters 4 - FNN weights:', list(model_example.parameters())[4].shape, '\n' +
      'Parameters 5 - Predictions:', list(model_example.parameters())[5].shape)

Len parameters: 6 
Parameters 0 - U: torch.Size([150, 28]) 
Parameters 1 - W: torch.Size([150, 150]) 
Parameters 2 - Bias: torch.Size([150]) 
Parameters 3 - Bias: torch.Size([150]) 
Parameters 4 - FNN weights: torch.Size([10, 150]) 
Parameters 5 - Predictions: torch.Size([10])


In [32]:
def get_accuracy(out,actual_labels,batch_size):
    
    predictions=out.max(dim=1)[1]
    correct=(predictions==actual_labels).sum().item()
    accuracy=correct/batch_size
    
    return accuracy

In [51]:
from tqdm import tqdm
def train_network(model,train_data,test_data,batch_size=64,num_epochs=1,learning_rate=0.001):
    
    print('Data_ready:')
    
    train_loader=torch.utils.data.DataLoader(dataset=train_data,
                                            batch_size=batch_size,
                                            shuffle=True,
                                            drop_last=True)
    test_loader=torch.utils.data.DataLoader(dataset=test_data,
                                           batch_size=batch_size,
                                           shuffle=False,
                                           drop_last=True)
    
    criterion=nn.CrossEntropyLoss()
    optimizer=optim.Adam(model.parameters(),lr=learning_rate)
    
    print('Start_training')
    
    for epoch in range(num_epochs):
        train_loss=0
        train_acc=0
        
        model.train()
        
        for k ,(images,labels) in tqdm(enumerate(train_loader),total=int(len(train_data)/batch_size)):
            images=images.view(-1,28,28)
            
            out= model(images)
            optimizer.zero_grad()
            
            loss=criterion(out,labels)
            
            loss.backward()
            
            optimizer.step()
            
            train_loss+=loss.item()
            
            train_acc+=get_accuracy(out,labels,batch_size)
        print('TRAIN | Epoch: {}/{} | Loss: {:.2f} | Accuracy: {:.2f}'.format(epoch+1, num_epochs, train_loss/k, train_acc/k))
    
        print('Testing the model :')
        test_acc=0
        model.eval()
        for k ,(images,labels) in tqdm(enumerate(test_loader),total=int(len(test_data)/batch_size)):
            
            images=images.view(-1,28,28)
            
            out=model(images)
            test_acc+=get_accuracy(out,labels,batch_size)
        print('TEST | Average Accuracy per {} Loaders: {:.5f}'.format(k, test_acc/k) )

In [52]:
# ==== STATICS ====
batch_size=64
input_size=28
hidden_size=150
output_size=10

# Instantiate the model
vanilla_rnn = vanillaRNN_MNIST(batch_size, input_size, hidden_size, output_size)

# ==== TRAIN ====
train_network(vanilla_rnn, mnist_train, mnist_test, num_epochs=10)

Data_ready:
Start_training


100%|██████████| 937/937 [00:19<00:00, 48.63it/s]


TRAIN | Epoch: 1/10 | Loss: 0.74 | Accuracy: 0.76
Testing the model :


100%|██████████| 156/156 [00:01<00:00, 82.55it/s]


TEST | Average Accuracy per 155 Loaders: 0.84244


100%|██████████| 937/937 [00:18<00:00, 49.88it/s]


TRAIN | Epoch: 2/10 | Loss: 0.30 | Accuracy: 0.91
Testing the model :


100%|██████████| 156/156 [00:02<00:00, 75.59it/s]


TEST | Average Accuracy per 155 Loaders: 0.94395


100%|██████████| 937/937 [00:19<00:00, 48.60it/s]


TRAIN | Epoch: 3/10 | Loss: 0.22 | Accuracy: 0.94
Testing the model :


100%|██████████| 156/156 [00:01<00:00, 82.37it/s]


TEST | Average Accuracy per 155 Loaders: 0.95232


100%|██████████| 937/937 [00:19<00:00, 47.91it/s]


TRAIN | Epoch: 4/10 | Loss: 0.18 | Accuracy: 0.95
Testing the model :


100%|██████████| 156/156 [00:01<00:00, 81.15it/s]


TEST | Average Accuracy per 155 Loaders: 0.96079


100%|██████████| 937/937 [00:19<00:00, 49.14it/s]


TRAIN | Epoch: 5/10 | Loss: 0.16 | Accuracy: 0.95
Testing the model :


100%|██████████| 156/156 [00:01<00:00, 81.20it/s]


TEST | Average Accuracy per 155 Loaders: 0.95645


100%|██████████| 937/937 [00:18<00:00, 49.56it/s]


TRAIN | Epoch: 6/10 | Loss: 0.15 | Accuracy: 0.96
Testing the model :


100%|██████████| 156/156 [00:01<00:00, 81.09it/s]


TEST | Average Accuracy per 155 Loaders: 0.96321


100%|██████████| 937/937 [00:19<00:00, 48.92it/s]


TRAIN | Epoch: 7/10 | Loss: 0.14 | Accuracy: 0.96
Testing the model :


100%|██████████| 156/156 [00:01<00:00, 82.26it/s]


TEST | Average Accuracy per 155 Loaders: 0.97036


100%|██████████| 937/937 [00:19<00:00, 48.19it/s]


TRAIN | Epoch: 8/10 | Loss: 0.13 | Accuracy: 0.96
Testing the model :


100%|██████████| 156/156 [00:01<00:00, 81.10it/s]


TEST | Average Accuracy per 155 Loaders: 0.97681


100%|██████████| 937/937 [00:18<00:00, 49.38it/s]


TRAIN | Epoch: 9/10 | Loss: 0.12 | Accuracy: 0.97
Testing the model :


100%|██████████| 156/156 [00:01<00:00, 85.82it/s]


TEST | Average Accuracy per 155 Loaders: 0.97067


100%|██████████| 937/937 [00:19<00:00, 48.53it/s]


TRAIN | Epoch: 10/10 | Loss: 0.11 | Accuracy: 0.97
Testing the model :


100%|██████████| 156/156 [00:01<00:00, 81.58it/s]

TEST | Average Accuracy per 155 Loaders: 0.97409





In [61]:
class MultilayerRNN_MNIST(nn.Module):
    
    def __init__(self,input_size,
                 hidden_size,
                 layer_size,
                 output_size,
                 relu=True):
        
        super(MultilayerRNN_MNIST,self).__init__()
        
        self.input_size=input_size
        self.hidden_size=hidden_size
        self.layer_size=layer_size
        self.output_size=output_size
        
        
        if relu:
            self.rnn=nn.RNN(input_size,
                            hidden_size,
                            layer_size,
                            batch_first=True,
                           nonlinearity='relu')
        else:
            self.rnn=nn.RNN(input_size,
                            hidden_size,
                            layer_size,
                            batch_first=True,
                           nonlinearity='tanh')
            
        self.fnn=nn.Linear(hidden_size,output_size)
        
    def forward(self,
               images,
               prints=False):
        
        if prints:
            print('images shapes',images.shape)
            
        hidden_state=torch.zeros(self.layer_size,images.size(0),self.hidden_size)
        
        if prints:
            print('Hidden state shape:',hidden_state.shape)
        output,last_hidden_state=self.rnn(images,hidden_state.detach())
        if prints:
            print('RNN Output shape:', output.shape, '\n' +
                         'RNN last_hidden_state shape', last_hidden_state.shape)
        
        output=self.fnn(output[:,-1,:])
        if prints: print('FNN Output shape:', output.shape)
        
        return output
        
        
            
            
            
        
        

In [62]:
batch_size = 64
input_size = 28
hidden_size = 100      # neurons
layer_size = 2         # layers
output_size = 10

In [63]:
train_loader_example = torch.utils.data.DataLoader(mnist_train, batch_size=64)

# Taking a single batch of the images
images, labels = next(iter(train_loader_example))
print('original images shape:', images.shape)

# Remove channel from shape
images = images.reshape(-1, 28, 28)
print('reshaped images shape:', images.shape, '\n')

# Create model instance
multilayer_rnn_example = MultilayerRNN_MNIST(input_size, hidden_size, layer_size, output_size, relu=False)
print(multilayer_rnn_example)


# Making log predictions:
out = multilayer_rnn_example(images, prints=True)

original images shape: torch.Size([64, 1, 28, 28])
reshaped images shape: torch.Size([64, 28, 28]) 

MultilayerRNN_MNIST(
  (rnn): RNN(28, 100, num_layers=2, batch_first=True)
  (fnn): Linear(in_features=100, out_features=10, bias=True)
)
images shapes torch.Size([64, 28, 28])
Hidden state shape: torch.Size([2, 64, 100])
RNN Output shape: torch.Size([64, 28, 100]) 
RNN last_hidden_state shape torch.Size([2, 64, 100])
FNN Output shape: torch.Size([64, 10])


In [64]:
batch_size = 64
input_size = 28
hidden_size = 100  
layer_size = 2         
output_size = 10

# Instantiate the model
# We'll use TANH as our activation function
multilayer_rnn = MultilayerRNN_MNIST(input_size, hidden_size, layer_size, output_size, relu=False)

# ==== TRAIN ====
train_network(multilayer_rnn, mnist_train, mnist_test, num_epochs=10)

Data_ready:
Start_training


100%|██████████| 937/937 [00:24<00:00, 37.75it/s]


TRAIN | Epoch: 1/10 | Loss: 0.66 | Accuracy: 0.78
Testing the model :


100%|██████████| 156/156 [00:02<00:00, 70.92it/s]


TEST | Average Accuracy per 155 Loaders: 0.90605


100%|██████████| 937/937 [00:24<00:00, 37.81it/s]


TRAIN | Epoch: 2/10 | Loss: 0.29 | Accuracy: 0.91
Testing the model :


100%|██████████| 156/156 [00:02<00:00, 70.71it/s]


TEST | Average Accuracy per 155 Loaders: 0.93831


100%|██████████| 937/937 [00:24<00:00, 37.79it/s]


TRAIN | Epoch: 3/10 | Loss: 0.20 | Accuracy: 0.94
Testing the model :


100%|██████████| 156/156 [00:02<00:00, 69.89it/s]


TEST | Average Accuracy per 155 Loaders: 0.95958


100%|██████████| 937/937 [00:24<00:00, 38.03it/s]


TRAIN | Epoch: 4/10 | Loss: 0.17 | Accuracy: 0.95
Testing the model :


100%|██████████| 156/156 [00:02<00:00, 59.62it/s]


TEST | Average Accuracy per 155 Loaders: 0.95393


100%|██████████| 937/937 [00:24<00:00, 38.36it/s]


TRAIN | Epoch: 5/10 | Loss: 0.15 | Accuracy: 0.96
Testing the model :


100%|██████████| 156/156 [00:02<00:00, 69.26it/s]


TEST | Average Accuracy per 155 Loaders: 0.96452


100%|██████████| 937/937 [00:24<00:00, 37.92it/s]


TRAIN | Epoch: 6/10 | Loss: 0.14 | Accuracy: 0.96
Testing the model :


100%|██████████| 156/156 [00:02<00:00, 70.90it/s]


TEST | Average Accuracy per 155 Loaders: 0.95312


100%|██████████| 937/937 [00:24<00:00, 38.08it/s]


TRAIN | Epoch: 7/10 | Loss: 0.13 | Accuracy: 0.96
Testing the model :


100%|██████████| 156/156 [00:02<00:00, 70.07it/s]


TEST | Average Accuracy per 155 Loaders: 0.96774


100%|██████████| 937/937 [00:24<00:00, 37.83it/s]


TRAIN | Epoch: 8/10 | Loss: 0.12 | Accuracy: 0.97
Testing the model :


100%|██████████| 156/156 [00:02<00:00, 66.80it/s]


TEST | Average Accuracy per 155 Loaders: 0.97389


100%|██████████| 937/937 [00:24<00:00, 37.63it/s]


TRAIN | Epoch: 9/10 | Loss: 0.11 | Accuracy: 0.97
Testing the model :


100%|██████████| 156/156 [00:02<00:00, 67.71it/s]


TEST | Average Accuracy per 155 Loaders: 0.98014


100%|██████████| 937/937 [00:24<00:00, 38.07it/s]


TRAIN | Epoch: 10/10 | Loss: 0.12 | Accuracy: 0.97
Testing the model :


100%|██████████| 156/156 [00:02<00:00, 69.73it/s]

TEST | Average Accuracy per 155 Loaders: 0.96260



