In [1]:
# Imports
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import os
import numpy as np

import torchvision
import torchvision.transforms as transforms

import matplotlib.pyplot as plt
%matplotlib inline
import sklearn.metrics
import seaborn as sns
import random

# To display youtube videos
from IPython.display import YouTubeVideo


def set_seed(seed = 1234):
    '''Sets the seed of the entire notebook so results are the same every time we run.
    This is for REPRODUCIBILITY.'''
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.deterministic = True
    # Set a fixed value for the hash seed
    os.environ['PYTHONHASHSEED'] = str(seed)
    
set_seed()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Device available now:', device)

Device available now: cpu


In [12]:
# The Neural Network
class RNNVanilla(nn.Module):
    # __init__: the function where we create the architecture
    def __init__(self, n_inputs, n_neurons):
        super(RNNVanilla, self).__init__()
        
        # Weights are random at first
        # U contains connection weights for the inputs of the current time step
        self.U = torch.randn(n_inputs, n_neurons) # for 1 neuron: size = 4 rows and 1 column
        
        # W contains connection weights for the outputs of the previous time step
        self.W = torch.randn(n_neurons, n_neurons) # for 1 neuron: size = 1 row and 1 column
        
        # The bias
        self.b = torch.zeros(1, n_neurons) # for 1 neuron: size = 1 row and 1 column
    
    # forward: function where we apply the architecture to the input
    def forward(self, input0, input1):
        # Computes two outputs, one for each time step (two overall).
        self.output0 = torch.tanh(torch.mm(input0, self.U) + self.b)
        
        self.output1 = torch.tanh(torch.mm(self.output0, self.W) + torch.mm(input1, self.U) + self.b)
        
        return self.output0, self.output1

In [13]:
n_inputs=3
n_neurons=5

input0 = torch.tensor([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]], dtype = torch.float)
print('input time_0 shape:', input0.shape)

input1 = torch.tensor([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]], dtype = torch.float)
print('input time_1 shape:', input1.shape)

input time_0 shape: torch.Size([4, 3])
input time_1 shape: torch.Size([4, 3])


In [14]:
# Creating the model
rnn_n_neurons = RNNVanilla(n_inputs, n_neurons)

# Checking the output
output0, output1 = rnn_n_neurons(input0, input1)
print('output0:', output0, '\n')
print('output1:', output1)

output0: tensor([[ 0.7501, -0.9918, -0.5887, -0.6949,  0.8775],
        [ 0.7263, -1.0000, -0.9999, -0.9998,  0.8766],
        [ 0.7006, -1.0000, -1.0000, -1.0000,  0.8757],
        [-0.9999, -1.0000, -0.8574, -1.0000, -0.9348]]) 

output1: tensor([[-0.9755, -1.0000, -1.0000, -1.0000, -0.8716],
        [-0.9380,  0.8805,  0.7666, -0.0397, -0.1628],
        [-0.9924, -1.0000, -1.0000, -1.0000, -0.9083],
        [-0.9998, -1.0000, -1.0000,  0.8486,  0.4386]])


In [15]:
# Parameters
print('U:', rnn_n_neurons.U)
print('W:', rnn_n_neurons.W)
print('bias:', rnn_n_neurons.b)

U: tensor([[-0.5909, -1.1263, -0.2108, -0.8226, -0.3061],
        [ 0.1735,  1.5392, -1.9031,  0.0428, -0.7550],
        [ 0.3999, -2.1429,  0.6137, -0.4501,  1.0599]])
W: tensor([[-0.5510,  2.6617,  2.1479, -0.8555, -0.7208],
        [ 1.3755,  0.9988,  0.1549, -0.9053, -1.1421],
        [ 2.0506, -0.1523, -0.7381, -0.7189,  0.7937],
        [-0.6129,  0.2229,  0.5868, -0.2878, -0.3234],
        [ 1.7023,  0.5865, -0.6212, -1.5174, -0.3566]])
bias: tensor([[0., 0., 0., 0., 0.]])
