<a href="https://colab.research.google.com/github/shivammehta007/NLPResearch/blob/master/RNN_From_Scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
import torch
import torch.nn as nn

In [0]:
text = ['It is such a beautiful weather today' , 'I feel like going for a walk', 'Trees and sky also looks very beautiful']

# Create a Vocabulary of Characters

vocab = set(' '.join(text))

In [0]:
int2char = dict(enumerate(vocab))
char2int = {char : index for index, char in int2char.items()}

In [4]:
print(int2char, char2int)

{0: 'd', 1: 'I', 2: 's', 3: 'u', 4: 'k', 5: 'a', 6: 'T', 7: 'v', 8: 'w', 9: ' ', 10: 'b', 11: 'h', 12: 'n', 13: 'c', 14: 'e', 15: 'g', 16: 'i', 17: 'f', 18: 'y', 19: 't', 20: 'l', 21: 'r', 22: 'o'} {'d': 0, 'I': 1, 's': 2, 'u': 3, 'k': 4, 'a': 5, 'T': 6, 'v': 7, 'w': 8, ' ': 9, 'b': 10, 'h': 11, 'n': 12, 'c': 13, 'e': 14, 'g': 15, 'i': 16, 'f': 17, 'y': 18, 't': 19, 'l': 20, 'r': 21, 'o': 22}


In [0]:
# Padding all the text to that of maximum length
max_length = len(max(text, key=len))

In [0]:
for i in range(len(text)):
    while len(text[i]) < max_length:
        text[i] += ' '

As we will predict next character in sequence , we will deveide the data into 
* Input
* Ground Truth/ Target

In [0]:
input_sequence = [text[i][:-1] for i in range(len(text))]
target_sequence = [text[i][1:] for i in range(len(text))]

In [8]:
print('Input_Sentence : {} \nOutput_Sentence: {}'.format(input_sequence[2], target_sequence[2]))

Input_Sentence : Trees and sky also looks very beautifu 
Output_Sentence: rees and sky also looks very beautiful


Lets Convert all the Text to sequence of integers from the vocabulary, since computers understand numbers better than text anyday

In [0]:
for i in range(len(text)):
    input_sequence[i] = [char2int[x] for x in input_sequence[i]]
    target_sequence[i] = [char2int[x] for x in target_sequence[i]]

In [10]:
print('Input_Sentence : {} \nOutput_Sentence: {}'.format(input_sequence[2], target_sequence[2]))

Input_Sentence : [6, 21, 14, 14, 2, 9, 5, 12, 0, 9, 2, 4, 18, 9, 5, 20, 2, 22, 9, 20, 22, 22, 4, 2, 9, 7, 14, 21, 18, 9, 10, 14, 5, 3, 19, 16, 17, 3] 
Output_Sentence: [21, 14, 14, 2, 9, 5, 12, 0, 9, 2, 4, 18, 9, 5, 20, 2, 22, 9, 20, 22, 22, 4, 2, 9, 7, 14, 21, 18, 9, 10, 14, 5, 3, 19, 16, 17, 3, 20]


In [0]:
dict_size = len(char2int)
seq_len = len(input_sequence[0])
batch_size = len(text)


In [12]:
print(dict_size, seq_len, batch_size)

23 38 3


In [0]:
# Create One Hot representation of it on dict size
def one_hot_representation(sequence, dict_size, seq_len, batch_size):
    features = np.zeros((batch_size, seq_len, dict_size), dtype=np.float32)
    for i in range(batch_size):
        for u in range(seq_len):
            features[i, u, sequence[i][u]] = 1
    return features

In [14]:
input_sequence = one_hot_representation(input_sequence, dict_size, seq_len, batch_size)
print(input_sequence.shape)
print(input_sequence[0,0])

(3, 38, 23)
[0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]


In [0]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [16]:
device

device(type='cpu')

In [0]:
#Putting the input and output to Tensors 
input_sequence = torch.from_numpy(input_sequence)
target_sequence = torch.Tensor(target_sequence)


In [0]:
class Model(nn.Module):
    def __init__(self,  input_size, output_size, hidden_dim, n_layers):
        super(Model, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        self.rnn = nn.RNN(input_size, hidden_dim, n_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_size) 
    
    def forward(self, x):
        batch_size = x.size(0)
        hidden = torch.zeros(self.n_layers, batch_size, self.hidden_dim)
        out, hidden = self.rnn(x, hidden)
        out = out.contiguous().view(-1, self.hidden_dim)
        out = self.fc(out)

        return out, hidden



In [0]:
model = Model(input_size=dict_size, output_size=dict_size, hidden_dim=12, n_layers=1)
model.to(device)
input_sequence = input_sequence.to(device)

In [0]:
epochs = 400
lr = 0.01

loss_criteria = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [21]:
for epoch in range(epochs):
    optimizer.zero_grad()
    input_sequence = input_sequence.to(device)
    output, hidden = model(input_sequence)
    loss = loss_criteria(output, target_sequence.view(-1).long())
    loss.backward()
    optimizer.step()

    if epoch % 10 == 0:
        print('Epoch: {}/{}.............'.format(epoch, epochs), end=' ')
        print("Loss: {:.4f}".format(loss.item()))


Epoch: 0/400............. Loss: 3.1206
Epoch: 10/400............. Loss: 2.6526
Epoch: 20/400............. Loss: 2.4161
Epoch: 30/400............. Loss: 2.1373
Epoch: 40/400............. Loss: 1.8292
Epoch: 50/400............. Loss: 1.5236
Epoch: 60/400............. Loss: 1.2383
Epoch: 70/400............. Loss: 0.9988
Epoch: 80/400............. Loss: 0.7995
Epoch: 90/400............. Loss: 0.6351
Epoch: 100/400............. Loss: 0.5033
Epoch: 110/400............. Loss: 0.4036
Epoch: 120/400............. Loss: 0.3309
Epoch: 130/400............. Loss: 0.2925
Epoch: 140/400............. Loss: 0.2422
Epoch: 150/400............. Loss: 0.2101
Epoch: 160/400............. Loss: 0.2093
Epoch: 170/400............. Loss: 0.1998
Epoch: 180/400............. Loss: 0.1642
Epoch: 190/400............. Loss: 0.1422
Epoch: 200/400............. Loss: 0.1254
Epoch: 210/400............. Loss: 0.1129
Epoch: 220/400............. Loss: 0.1028
Epoch: 230/400............. Loss: 0.0943
Epoch: 240/400.............

In [0]:
def predict(model, character):
    character = np.array([[char2int[c] for c in character]])
    character = one_hot_representation(character, dict_size, character.shape[1], 1)
    character = torch.from_numpy(character)
    character.to(device)

    out, hidden = model(character)

    prob = nn.functional.softmax(out[-1], dim=0).data
    print(prob)
    char_index = torch.max(prob, dim=0)[1].item()

    print(torch.max(prob, dim=0)[1])

    return int2char[char_index], hidden


In [0]:
def sample(model, out_len, start='hey'):
    model.eval() # eval mode
    start = start.lower()
    # First off, run through the starting characters
    chars = [ch for ch in start]
    size = out_len - len(chars)
    # Now pass in the previous characters and get a new one
    for ii in range(size):
        char, h = predict(model, chars)
        chars.append(char)

    return ''.join(chars)

In [24]:
sample(model, 20, 'good')

tensor([1.3324e-04, 1.2969e-05, 4.3902e-04, 1.1530e-08, 8.4264e-07, 1.5567e-03,
        2.0978e-05, 1.1320e-06, 4.2832e-05, 4.8840e-05, 1.5949e-05, 7.9627e-07,
        1.7953e-06, 3.1495e-04, 3.3396e-06, 3.3394e-07, 3.4322e-02, 8.0553e-07,
        1.6653e-02, 7.9277e-08, 3.6279e-07, 9.4643e-01, 5.8938e-07])
tensor(21)
tensor([6.0160e-07, 3.5080e-06, 1.6277e-02, 8.4036e-06, 1.4118e-04, 7.7540e-01,
        7.3494e-06, 4.7181e-07, 4.4646e-06, 1.9672e-01, 1.0428e-05, 2.5977e-08,
        6.1613e-06, 7.9516e-06, 3.4217e-04, 3.8261e-06, 6.8783e-07, 2.3451e-04,
        7.0744e-03, 1.7581e-05, 3.9243e-06, 3.6935e-03, 4.2082e-05])
tensor(5)
tensor([6.5516e-07, 5.9588e-06, 2.6132e-04, 3.0863e-04, 6.8798e-06, 6.0230e-03,
        1.2980e-05, 3.7207e-05, 3.8905e-04, 2.2506e-01, 7.7097e-06, 9.1019e-07,
        9.1851e-07, 1.9621e-04, 1.7750e-04, 3.1469e-05, 7.4341e-08, 4.8775e-05,
        1.1323e-05, 1.2204e-01, 6.4538e-01, 7.3080e-07, 3.7513e-06])
tensor(20)
tensor([3.3429e-05, 4.3210e-04, 6.9178e-0

'goodralooks sotndrs '