<a href="https://colab.research.google.com/github/fillerInk/ml-with-projects/blob/main/4_Text_Generation_with_RNNs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F

In [None]:
# Toy corpus: short lowercase phrases the character-level model is trained on.
text = [
    'hey how are you',
    'i am good',
    'thank you',
    'have a nice time',
    'wish you all the best',
    'good luck',
    'how was your day',
    'take care',
    'i am doing fine',
    'tell me about it',
    'i dont know',
    'great',
]

In [None]:
# Vocabulary: every distinct character that occurs anywhere in the corpus.
chars = {symbol for sentence in text for symbol in sentence}

In [None]:
chars

{' ',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'k',
 'l',
 'm',
 'n',
 'o',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'y'}

In [None]:
# Map integer index -> character. The vocabulary is sorted first because a
# set of strings has no stable iteration order between interpreter runs;
# without sorting, the same character could get a different index each time
# the notebook is restarted, making encoded data and models non-reproducible.
int2char = dict(enumerate(sorted(chars)))

In [None]:
# Inverse lookup of int2char: character -> integer index.
char2int = dict(zip(int2char.values(), int2char.keys()))

In [None]:
# Length of the longest sentence; every sentence is padded up to this length.
maxlen = max(map(len, text))

In [None]:
# Right-pad every sentence with spaces to `maxlen` characters, updating the
# existing list object in place.
text[:] = [sentence.ljust(maxlen) for sentence in text]

In [None]:
text

['hey how are you      ',
 'i am good            ',
 'thank you            ',
 'have a nice time     ',
 'wish you all the best',
 'good luck            ',
 'how was your day     ',
 'take care            ',
 'i am doing fine      ',
 'tell me about it     ',
 'i dont know          ',
 'great                ']

In [None]:
# Next-character prediction pairs: the input is the sentence without its last
# character, the target is the same sentence shifted left by one character.
input_seq = [sentence[:-1] for sentence in text]
target_seq = [sentence[1:] for sentence in text]

In [None]:
input_seq

['hey how are you     ',
 'i am good           ',
 'thank you           ',
 'have a nice time    ',
 'wish you all the bes',
 'good luck           ',
 'how was your day    ',
 'take care           ',
 'i am doing fine     ',
 'tell me about it    ',
 'i dont know         ',
 'great               ']

In [None]:
target_seq

['ey how are you      ',
 ' am good            ',
 'hank you            ',
 'ave a nice time     ',
 'ish you all the best',
 'ood luck            ',
 'ow was your day     ',
 'ake care            ',
 ' am doing fine      ',
 'ell me about it     ',
 ' dont know          ',
 'reat                ']

In [None]:
# Replace each character by its integer index from char2int, keeping the same
# list objects and only swapping their contents.
input_seq[:] = [[char2int[symbol] for symbol in seq] for seq in input_seq]
target_seq[:] = [[char2int[symbol] for symbol in seq] for seq in target_seq]

In [None]:
input_seq

[[3, 8, 6, 10, 3, 19, 11, 10, 9, 1, 8, 10, 6, 19, 14, 10, 10, 10, 10, 10],
 [2,
  10,
  9,
  15,
  10,
  13,
  19,
  19,
  17,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10],
 [0, 3, 9, 12, 20, 10, 6, 19, 14, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
 [3, 9, 18, 8, 10, 9, 10, 12, 2, 7, 8, 10, 0, 2, 15, 8, 10, 10, 10, 10],
 [11, 2, 21, 3, 10, 6, 19, 14, 10, 9, 4, 4, 10, 0, 3, 8, 10, 5, 8, 21],
 [13,
  19,
  19,
  17,
  10,
  4,
  14,
  7,
  20,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10],
 [3, 19, 11, 10, 11, 9, 21, 10, 6, 19, 14, 1, 10, 17, 9, 6, 10, 10, 10, 10],
 [0, 9, 20, 8, 10, 7, 9, 1, 8, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
 [2, 10, 9, 15, 10, 17, 19, 2, 12, 13, 10, 16, 2, 12, 8, 10, 10, 10, 10, 10],
 [0, 8, 4, 4, 10, 15, 8, 10, 9, 5, 19, 14, 0, 10, 2, 0, 10, 10, 10, 10],
 [2,
  10,
  17,
  19,
  12,
  0,
  10,
  20,
  12,
  19,
  11,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10],
 [13, 1, 8, 9, 0, 10, 10, 10, 10, 10, 10

In [None]:
# Number of distinct characters (width of each one-hot vector).
dict_size = len(char2int)
# Inputs and targets each drop one character from the padded sentence.
seq_len = maxlen - 1
# The whole corpus is processed as a single batch.
batch_size = len(text)

def one_hot_encode(sequence, dict_size, seq_len, batch_size):
    """Turn integer-encoded sequences into a one-hot float32 array.

    Args:
        sequence: indexable of indexables; ``sequence[i][u]`` is the integer
            class index at step ``u`` of sample ``i``. Only the first
            ``batch_size`` rows and the first ``seq_len`` entries of each
            row are read.
        dict_size: size of the last (one-hot) axis.
        seq_len: number of steps encoded per sample.
        batch_size: number of samples encoded.

    Returns:
        ``numpy.ndarray`` of shape ``(batch_size, seq_len, dict_size)`` and
        dtype ``float32`` with a single 1 per ``(sample, step)`` position.
    """
    encoded = np.zeros((batch_size, seq_len, dict_size), dtype=np.float32)

    # Visit every (sample, step) pair and switch on the matching class slot.
    for row, step in np.ndindex(batch_size, seq_len):
        encoded[row, step, sequence[row][step]] = 1
    return encoded

In [None]:
# Swap the integer-encoded inputs for their one-hot representation.
input_seq = one_hot_encode(
    sequence=input_seq,
    dict_size=dict_size,
    seq_len=seq_len,
    batch_size=batch_size,
)
input_seq

array([[[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 1., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[1., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       ...,

       [[1., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0.

In [None]:
# One-hot inputs: wrap the float32 NumPy array as a tensor.
input_seq = torch.from_numpy(input_seq)
# Targets are class indices, so build an integer (long) tensor directly
# instead of going through the legacy torch.Tensor constructor, which would
# silently produce float32 values that have to be cast back before the loss.
target_seq = torch.tensor(target_seq, dtype=torch.long)

In [None]:
# Pick the compute device once: CUDA when PyTorch reports it as available,
# CPU otherwise, and tell the user which one was chosen.
is_cuda = torch.cuda.is_available()
device = torch.device("cuda" if is_cuda else "cpu")
print("GPU is available" if is_cuda else "GPU not available, CPU used")

GPU not available, CPU used


In [None]:
class Model(nn.Module):
    """RNN over batch-first input followed by a per-step linear layer.

    Args:
        input_size: number of features per time step fed to the RNN.
        output_size: number of scores produced per time step.
        hidden_dim: size of the RNN hidden state.
        n_layers: number of stacked RNN layers.
    """

    def __init__(self, input_size, output_size, hidden_dim, n_layers):
        super().__init__()

        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        # Layers are created in the original order (fc first, then rnn) so
        # seeded weight initialisation and parameter ordering do not change.
        self.fc = nn.Linear(hidden_dim, output_size)
        self.rnn = nn.RNN(input_size, hidden_dim, n_layers, batch_first=True)

    def forward(self, x):
        """Run the RNN from a zero hidden state and score every time step.

        Args:
            x: tensor whose first dimension is the batch (the RNN is built
                with ``batch_first=True``).

        Returns:
            Tuple ``(out, hidden)``: ``out`` has one row per (sample, step)
            pair and ``output_size`` columns; ``hidden`` is the final hidden
            state returned by the RNN.
        """
        batch_size = x.size(0)
        hidden = self.init_hidden(batch_size)
        out, hidden = self.rnn(x, hidden)

        # Collapse the batch and time axes so the linear layer scores each
        # time step independently.
        out = out.contiguous().view(-1, self.hidden_dim)
        out = self.fc(out)

        return out, hidden

    def init_hidden(self, batch_size):
        """Return an all-zero initial hidden state for ``batch_size`` samples.

        The tensor is allocated with the device and dtype of the module's
        own parameters, so the forward pass keeps working after
        ``model.to(...)`` or ``model.double()``.
        """
        reference = next(self.parameters())
        return torch.zeros(
            self.n_layers,
            batch_size,
            self.hidden_dim,
            device=reference.device,
            dtype=reference.dtype,
        )


In [None]:
device

device(type='cpu')

In [None]:
# Training hyperparameters: number of passes over the batch and Adam step size.
epochs = 500
lr = 0.01

# One-hot characters in, one score per character out.
model = Model(
    input_size=dict_size,
    output_size=dict_size,
    hidden_dim=12,
    n_layers=1,
)


In [None]:
# Cross-entropy over per-step character scores, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)

In [None]:
# Tensor.to() returns a new tensor rather than moving the original, so its
# result has to be kept. The batch is placed on whatever device the model's
# weights live on, which keeps inputs, targets and weights together.
model_device = next(model.parameters()).device
inputs = input_seq.to(model_device)
# Flattened integer class indices; this does not change between epochs, so
# it is prepared once instead of on every iteration.
targets = target_seq.view(-1).long().to(model_device)

# Make sure the model is in training mode even if it was switched to eval
# mode by an earlier sampling run.
model.train()

for epoch in range(1, epochs + 1):
    optimizer.zero_grad()  # Clears existing gradients from previous epoch
    output, hidden = model(inputs)
    loss = criterion(output, targets)
    loss.backward()  # Does backpropagation and calculates gradients
    optimizer.step()  # Updates the weights accordingly

    if epoch % 10 == 0:
        print('Epoch {}/{}...............'.format(epoch, epochs), end=' ')
        print('Loss: {:.4f}'.format(loss.item()))

Epoch 10/500............... Loss: 2.2165
Epoch 20/500............... Loss: 2.0177
Epoch 30/500............... Loss: 1.9482
Epoch 40/500............... Loss: 1.8595
Epoch 50/500............... Loss: 1.7255
Epoch 60/500............... Loss: 1.5871
Epoch 70/500............... Loss: 1.4421
Epoch 80/500............... Loss: 1.3071
Epoch 90/500............... Loss: 1.1862
Epoch 100/500............... Loss: 1.0703
Epoch 110/500............... Loss: 0.9577
Epoch 120/500............... Loss: 0.8502
Epoch 130/500............... Loss: 0.7500
Epoch 140/500............... Loss: 0.6603
Epoch 150/500............... Loss: 0.5779
Epoch 160/500............... Loss: 0.5049
Epoch 170/500............... Loss: 0.4406
Epoch 180/500............... Loss: 0.3843
Epoch 190/500............... Loss: 0.3359
Epoch 200/500............... Loss: 0.3123
Epoch 210/500............... Loss: 0.2716
Epoch 220/500............... Loss: 0.2489
Epoch 230/500............... Loss: 0.2198
Epoch 240/500............... Loss: 0.2127
E

In [None]:
def predict(model, character):
    """Predict the character that most likely follows ``character``.

    Args:
        model: module called as ``model(batch)`` that returns
            ``(scores, hidden)`` with one row of scores per time step.
        character: non-empty string or sequence of single characters; each
            one must be a key of ``char2int``.

    Returns:
        Tuple ``(next_char, hidden)``: the highest-probability character
        after the last step, and the hidden state returned by the model.

    Raises:
        ValueError: if ``character`` is empty.
        KeyError: if a character is not in ``char2int``.
    """
    if len(character) == 0:
        raise ValueError("predict() needs at least one input character")

    # One-hot encode the input as a batch holding a single sequence.
    indices = np.array([[char2int[c] for c in character]])
    encoded = one_hot_encode(indices, dict_size, indices.shape[1], 1)
    batch = torch.from_numpy(encoded)
    # Tensor.to() returns a new tensor, so the result must be kept; use the
    # device the model's weights are on so both always match.
    batch = batch.to(next(model.parameters()).device)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        out, hidden = model(batch)
        # The last row of `out` holds the scores for the final time step.
        prob = nn.functional.softmax(out[-1], dim=0)

    # Taking the class with the highest probability score from the output
    char_ind = prob.argmax(dim=0).item()

    return int2char[char_ind], hidden

In [None]:
def sample(model, out_len, start='hey'):
    """Extend ``start`` one predicted character at a time.

    The model is put in eval mode, ``start`` is lower-cased, and ``predict``
    is called on the text generated so far until the result is ``out_len``
    characters long. If ``start`` is already at least that long, it is
    returned (lower-cased) without any prediction.

    Args:
        model: model passed through to ``predict``.
        out_len: desired total length of the returned string.
        start: seed text the generation continues from.

    Returns:
        The lower-cased seed followed by the generated characters.
    """
    model.eval()  # eval mode

    generated = list(start.lower())
    remaining = out_len - len(generated)

    # Feed everything produced so far back in to obtain the next character.
    for _ in range(remaining):
        next_char, _hidden = predict(model, generated)
        generated.append(next_char)

    return ''.join(generated)

In [None]:
sample(model,15,'hey how')

'hey how are you'