In [1]:
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [3]:
import os
import math
import numpy as np
import random
import torch
from torch import nn
import torch.nn.functional as F

In [4]:
text = ["hey how are you", "good I am fine", "have a nice day"]

# Join all the sentences together and extract the unique characters from the combined sentence
chars = set("".join(text))

# Creating a dictionary that maps integers to the characters
int2char = dict(enumerate(chars))

# Creating another dictionary that maps characters to integers
char2int = {char: idx for idx, char in int2char.items()}

In [5]:
int2char

{0: 'm',
 1: 'I',
 2: 'u',
 3: 'f',
 4: 'h',
 5: 'd',
 6: 'o',
 7: 'e',
 8: 'w',
 9: 'a',
 10: 'i',
 11: 'n',
 12: 'g',
 13: 'c',
 14: ' ',
 15: 'r',
 16: 'y',
 17: 'v'}

In [6]:
char2int

{'m': 0,
 'I': 1,
 'u': 2,
 'f': 3,
 'h': 4,
 'd': 5,
 'o': 6,
 'e': 7,
 'w': 8,
 'a': 9,
 'i': 10,
 'n': 11,
 'g': 12,
 'c': 13,
 ' ': 14,
 'r': 15,
 'y': 16,
 'v': 17}

In [7]:
# Finding the length of the longest string in our data
maxlen = len(max(text, key=len))
maxlen

15

In [8]:
# Padding

# A simple loop that loops through the list of sentences and adds a ' ' whitespace until the length of
# the sentence matches the length of the longest sentence
for i in range(len(text)):
  while len(text[i])<maxlen:
    text[i] += " "

In [9]:
text

['hey how are you', 'good I am fine ', 'have a nice day']

In [10]:
# Creating lists that will hold our input and target sequences
input_seq = []
target_seq = []

for i in range(len(text)):
  # Remove last character for input sequence
  input_seq.append(text[i][:-1])

  # Remove first character for target sequence
  target_seq.append(text[i][1:])
  print("Input Sequence: {}\nTarget Sequence: {}".format(input_seq[i], target_seq[i]))


Input Sequence: hey how are yo
Target Sequence: ey how are you
Input Sequence: good I am fine
Target Sequence: ood I am fine 
Input Sequence: have a nice da
Target Sequence: ave a nice day


In [11]:
for i in range(len(text)):
  input_seq[i] = [char2int[character] for character in input_seq[i]]
  target_seq[i] = [char2int[character] for character in target_seq[i]]

In [12]:
input_seq

[[4, 7, 16, 14, 4, 6, 8, 14, 9, 15, 7, 14, 16, 6],
 [12, 6, 6, 5, 14, 1, 14, 9, 0, 14, 3, 10, 11, 7],
 [4, 9, 17, 7, 14, 9, 14, 11, 10, 13, 7, 14, 5, 9]]

Before encoding our input sequence into one-hot vectors, we'll define 3 key variables:

    dict_size: Dictionary size - The number of unique characters that we have in our text
        This will determine the one-hot vector size as each character will have an assigned index in that vector
    seq_len: The length of the sequences that we're feeding into the model
        As we standardized the length of all our sentences to be equal to the longest sentences, this value will be the max length - 1 as we removed the last character input as well
    batch_size: The number of sentences that we defined and are going to feed into the model as a batch


In [14]:
dict_size = len(char2int)
seq_len = maxlen - 1
batch_size = len(text)

def one_hot_encode(sequence, dict_size, seq_len, batch_size):
  # Creating a multi-dimensional array of zeros with the desired output shape
  features = np.zeros((batch_size, seq_len, dict_size), dtype=np.float32)

  # Replacing the 0 at the relevant character index with a 1 to represent that character
  for i in range(batch_size):
    for u in range(seq_len):
      features[i, u, sequence[i][u]] = 1
  return features

In [19]:
x = [
    [
        [1, 2, 3, 4],
        [5, 6, 7, 8]
    ],
    [
        [9, 10, 11, 12],
        [13, 14, 15, 16]
    ],
    [ 
        [17, 18, 19, 20],
        [21, 22, 23, 24]
    ] 
]
x

[[[1, 2, 3, 4], [5, 6, 7, 8]],
 [[9, 10, 11, 12], [13, 14, 15, 16]],
 [[17, 18, 19, 20], [21, 22, 23, 24]]]

In [20]:
x[1]

[[9, 10, 11, 12], [13, 14, 15, 16]]

In [24]:
type(x)

list

In [26]:
x = np.array(x)
type(x)

numpy.ndarray

In [27]:
x[1, 1]

array([13, 14, 15, 16])

In [28]:
x[1, 1, 2]

15

In [15]:
# Input shape --> (Batch Size, Sequence Length, One-Hot Encoding Size)
input_seq = one_hot_encode(input_seq, dict_size, seq_len, batch_size)

In [16]:
input_seq

array([[[0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         1., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
         0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0.],
        [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
         0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
         0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
         0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
         0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0.],

In [29]:
input_seq = torch.from_numpy(input_seq)
target_seq = torch.Tensor(target_seq)

In [30]:
# torch.cuda.is_available() checks and returns a Boolean True if a GPU is available, else it'll return False
is_cuda = torch.cuda.is_available()

# If we have a GPU available, we'll set our device to GPU. We'll use this device variable later in our code.
if is_cuda:
    device = torch.device("cuda")
    print("GPU is available")
else:
    device = torch.device("cpu")
    print("GPU not available, CPU used")

GPU not available, CPU used


In [31]:
class Model(nn.Module):
  def __init__(self, input_size, output_size, hidden_dim, n_layers):
    super(Model, self).__init__()

    # Defining some parameters
    self.hidden_dim = hidden_dim
    self.n_layers = n_layers

    # Defining the layers
    # RNN Layer
    self.rnn = nn.RNN(input_size, hidden_dim, n_layers, batch_first=True)

    # Fully connected layer
    self.fc = nn.Linear(hidden_dim, output_size)

  def forward(self, x):

    batch_size = x.size(0)

    # Initializing hidden state for first input using method defined below
    hidden = self.init_hidden(batch_size)

    # Passing in the input and hidden state into the model and obtaining outputs
    out, hidden = self.rnn(x, hidden)

    # Reshaping the outputs such that it can be fit into the fully connected layer
    out = out.contiguous().view(-1, self.hidden_dim)
    out = self.fc(out)

    return out, hidden

  def init_hidden(self, batch_size):
    # This method generates the first hidden state of zeros which we'll use in the forward pass
    # We'll send the tensor holding the hidden state to the device we specified earlier as well
    hidden = torch.zeros(self.n_layers, batch_size, self.hidden_dim)
    return hidden 

In [32]:
# Instantiate the model with hyperparameters
model = Model(input_size=dict_size, output_size=dict_size, hidden_dim=12, n_layers=1)
# We'll also set the model to the device that we defined earlier (default is CPU)
model.to(device)


# Define hyperparameters
n_epochs = 100
lr = 0.01


# Define Loss, Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [33]:
# Training Run
for epoch in range(1, n_epochs + 1):
    optimizer.zero_grad() # Clears existing gradients from previous epoch
    input_seq.to(device)
    output, hidden = model(input_seq)
    loss = criterion(output, target_seq.view(-1).long())
    loss.backward() # Does backpropagation and calculates gradients
    optimizer.step() # Updates the weights accordingly
    
    if epoch%10 == 0:
        print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
        print("Loss: {:.4f}".format(loss.item()))

Epoch: 10/100............. Loss: 2.5148
Epoch: 20/100............. Loss: 2.0893
Epoch: 30/100............. Loss: 1.6282
Epoch: 40/100............. Loss: 1.1809
Epoch: 50/100............. Loss: 0.8217
Epoch: 60/100............. Loss: 0.5566
Epoch: 70/100............. Loss: 0.3757
Epoch: 80/100............. Loss: 0.2624
Epoch: 90/100............. Loss: 0.1937
Epoch: 100/100............. Loss: 0.1511


In [34]:
# This function takes in the model and character as arguments and returns the next character prediction and hidden state
def predict(model, character):
    # One-hot encoding our input to fit into the model
    character = np.array([[char2int[c] for c in character]])
    character = one_hot_encode(character, dict_size, character.shape[1], 1)
    character = torch.from_numpy(character)
    character.to(device)
    
    out, hidden = model(character)

    prob = nn.functional.softmax(out[-1], dim=0).data
    # Taking the class with the highest probability score from the output
    char_ind = torch.max(prob, dim=0)[1].item()

    return int2char[char_ind], hidden

In [35]:
# This function takes the desired output length and input characters as arguments, returning the produced sentence
def sample(model, out_len, start='hey'):
    model.eval() # eval mode
    start = start.lower()
    # First off, run through the starting characters
    chars = [ch for ch in start]
    size = out_len - len(chars)
    # Now pass in the previous characters and get a new one
    for ii in range(size):
        char, h = predict(model, chars)
        chars.append(char)

    return ''.join(chars)

In [36]:
sample(model, 15, 'good')

'good I am fine '