In [1]:
import torch
from torch import nn

import numpy as np

# torch.cuda.is_available() checks and returns a Boolean True if a GPU is available, else it'll return False
is_cuda = torch.cuda.is_available()

device = torch.device("cpu")
if is_cuda:
    device = torch.device("cuda")
    print("GPU is available")

GPU is available


In [2]:
class RNN(nn.Module):
    def __init__(self, input_size, output_size, hidden_dim, n_layers, activation="tanh", dropout=0.0):
        super(RNN, self).__init__()

        # Defining some parameters
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        #Defining the layers
        # RNN Layer
        self.rnn = nn.RNN(input_size, hidden_dim, n_layers,
                          batch_first=True,
                          nonlinearity=activation,
                          dropout=dropout)   
        # Fully connected layer
        self.fc = nn.Linear(hidden_dim, output_size)

    def init_hidden(self, batch_size):
        # This method generates the first hidden state of zeros which we'll use in the forward pass
        return(torch.zeros(self.n_layers, batch_size, self.hidden_dim).to(device))
    
    def forward(self, x):
        
        batch_size = x.size(0)

        # Initializing hidden state for first input using method defined below
        hidden = self.init_hidden(batch_size)

        # Passing in the input and hidden state into the model and obtaining outputs
        out, hidden = self.rnn(x, hidden)
        
        # Reshaping the outputs such that it can be fit into the fully connected layer
        out = out.contiguous().view(-1, self.hidden_dim)
        out = self.fc(out)
        
        return(out, hidden)

In [3]:
def one_hot_encode(sequence, dict_size, seq_len, batch_size):
    # Creating a multi-dimensional array of zeros with the desired output shape
    features = np.zeros((batch_size, seq_len, dict_size), dtype=np.float32)
    
    # Replacing the 0 at the relevant character index with a 1 to represent that character
    for i in range(batch_size):
        for u in range(seq_len):
            features[i, u, sequence[i][u]] = 1
    return features

# whitespace padding all sequences to same length
def ws_pad(text, new_len):
    # note that string * int will generate the string repeated int times.
    return(f"{text}{' ' * (new_len - len(text))}")

In [4]:
text = ["hey how are you?","good i am fine, thank you", "have a nice day!", "you too!", "under the bridge"]

# padding with white space so all are same length
maxlen = len(max(text, key=len))
text = [ws_pad(t, maxlen) for t in text]

# Join all the sentences together and extract the unique characters from the combined sentences
chars = set(''.join(text))

# Creating a dictionary that maps integers to the characters
int2char = dict(enumerate(chars))

# Creating another dictionary that maps characters to integers
char2int = {char: ind for ind, char in int2char.items()}

# Creating lists that will hold our input and target sequences
input_seq = []
target_seq = []

for i in range(len(text)):
    # Remove last character for input sequence
    input_seq.append(text[i][:-1])
    
    # Remove first character for target sequence
    target_seq.append(text[i][1:])
    print("Input Sequence: {}\nTarget Sequence: {}".format(input_seq[i], target_seq[i]))

Input Sequence: hey how are you?        
Target Sequence: ey how are you?         
Input Sequence: good i am fine, thank yo
Target Sequence: ood i am fine, thank you
Input Sequence: have a nice day!        
Target Sequence: ave a nice day!         
Input Sequence: you too!                
Target Sequence: ou too!                 
Input Sequence: under the bridge        
Target Sequence: nder the bridge         


In [5]:
# encode to integer
for i in range(len(text)):
    input_seq[i] = [char2int[character] for character in input_seq[i]]
    target_seq[i] = [char2int[character] for character in target_seq[i]]
    
# one hot encode
dict_size = len(char2int)
seq_len = maxlen - 1
batch_size = len(text)

# Input shape --> (Batch Size, Sequence Length, One-Hot Encoding Size)
input_seq = one_hot_encode(input_seq, dict_size, seq_len, batch_size)

In [6]:
# convert the inputs and targets to torch tensors
input_sq = torch.from_numpy(input_seq)
target_sq = torch.Tensor(target_seq)

input_sq = input_sq.to(device)
target_sq = target_sq.to(device)

In [7]:
# Instantiate the model with hyperparameters
model = RNN(
    input_size=dict_size, output_size=dict_size, # don't mess with these
    hidden_dim=15, n_layers=2, activation="tanh", dropout=0.1 # experiment with these
    ) 
# move to device
model.to(device)

# Define additional hyperparameters
n_epochs = 500
lr=0.02

# Define Loss, Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [8]:
# Training Run
for epoch in range(1, n_epochs + 1):
    optimizer.zero_grad() # Clears existing gradients from previous epoch
    output, hidden = model(input_sq)
    output = output.to(device)
    loss = criterion(output, target_sq.view(-1).long())
    loss.backward() # Does backpropagation and calculates gradients
    optimizer.step() # Updates the weights accordingly
    
    if epoch%25 == 0:
        print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
        print("Loss: {:.4f}".format(loss.item()))

Epoch: 25/500............. Loss: 1.7273
Epoch: 50/500............. Loss: 0.7160
Epoch: 75/500............. Loss: 0.2761
Epoch: 100/500............. Loss: 0.1056
Epoch: 125/500............. Loss: 0.0887
Epoch: 150/500............. Loss: 0.0773
Epoch: 175/500............. Loss: 0.0528
Epoch: 200/500............. Loss: 0.0355
Epoch: 225/500............. Loss: 0.0420
Epoch: 250/500............. Loss: 0.0260
Epoch: 275/500............. Loss: 0.0347
Epoch: 300/500............. Loss: 0.0199
Epoch: 325/500............. Loss: 0.0251
Epoch: 350/500............. Loss: 0.0457
Epoch: 375/500............. Loss: 0.0397
Epoch: 400/500............. Loss: 0.0385
Epoch: 425/500............. Loss: 0.0239
Epoch: 450/500............. Loss: 0.0174
Epoch: 475/500............. Loss: 0.0230
Epoch: 500/500............. Loss: 0.0176


In [100]:
def predict_one(model, character):
    # One-hot encoding our input to fit into the model
    character = np.array([[char2int[c] for c in character]])
    character = one_hot_encode(character, dict_size, character.shape[1], 1)
    character = torch.from_numpy(character)
    character = character.to(device)
    
    out, hidden = model(character) # _ = hidden state
    print(hidden)
    
    prob = nn.functional.softmax(out[-1], dim=0).data
    # Taking the class with the highest probability score from the output
    char_ind = torch.max(prob, dim=0)[1].item()
    return int2char[char_ind]

In [101]:
def predict_seq(model, out_len, start='hey'):
    model.eval() # eval mode
    start = start.lower()
    # First off, run through the starting characters
    chars = [ch for ch in start]
    size = out_len - len(chars)
    # Now pass in the previous characters and get a new one
    for ii in range(size):
        char = predict_one(model, chars)
        chars.append(char)

    return ''.join(chars)

In [103]:
predict_seq(model, 20, 'hey a').strip() # remove final white space

tensor([[[-0.0284, -0.8003, -0.0492,  0.1104, -0.4383,  0.7508, -0.6349,
          -0.0290,  0.0487,  0.1591,  0.7267,  0.7501,  0.3882, -0.2377,
          -0.9318]],

        [[ 0.9552, -0.9528, -1.0000,  0.9870,  0.9703, -0.9577, -0.9946,
           0.7940,  0.9179, -0.9998,  0.9994,  0.6558,  0.3912, -0.9842,
           0.9217]]], device='cuda:0', grad_fn=<CudnnRnnBackward>)
tensor([[[-0.8557,  0.4711,  0.8419,  0.0656, -0.9898, -0.9801,  0.2434,
           0.2707, -0.7429, -0.9123,  0.9579,  0.7909, -0.9557, -0.6194,
          -0.9767]],

        [[-0.9997,  1.0000, -1.0000,  0.9897,  1.0000, -0.9491,  0.9997,
           0.9995, -0.9892, -0.9984,  0.9853,  0.9981,  0.9996,  0.1147,
          -0.3026]]], device='cuda:0', grad_fn=<CudnnRnnBackward>)
tensor([[[ 0.5126,  0.5460, -0.8963, -0.9479,  0.8314, -0.1483, -0.7762,
           0.5620,  0.7413, -0.2096,  0.1159, -0.8713,  0.2776, -0.8319,
          -0.0027]],

        [[-1.0000,  0.9870,  0.9997, -0.9693,  1.0000, -0.9474, -1.000

'hey am fine, thank y'