In [2]:
import torch
from torch import nn

import numpy as np

In [3]:
# Toy training corpus: three short lowercase sentences.
text = ['hey how are you','good i am fine','have a nice day']

# Vocabulary of distinct characters. Sorting is what keeps the integer ids
# stable: the iteration order of a set of strings can differ between Python
# processes (string hash randomisation), which would silently change the
# encoding on every kernel restart.
chars = sorted(set(''.join(text)))
# Lookup tables: id -> character and character -> id.
int2char = dict(enumerate(chars))
char2int = {char: ind for ind, char in int2char.items()}

In [4]:
# Length of the longest sentence; the padding cell brings every sentence up to it.
maxlen = max(len(sentence) for sentence in text)
print("The longest string has {} characters".format(maxlen))

The longest string has 15 characters


In [5]:
# Right-pad every sentence with spaces (in place) so each one is maxlen long.
for idx, sentence in enumerate(text):
    text[idx] = sentence.ljust(maxlen)

In [6]:
# Build next-character prediction pairs: the input is a sentence without its
# last character, the target is the same sentence without its first character
# (i.e. the input shifted left by one position).
input_seq = []
target_seq = []

for sentence in text:
    model_input = sentence[:-1]   # drop the last character
    model_target = sentence[1:]   # drop the first character
    input_seq.append(model_input)
    target_seq.append(model_target)
    print("Input Sequence:{}\n Target Sequence:{}".format(
        model_input,
        model_target
    ))

Input Sequence:hey how are yo
 Target Sequence:ey how are you
Input Sequence:good i am fine
 Target Sequence:ood i am fine 
Input Sequence:have a nice da
 Target Sequence:ave a nice day


In [7]:
# Replace every character with its integer id, one sentence at a time.
# The lists are overwritten in place: strings go in, lists of ints come out.
for row in range(len(text)):
    source_chars, target_chars = input_seq[row], target_seq[row]
    input_seq[row] = [char2int[ch] for ch in source_chars]
    target_seq[row] = [char2int[ch] for ch in target_chars]

In [8]:
# Dimensions of the one-hot encoded training data.
batch_size = len(text)       # one sequence per sentence
seq_len = maxlen - 1         # inputs/targets are one character shorter than the padded text
dict_size = len(char2int)    # number of distinct characters in the vocabulary

def one_hot_encode(sequence,dict_size,seq_len,batch_size):
    """Turn integer-encoded sequences into a one-hot float32 array.

    Args:
        sequence: indexable of indexables; ``sequence[b][t]`` is the integer
            class id at batch row ``b``, position ``t``.
        dict_size: size of the last (one-hot) axis.
        seq_len: number of positions read from each row.
        batch_size: number of rows read from ``sequence``.

    Returns:
        ``np.ndarray`` of shape ``(batch_size, seq_len, dict_size)`` and dtype
        ``float32`` holding a single 1 for every (row, position) pair.
    """
    encoded = np.zeros((batch_size, seq_len, dict_size), dtype=np.float32)

    # Walk every (row, position) pair in row-major order and switch on the
    # slot that corresponds to the class id found there.
    positions = ((row, step) for row in range(batch_size) for step in range(seq_len))
    for row, step in positions:
        encoded[row, step, sequence[row][step]] = 1
    return encoded


In [9]:
# One-hot encode the integer inputs; input_seq is rebound from a list of
# id lists to the encoded array.
input_seq = one_hot_encode(
    sequence=input_seq,
    dict_size=dict_size,
    seq_len=seq_len,
    batch_size=batch_size,
)


In [10]:
# Sanity check: report the shape of the encoded inputs.
shape_report = "Input shape: {} --> (Batch Size, Sequence Length, One-Hot Encoding Size)".format(input_seq.shape)
print(shape_report)

Input shape: (3, 14, 17) --> (Batch Size, Sequence Length, One-Hot Encoding Size)


In [11]:
# Hand the data over to PyTorch.
# The one-hot inputs keep the float32 dtype they were created with.
input_seq = torch.from_numpy(input_seq)
# Targets are class indices, so store them as int64 straight away; this is
# the dtype the cross-entropy loss expects for class-index targets. The legacy
# torch.Tensor(...) constructor would have produced float32 values that need
# casting later.
target_seq = torch.tensor(target_seq, dtype=torch.long)

In [12]:
# Choose the compute device once; the model and the tensors are moved onto it
# by later cells through this `device` variable.
is_cuda = torch.cuda.is_available()  # True only when PyTorch can see a GPU
device = torch.device("cuda" if is_cuda else "cpu")
print("GPU is available" if is_cuda else "GPU not available, CPU used")

GPU is available


In [13]:
class Model(nn.Module):
    """Recurrent network: an ``nn.RNN`` followed by a linear read-out layer.

    Args:
        input_size: size of each input feature vector.
        output_size: number of output scores produced per time step.
        hidden_dim: width of the recurrent hidden state.
        n_layers: number of stacked RNN layers.
    """

    def __init__(self,input_size,output_size,hidden_dim,n_layers):
        super().__init__()

        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        # batch_first=True: inputs and RNN outputs are (batch, seq, feature).
        self.rnn = nn.RNN(input_size, hidden_dim, n_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_size)

    def forward(self,x):
        """Run a batch of sequences through the RNN and the read-out layer.

        Every call starts from a fresh all-zero hidden state.

        Args:
            x: tensor of shape ``(batch, seq, input_size)``.

        Returns:
            Tuple ``(out, hidden)``: ``out`` has shape
            ``(batch * seq, output_size)`` (time steps flattened into rows),
            ``hidden`` is the final hidden state returned by the RNN.
        """
        batch_size = x.size(0)

        # Allocate the initial state on the input's device rather than on a
        # notebook-level global, so the module also works on its own.
        hidden = self.init_hidden(batch_size, device=x.device)

        out, hidden = self.rnn(x, hidden)
        # Flatten (batch, seq, hidden_dim) -> (batch * seq, hidden_dim) so the
        # linear layer scores every time step independently.
        out = out.contiguous().view(-1, self.hidden_dim)
        out = self.fc(out)

        return out, hidden

    def init_hidden(self,batch_size,device=None):
        """Return an all-zero initial hidden state.

        Args:
            batch_size: number of sequences in the batch.
            device: where to allocate the state; defaults to the device the
                model's parameters live on.

        Returns:
            Zero tensor of shape ``(n_layers, batch_size, hidden_dim)``.
        """
        if device is None:
            device = next(self.parameters()).device
        return torch.zeros(self.n_layers, batch_size, self.hidden_dim, device=device)



In [14]:
# Seed PyTorch's RNG before the model is built so that weight initialisation
# (and with it the training curve) starts from the same state on every
# Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

# Input and output sizes both equal the vocabulary size: one-hot characters go
# in, one score per character comes out.
model = Model(input_size=dict_size,output_size=dict_size,hidden_dim=12,n_layers=1)

model = model.to(device)

# Training hyperparameters.
n_epochs = 100
lr = 0.01

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),lr = lr)




In [16]:
# Full-batch training: every epoch feeds all sentences through the model once.

# sample() switches the model to eval mode; make sure a (re-)run of this cell
# trains in train mode.
model.train()

# Loop-invariant setup, done once instead of every epoch: move the data to the
# device and flatten the targets so they line up with the model's
# (batch * seq, classes) output rows.
input_seq = input_seq.to(device)
target_seq = target_seq.to(device)
flat_targets = target_seq.view(-1).long()

for epoch in range(1, n_epochs + 1):
    optimizer.zero_grad() # Clears existing gradients from previous epoch
    output, hidden = model(input_seq)
    loss = criterion(output, flat_targets)
    loss.backward() # Does backpropagation and calculates gradients
    optimizer.step() # Updates the weights accordingly

    if epoch%10 == 0:
        print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
        print("Loss: {:.4f}".format(loss.item()))

Epoch: 10/100............. Loss: 0.1607
Epoch: 20/100............. Loss: 0.1301
Epoch: 30/100............. Loss: 0.1094
Epoch: 40/100............. Loss: 0.0949
Epoch: 50/100............. Loss: 0.0844
Epoch: 60/100............. Loss: 0.0766
Epoch: 70/100............. Loss: 0.0706
Epoch: 80/100............. Loss: 0.0659
Epoch: 90/100............. Loss: 0.0622
Epoch: 100/100............. Loss: 0.0591


In [17]:
def predict(model, character):
    """Predict the character that follows a sequence of characters.

    Args:
        model: network called as ``model(x)`` and returning ``(out, hidden)``.
        character: iterable of characters; each one must be a key of
            ``char2int`` (a ``KeyError`` is raised otherwise).

    Returns:
        Tuple ``(next_char, hidden)``: the highest-scoring next character and
        the hidden state returned by the model.
    """
    # One-hot encode the input as a batch containing a single sequence.
    encoded = np.array([[char2int[c] for c in character]])
    encoded = one_hot_encode(encoded, dict_size, encoded.shape[1], 1)
    encoded = torch.from_numpy(encoded).to(device)

    # Inference only: no_grad avoids building an autograd graph (and replaces
    # the legacy `.data` access for getting a gradient-free result).
    with torch.no_grad():
        out, hidden = model(encoded)
        # With a batch of one, the rows of `out` are the time steps, so the
        # last row holds the scores for the character that comes next.
        prob = nn.functional.softmax(out[-1], dim=0)

    # Taking the class with the highest probability score from the output
    char_ind = torch.max(prob, dim=0)[1].item()

    return int2char[char_ind], hidden
def sample(model, out_len, start='hey'):
    """Greedily extend ``start`` until the text is ``out_len`` characters long.

    Args:
        model: network handed to ``predict``; it is switched to eval mode.
        out_len: desired total length of the returned string. If ``start`` is
            already at least this long, nothing is generated.
        start: seed text; it is lower-cased before use.

    Returns:
        The lower-cased seed followed by the generated characters.
    """
    model.eval()  # inference mode

    generated = list(start.lower())
    remaining = out_len - len(generated)

    # Each step feeds everything produced so far back in and appends the
    # single character predict() returns; the hidden state is not reused.
    for _ in range(remaining):
        next_char, _hidden = predict(model, generated)
        generated.append(next_char)

    return ''.join(generated)

In [18]:
sample(model, 15, 'hey')

'hey how are you'