https://github.com/gabrielloye/RNN-walkthrough/blob/master/main.ipynb

In [33]:
import torch
from torch import nn

import re
import numpy as np

In [35]:
with open('t8.shakespeare.txt') as f:
    text_dataset_raw = f.read()


In [36]:
text_dataset = re.findall('...............',text_dataset_raw)

text_dataset[:10]

['This is the 100',
 'th Etext file p',
 'resented by Pro',
 'ject Gutenberg,',
 'is presented in',
 ' cooperation wi',
 'th World Librar',
 'y, Inc., from t',
 'Library of the ',
 'Future and Shak']

In [52]:
# text = ['hey how are you','good i am fine','have a nice day']
text = text_dataset

# Join all the sentences together and extract the unique characters from the combined sentences
chars = set(''.join(text))

# Creating a dictionary that maps integers to the characters
int2char = dict(enumerate(chars))

# Creating another dictionary that maps characters to integers
char2int = {char: ind for ind, char in int2char.items()}

In [53]:
maxlen = len(max(text, key=len))

for i in range(len(text)):
    while len(text[i]) < maxlen:
        text[i] += ' '

text

['This is the 100',
 'th Etext file p',
 'resented by Pro',
 'ject Gutenberg,',
 'is presented in',
 ' cooperation wi',
 'th World Librar',
 'y, Inc., from t',
 'Library of the ',
 'Future and Shak',
 'espeare CDROMS.',
 '  Project Guten',
 'often releases ',
 'Etexts that are',
 ' NOT placed in ',
 'the Public Doma',
 '*This Etext has',
 ' certain copyri',
 'ght implication',
 's you should re',
 '<<THIS ELECTRON',
 'IC VERSION OF T',
 'HE COMPLETE WOR',
 'SHAKESPEARE IS ',
 'COPYRIGHT 1990-',
 '1993 BY WORLD L',
 'IBRARY, INC., A',
 'PROVIDED BY PRO',
 'JECT GUTENBERG ',
 'ETEXT OF ILLINO',
 'IS BENEDICTINE ',
 'WITH PERMISSION',
 '.  ELECTRONIC A',
 'ND MACHINE READ',
 'ABLE COPIES MAY',
 'DISTRIBUTED SO ',
 'LONG AS SUCH CO',
 'PIES (1) ARE FO',
 'R YOUR OR OTHER',
 'PERSONAL USE ON',
 'LY, AND (2) ARE',
 ' NOT DISTRIBUTE',
 'COMMERCIALLY.  ',
 'PROHIBITED COMM',
 'ERCIAL DISTRIBU',
 'TION INCLUDES B',
 'SERVICE THAT CH',
 'ARGES FOR DOWNL',
 'OAD TIME OR FOR',
 '*Project Gutenb',


In [85]:
input_seqs = []
target_seqs = []

for sentence in text:
    input_seqs.append(sentence[:-1])
    target_seqs.append(sentence[1:])


In [88]:
train_data_set = []

for sentence in text:
    train_data_set.append((sentence[:-1],sentence[1:]))

train_data_set[:10]

[('This is the 10', 'his is the 100'),
 ('th Etext file ', 'h Etext file p'),
 ('resented by Pr', 'esented by Pro'),
 ('ject Gutenberg', 'ect Gutenberg,'),
 ('is presented i', 's presented in'),
 (' cooperation w', 'cooperation wi'),
 ('th World Libra', 'h World Librar'),
 ('y, Inc., from ', ', Inc., from t'),
 ('Library of the', 'ibrary of the '),
 ('Future and Sha', 'uture and Shak')]

In [90]:
for i in range(len(train_data_set)):
    train_data_set[i] = ([char2int[char] for char in train_data_set[i][0]], [char2int[char] for char in train_data_set[i][1]])

train_data_set[:10]

[([78, 72, 66, 48, 11, 66, 48, 11, 69, 72, 56, 11, 17, 47],
  [72, 66, 48, 11, 66, 48, 11, 69, 72, 56, 11, 17, 47, 47]),
 ([69, 72, 11, 82, 69, 56, 54, 69, 11, 51, 66, 39, 56, 11],
  [72, 11, 82, 69, 56, 54, 69, 11, 51, 66, 39, 56, 11, 28]),
 ([87, 56, 48, 56, 80, 69, 56, 10, 11, 29, 74, 11, 71, 87],
  [56, 48, 56, 80, 69, 56, 10, 11, 29, 74, 11, 71, 87, 44]),
 ([37, 56, 33, 69, 11, 23, 62, 69, 56, 80, 29, 56, 87, 79],
  [56, 33, 69, 11, 23, 62, 69, 56, 80, 29, 56, 87, 79, 85]),
 ([66, 48, 11, 28, 87, 56, 48, 56, 80, 69, 56, 10, 11, 66],
  [48, 11, 28, 87, 56, 48, 56, 80, 69, 56, 10, 11, 66, 80]),
 ([11, 33, 44, 44, 28, 56, 87, 14, 69, 66, 44, 80, 11, 12],
  [33, 44, 44, 28, 56, 87, 14, 69, 66, 44, 80, 11, 12, 66]),
 ([69, 72, 11, 20, 44, 87, 39, 10, 11, 30, 66, 29, 87, 14],
  [72, 11, 20, 44, 87, 39, 10, 11, 30, 66, 29, 87, 14, 87]),
 ([74, 85, 11, 38, 80, 33, 3, 85, 11, 51, 87, 44, 68, 11],
  [85, 11, 38, 80, 33, 3, 85, 11, 51, 87, 44, 68, 11, 69]),
 ([30, 66, 29, 87, 14, 87, 74, 11,

# Set batch size when using real dataset

In [91]:
len(train_data_set)

303642

In [192]:
dict_size = len(char2int)
seq_len = maxlen - 1
#batch_size = len(text)
batch_size = 16

def one_hot_encode(seq, dict_size, seq_len):
    features = np.zeros((seq_len, dict_size), dtype=np.float32)
    for i in range(seq_len):
        features[i,seq[i]] = 1
    
    return features


In [96]:
train_data_set[0][0]

[78, 72, 66, 48, 11, 66, 48, 11, 69, 72, 56, 11, 17, 47]

In [106]:
hot_encoded_train = [(one_hot_encode(train_data_set[i][0], dict_size, seq_len), train_data_set[i][1]) for i in range(len(train_data_set))]

In [140]:
# encode target output
# target_seqs_batched = []
# batch_size

# for index in target_seqs:
#     batch_offset = index % batch_size
#     if index - (index % batch_size) == 0:
#         target_seqs_batched[]

target_batched = torch.utils.data.DataLoader(hot_encoded_train[:2000], batch_size=batch_size, shuffle=True) 

In [144]:
x, y = next(iter(target_batched))

x


tensor([[[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 1.],
         [0., 0., 0.,  ..., 0., 1., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 1.],
         [0., 0., 0.,  ..., 0., 0., 1.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        ...,

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 1.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0., 

In [127]:
input_final = torch.from_numpy(input_seq_encoded)
target_seq = torch.Tensor(target_batched)

TypeError: new(): data must be a sequence (got DataLoader)

In [71]:
target_seq.shape

torch.Size([303642, 14])

In [118]:
is_cuda = torch.cuda.is_available()

if is_cuda:
    device = torch.device('gpu')
else:
    device = torch.device('cpu')

In [145]:
class Model(nn.Module):
    def __init__(self, input_size, output_size, hidden_dim, n_layers):
        super(Model, self).__init__()

        # Defining some parameters
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        #Defining the layers
        # RNN Layer
        self.rnn = nn.RNN(input_size, hidden_dim, n_layers, batch_first=True)   
        # Fully connected layer
        self.fc = nn.Linear(hidden_dim, output_size)
    
    def forward(self, x):
        
        batch_size = x.size(0)

        #Initializing hidden state for first input using method defined below
        hidden = self.init_hidden(batch_size)

        # Passing in the input and hidden state into the model and obtaining outputs
        out, hidden = self.rnn(x, hidden)
        
        # Reshaping the outputs such that it can be fit into the fully connected layer
        out = out.contiguous().view(-1, self.hidden_dim)
        out = self.fc(out)
        
        return out, hidden
    
    def init_hidden(self, batch_size):
        # This method generates the first hidden state of zeros which we'll use in the forward pass
        hidden = torch.zeros(self.n_layers, batch_size, self.hidden_dim).to(device)
         # We'll send the tensor holding the hidden state to the device we specified earlier as well
        return hidden

In [154]:
# Instantiate the model with hyperparameters
model = Model(input_size=dict_size, output_size=dict_size, hidden_dim=100, n_layers=10)
# We'll also set the model to the device that we defined earlier (default is CPU)
model = model.to(device)

# Define hyperparameters
n_epochs = 100
lr=0.01

# Define Loss, Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [155]:
# Training Run
input_seq = input_final.to(device)
for epoch in range(1, n_epochs + 1):
    batch_index = 0
    for data in target_batched:
        batch_index += 1
        x , y = data

        x = torch.Tensor(x)
        y = torch.stack(y)

        input_seq = x.to(device)
        target_seq = y.to(device)
        

        optimizer.zero_grad() # Clears existing gradients from previous epoch

        output, hidden = model(input_seq)
        output = output.to(device)

        loss = criterion(output, target_seq.view(-1).long())
        loss.backward() # Does backpropagation and calculates gradients
        optimizer.step() # Updates the weights accordingly
        
    if epoch%10 == 0:
        print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
        print("Loss: {:.4f}".format(loss.item()))

Epoch: 10/100............. Loss: 3.3464
Epoch: 20/100............. Loss: 3.2245
Epoch: 30/100............. Loss: 3.2022
Epoch: 40/100............. Loss: 3.3893
Epoch: 50/100............. Loss: 3.1892
Epoch: 60/100............. Loss: 3.3161
Epoch: 70/100............. Loss: 3.3968
Epoch: 80/100............. Loss: 3.4370
Epoch: 90/100............. Loss: 3.0402
Epoch: 100/100............. Loss: 3.4283


In [197]:
def predict(model, character):
    # One-hot encoding our input to fit into the model
    character = np.array([char2int[c] for c in character])
    print("dict_size ", dict_size)
    character = one_hot_encode(character, dict_size, character.shape[0])
    print("character: ", character)
    character = torch.from_numpy(character)
    character = character.to(device)
    
    out, hidden = model(character)

    prob = nn.functional.softmax(out[-1], dim=0).data
    # Taking the class with the highest probability score from the output
    char_ind = torch.max(prob, dim=0)[1].item()

    return int2char[char_ind], hidden

In [195]:
def sample(model, out_len, start='hey'):
    model.eval() # eval mode
    start = start.lower()
    # First off, run through the starting characters
    chars = [ch for ch in start]
    print("chars", chars)
    size = out_len - len(chars)
    # Now pass in the previous characters and get a new one
    for ii in range(size):
        char, h = predict(model, chars)
        chars.append(char)

    return ''.join(chars)

In [198]:
sample(model, 15, start= 'i love')

chars ['i', ' ', 'l', 'o', 'v', 'e']
dict_size  88
seq:   [66 11 39 44 24 56]
i, seq[i]  0 66
i, seq[i]  1 11
i, seq[i]  2 39
i, seq[i]  3 44
i, seq[i]  4 24
i, seq[i]  5 56
character:  [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.

RuntimeError: For unbatched 2-D input, hx should also be 2-D but got 3-D tensor