https://github.com/gabrielloye/RNN-walkthrough/blob/master/main.ipynb

In [811]:
import random
import re

import numpy as np
import torch
import torch.nn.functional as F
from torch import nn

# Open dataset

In [812]:
# Read the whole lyrics corpus into memory, lower-casing it on the way in.
with open('./drake_archive/drake_lyrics.txt') as f:
    text_dataset_raw = f.read().lower()

Break the dataset down into sequences of SENTENCE_LEN characters

In [813]:
SENTENCE_LEN = 64

# '.' does not match a newline, so every match is a run of exactly
# SENTENCE_LEN characters taken from within a single line of the corpus.
chunk_pattern = re.compile('.' * SENTENCE_LEN)
text_dataset = chunk_pattern.findall(text_dataset_raw)

text_dataset[:5]

['wassup to all the ladies on the northside, southside, eastside, ',
 "she moved out of state, and shit done went left, she's seekin' f",
 "i watch her climb to the top of the pole and then get to slidin'",
 "i'ma just give it to you direct, instead of me throwin' this shi",
 'we used to do pornos when you would come over but now you got mo']

In [816]:
text = text_dataset

# Collect every distinct character that occurs anywhere in the sentences.
chars = set(''.join(text))

# Map integers to characters. The characters are sorted first because the
# iteration order of a set of strings can change from one interpreter run to
# the next (string hash randomisation), which would otherwise hand out
# different ids after every kernel restart.
int2char = dict(enumerate(sorted(chars)))

# Inverse mapping: characters to integers.
char2int = {char: ind for ind, char in int2char.items()}

Pad all inputs

In [818]:
# Length of the longest sentence; shorter ones are right-padded with spaces
# until they reach this width.
maxlen = max(map(len, text))

for i in range(len(text)):
    text[i] = text[i].ljust(maxlen)

text[:5]

['wassup to all the ladies on the northside, southside, eastside, ',
 "she moved out of state, and shit done went left, she's seekin' f",
 "i watch her climb to the top of the pole and then get to slidin'",
 "i'ma just give it to you direct, instead of me throwin' this shi",
 'we used to do pornos when you would come over but now you got mo']

In [842]:
# Next-character pairs: the input is the sentence without its last character,
# the target is the same sentence without its first character.
train_data_set = [(sentence[:-1], sentence[1:]) for sentence in text]

train_data_set[:5]

[('wassup to all the ladies on the northside, southside, eastside,',
  'assup to all the ladies on the northside, southside, eastside, '),
 ("she moved out of state, and shit done went left, she's seekin' ",
  "he moved out of state, and shit done went left, she's seekin' f"),
 ('i watch her climb to the top of the pole and then get to slidin',
  " watch her climb to the top of the pole and then get to slidin'"),
 ("i'ma just give it to you direct, instead of me throwin' this sh",
  "'ma just give it to you direct, instead of me throwin' this shi"),
 ('we used to do pornos when you would come over but now you got m',
  'e used to do pornos when you would come over but now you got mo')]

In [843]:
# Replace every character of each (input, target) pair with its integer id.
# Slice assignment keeps the update in place on the existing list.
train_data_set[:] = [
    ([char2int[char] for char in source], [char2int[char] for char in target])
    for source, target in train_data_set
]

# train_data_set[:10]

# Set batch size

In [844]:
batch_size = 8
# Inputs and targets are one character shorter than the padded sentences.
seq_len = maxlen - 1
# Vocabulary size, used as the width of every one-hot vector.
dict_size = len(char2int)

def one_hot_encode(seq, dict_size, seq_len):
    """One-hot encode the first ``seq_len`` entries of ``seq``.

    Args:
        seq: Indexable sequence of integer class ids.
        dict_size: Number of classes, i.e. the width of each encoded row.
        seq_len: Number of leading positions of ``seq`` to encode.

    Returns:
        A ``float32`` array of shape ``(seq_len, dict_size)`` in which row
        ``i`` is zero everywhere except for a 1 in column ``seq[i]``.

    Raises:
        IndexError: If ``seq`` has fewer than ``seq_len`` entries or an id
            falls outside ``dict_size``.
    """
    positions = list(range(seq_len))
    # Reading seq one position at a time keeps the IndexError for short input.
    class_ids = [seq[pos] for pos in positions]

    encoded = np.zeros((seq_len, dict_size), dtype=np.float32)
    encoded[positions, class_ids] = 1
    return encoded


One-hot encode all inputs and outputs

In [847]:
# One (input, target) pair of one-hot matrices per training sentence.
hot_encoded_train = [
    (
        one_hot_encode(input_ids, dict_size, seq_len),
        one_hot_encode(target_ids, dict_size, seq_len),
    )
    for input_ids, target_ids in train_data_set
]

In [828]:
# Number of one-hot encoded (input, target) training pairs.
len(hot_encoded_train)

891

In [850]:
batch_size = 8

# shuffle=True makes the DataLoader draw a fresh random order every epoch, so
# the list needs neither a manual pre-shuffle nor a defensive copy.
target_batched = torch.utils.data.DataLoader(hot_encoded_train, batch_size=batch_size, shuffle=True)
len(target_batched)

112

In [830]:
is_cuda = torch.cuda.is_available()

# 'cuda' is PyTorch's device-type string for NVIDIA GPUs; 'gpu' is not a valid
# device type and makes torch.device raise a RuntimeError.
device = torch.device('cuda' if is_cuda else 'cpu')

In [851]:
class Model(nn.Module):
    """Character-level RNN: stacked ``nn.RNN`` layers, a linear projection and
    a softmax over the output classes.

    Args:
        input_size: Width of each input vector.
        output_size: Number of output classes per time step.
        hidden_dim: Size of the RNN hidden state.
        n_layers: Number of stacked RNN layers.
        dropout: Probability used to build ``self.dropout``. The layer is
            constructed but is not applied in ``forward``.
    """

    def __init__(self, input_size, output_size, hidden_dim, n_layers, dropout=0.1):
        super().__init__()

        # Kept on the instance because forward/init_hidden need them for shapes.
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        # batch_first=True: inputs are laid out as (batch, seq_len, input_size).
        self.rnn = nn.RNN(input_size, hidden_dim, n_layers, batch_first=True)

        self.dropout = nn.Dropout(dropout)
        # Projects every hidden vector onto the output classes.
        self.fc = nn.Linear(hidden_dim, output_size)

    def forward(self, x):
        """Run a batch of sequences through the network.

        Args:
            x: Tensor of shape ``(batch, seq_len, input_size)``.

        Returns:
            Tuple ``(out, hidden)``. ``out`` has shape
            ``(batch * seq_len, output_size)`` and each row is a softmax
            distribution; ``hidden`` is the final RNN hidden state of shape
            ``(n_layers, batch, hidden_dim)``.
        """
        batch_size = x.size(0)

        # Every forward pass starts from an all-zero hidden state.
        hidden = self.init_hidden(batch_size)

        out, hidden = self.rnn(x, hidden)

        # Dropout is intentionally not applied here (self.dropout is unused).

        # Flatten (batch, seq_len) into one axis so the linear layer sees one
        # hidden vector per row.
        out = out.contiguous().view(-1, self.hidden_dim)
        out = self.fc(out)
        out = F.softmax(out, dim=1)

        return out, hidden

    def init_hidden(self, batch_size):
        """Return a zero hidden state of shape ``(n_layers, batch_size, hidden_dim)``.

        The tensor takes its device and dtype from the model's own parameters,
        so it always matches the model regardless of any notebook-level
        ``device`` variable.
        """
        reference = next(self.parameters())
        return torch.zeros(
            self.n_layers,
            batch_size,
            self.hidden_dim,
            dtype=reference.dtype,
            device=reference.device,
        )

In [852]:
# Hyperparameters
lr = 0.001

# Build the network and move it to the selected device in one step
# (nn.Module.to returns the module itself).
model = Model(input_size=dict_size, output_size=dict_size, hidden_dim=500, n_layers=3).to(device)

# Loss and optimizer. Alternative loss that was tried: nn.SmoothL1Loss()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [853]:
N_EPOCHS = 10

# Training run
for epoch in range(1, N_EPOCHS + 1):
    # Re-enable training mode in case sample() switched the model to eval
    # mode earlier in the session.
    model.train()

    for x, y in target_batched:
        # The DataLoader already yields tensors, so they only need to be
        # moved to the training device.
        # use .stack for not hot encoded output
        input_seq = x.to(device)
        target_seq = y.to(device)

        optimizer.zero_grad()  # Clear gradients left over from the previous batch

        # The output already lives on the same device as the model.
        output, hidden = model(input_seq)

        # Compare predictions and one-hot targets element-wise as flat vectors.
        loss = criterion(output.view(-1), target_seq.view(-1))
        loss.backward()  # Backpropagate and compute gradients
        optimizer.step()  # Update the weights accordingly

    # The reported value is the loss of the last batch of the epoch.
    print('Epoch: {}/{}.............'.format(epoch, N_EPOCHS), end=' ')
    print("Loss: {:.4f}".format(loss.item()))

Epoch: 1/10............. Loss: 0.0128


In [455]:
def predict(model, character):
    """Predict the character that follows the sequence ``character``.

    Args:
        model: Network called as ``model(batch)`` and returning
            ``(out, hidden)``.
        character: Sequence of characters; each one must be a key of
            ``char2int``.

    Returns:
        Tuple ``(next_char, hidden)``: the highest-scoring character for the
        last row of the model output, and the hidden state the model returned.

    Raises:
        KeyError: If a character is missing from ``char2int``.
    """
    # One-hot encode the input so it fits the model.
    indices = np.array([char2int[c] for c in character])
    encoded = one_hot_encode(indices, dict_size, indices.shape[0])
    # from_numpy + unsqueeze adds the batch axis directly, avoiding the slow
    # conversion of a Python list of arrays that torch.tensor([...]) performs.
    batch = torch.from_numpy(encoded).unsqueeze(0).to(device)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        out, hidden = model(batch)

    # Softmax is monotonic, so the argmax of the raw last-row scores is the
    # same class an extra softmax would select.
    char_ind = torch.argmax(out[-1], dim=0).item()

    return int2char[char_ind], hidden

In [312]:
def sample(model, out_len, start='hey'):
    """Generate text by repeatedly predicting the next character.

    Args:
        model: Network handed to ``predict``; it is switched to eval mode.
        out_len: Target length of the returned string.
        start: Seed text; it is lower-cased before use.

    Returns:
        The lower-cased seed followed by predicted characters, ``out_len``
        characters long. A seed that is already at least ``out_len`` long is
        returned lower-cased without any prediction.
    """
    model.eval()  # eval mode
    generated = list(start.lower())
    print("chars", generated)
    # Feed everything generated so far back in to obtain each next character.
    for _ in range(out_len - len(generated)):
        next_char, _hidden = predict(model, generated)
        generated.append(next_char)

    return ''.join(generated)

In [836]:
# Generate 100 characters of text from the seed 'i ca'.
sample(model, out_len=100, start='i ca')

chars ['i', ' ', 'c', 'a']


"i can't to the tore, the way to the tore, the way to the tore, the way to the tore, the way to the t"

'..........'

# Training notes

1. nn.SmoothL1Loss() produced nice words
    BCELoss was pretty repetitive after a few words
    MSE worked well too
    CrossEntropy was not so good
2. Generally 30-50 hidden dim
3. 3-5 hidden layers
4. 30 epochs generally gave good results
5. Batch size 8-18
6. On smaller inputs, 200 epochs gave good results


# Best Results
SENTENCE_LEN = 64
LR = 0.001
BATCH_SIZE = 8
No Dropout
MSELoss
hidden_dim=500, n_layers=3
Adam
