# Summary

First approach based on [Practical PyTorch: Generating Shakespeare with a Character-Level RNN](https://github.com/spro/practical-pytorch/blob/master/char-rnn-generation/char-rnn-generation.ipynb) has some issues. Trying a different approach based on what was done by Andrew Ng.

In [2]:
import math
import string
import time
import random
import re
import sys

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np

import torch
import torch.nn as nn
from torch.autograd import Variable

%matplotlib inline

## Package Versions

In [3]:
print("Python version: {}\n".format(sys.version))
print("Numpy version: {}".format(np.__version__))
print("Torch version: {}".format(torch.__version__))

if torch.cuda.is_available():
    print("CUDA available")
    print("Decvice name: {}".format(torch.cuda.get_device_name(0)))
    print("Torch CUDNN Version: {}".format(torch.backends.cudnn.version()))

Python version: 3.6.8 |Anaconda, Inc.| (default, Dec 29 2018, 19:04:46) 
[GCC 4.2.1 Compatible Clang 4.0.1 (tags/RELEASE_401/final)]

Numpy version: 1.15.4
Torch version: 1.0.0


# Hyperparameters

In [4]:
ALL_CHARACTERS = string.printable
HP = {
    
    # Data pre-processing
    
    "all_chars": ALL_CHARACTERS,
    "char_set": set(ALL_CHARACTERS),
    "random_seed": 42,
    "n_chars": 0, # will need to update
    "chunk_len": 40,
    
    # Training model
    
    "n_epochs": 2000,
    "hidden_size": 100,
    "n_layers": 1,
    "lr": 0.01,
    "temperature": 0.8,
    "prime_str": "a",
    "predict_len": 100,
    
    # Model reporting
    
    "print_every": 100,
    "plot_every": 10,
    
}

# Pre-processing data

Read the data file of `shakespeare.txt` and prepare the inputs/outputs to your model.

In [5]:
with open("data/shakespeare.txt", "r") as infile:
    text = infile.read().lower()
    text = re.sub("\d|\n{2,}|\s{2,}","",text)
print('corpus length:', len(text))

corpus length: 93366


In [6]:
chars = sorted(list(set(text)))
print('total chars:', len(chars))
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

total chars: 38


In [7]:
HP["n_chars"] = len(text)

In [8]:
# cut the text in semi-redundant sequences of maxlen characters
maxlen = 40
step = 3
sentences = []
next_chars = []
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('nb sequences:', len(sentences))

print('Vectorization...')
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

nb sequences: 31109
Vectorization...


# Build the Model

Implement a character-based LSTM model 

In [77]:
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, n_layers=1):
        super(RNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        
        self.encoder = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, n_layers)
        self.decoder = nn.Linear(hidden_size, output_size)
    
    def forward(self, input, hidden):
        input = self.encoder(input)
        print("input: {}, hidden: {}".format(input.shape, hidden.shape))
        output, hidden = self.lstm(input, hidden)
        output = self.decoder(output.view(1, -1))
        return output, hidden

    def init_hidden(self):
        return Variable(torch.zeros(self.n_layers, 1, self.hidden_size))

In [78]:
def train(inp, target):
    hidden = decoder.init_hidden()
    decoder.zero_grad()
    loss = 0

    for c in range(HP['chunk_len']):
        output, hidden = decoder(inp[c], hidden)
        label = target[c]
        label = label.unsqueeze(0)
        loss += criterion(output, label)

    loss.backward()
    decoder_optimizer.step()

    return loss.data.item() / HP['chunk_len']

In [79]:
def time_since(since):
    s = time.time() - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

## Dimensionality Compatibility

In [80]:
def random_training_set(x_input, y_target):
    random.seed(HP["random_seed"])
    start_index = random.randint(0, x_input.shape[0])
    end_index = start_index + HP["chunk_len"]
    
    inp = x_input[start_index:end_index]
    target = y_target[start_index:end_index]
    
    return inp, target

In [81]:
HP["n_chars"]

93366

In [82]:
HP["chunk_len"]

40

# Training

In [83]:
x.shape

(31109, 40, 38)

In [84]:
y.shape

(31109, 38)

In [85]:
xt = torch.from_numpy(1*x)

In [86]:
yt = torch.from_numpy(1*y)

In [87]:
decoder = RNN(HP['n_chars'], HP['hidden_size'], HP['n_chars'], HP['n_layers'])
decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=HP['lr'])
criterion = nn.CrossEntropyLoss()

start = time.time()
all_losses = []
loss_avg = 0

In [88]:
for epoch in range(1, HP['n_epochs'] + 1):
    loss = train(*random_training_set(xt, yt))       
    loss_avg += loss

    if epoch % HP['print_every'] == 0:
        print('[%s (%d %d%%) %.4f]' % (time_since(start), epoch, epoch / HP['n_epochs'] * 100, loss))
        print(evaluate('Wh', HP["predict_len"]), '\n')

    if epoch % HP['plot_every'] == 0:
        all_losses.append(loss_avg / HP['plot_every'])
        loss_avg = 0

input: torch.Size([40, 38, 100]), hidden: torch.Size([1, 1, 100])


RuntimeError: Expected hidden[0] size (1, 38, 100), got (1, 100)

In [75]:
inp, target = random_training_set(xt,yt)

In [62]:
inp.shape

torch.Size([40, 40, 38])

In [48]:
target.shape

torch.Size([40, 38])