In [1]:
import unidecode
import string
import random
import re
import numpy as np
# Vocabulary: every printable ASCII character. to_vector() encodes a character
# as its index in this string.
all_characters = string.printable
n_characters = len(all_characters)

# Load the whole corpus into memory. The context manager closes the file handle
# as soon as the text has been read instead of leaving it to the garbage collector.
with open('./data/shakespeare.txt') as corpus_file:
    file = corpus_file.read()
file_len = len(file)
print('file_len =', file_len)

file_len = 1115393


In [6]:
import torch
from torch import nn
import torch.nn.functional as F
from torch import autograd

class RNN(nn.Module):
    """Character-level language model: embedding -> stacked LSTM -> linear projection.

    The model consumes ONE token per call (sequence length 1, batch size 1) and
    threads the recurrent state through explicitly, so callers loop over a
    sequence themselves and pass the returned ``hidden`` back in.

    Args (keyword-only):
        input_size: number of distinct input token ids (embedding table rows).
        embedding_size: width of each token embedding.
        hidden_size: width of the LSTM hidden/cell state.
        output_size: number of output classes (logit vector length).
        n_layers: number of stacked LSTM layers.
    """

    def __init__(self, *, input_size, embedding_size, hidden_size, output_size, n_layers=1):
        super(RNN, self).__init__()
        self.input_size = input_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers

        # map inputs to embeddings
        self.embedding_layer = nn.Embedding(input_size, embedding_size)
        # forward embeddings through LSTM
        self.LSTM = nn.LSTM(embedding_size, hidden_size, n_layers)
        # compute a linear transformation to output space
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden):
        """Advance the model by a single token.

        Args:
            input: integer tensor holding exactly one token id.
            hidden: ``(h, c)`` LSTM state, e.g. from ``init_hidden()`` or from
                the previous call.

        Returns:
            ``(output, hidden, scores)`` where ``output`` are the raw logits of
            shape ``(1, output_size)``, ``hidden`` is the updated LSTM state and
            ``scores`` is the softmax of ``output`` over the class dimension.
        """
        embedded = self.embedding_layer(input.view(1, -1))
        # The LSTM expects (seq_len, batch, features); both leading dims are 1 here.
        output, hidden = self.LSTM(embedded.view(1, 1, -1), hidden)
        output = self.linear(output.view(1, -1))
        # Normalise over the class dimension explicitly: omitting ``dim`` is
        # deprecated and emits a UserWarning on every forward call.
        scores = F.softmax(output, dim=1)
        return output, hidden, scores

    def init_hidden(self):
        """Return a zeroed ``(h, c)`` state, each of shape ``(n_layers, 1, hidden_size)``."""
        state_shape = (self.n_layers, 1, self.hidden_size)
        # autograd.Variable is a plain pass-through on PyTorch >= 0.4; it is kept
        # so the class keeps working on older releases that still require it.
        return (autograd.Variable(torch.zeros(*state_shape)),
                autograd.Variable(torch.zeros(*state_shape)))

In [7]:
# Number of characters in each training sequence drawn from the corpus
# (get_seq() uses the same value as its default argument).
seq_len = 200

def get_seq(seq_len = 200):
    """Return a random contiguous slice of the corpus.

    Reads the module-level ``file`` (corpus text) and ``file_len`` (its length).
    Note that the ``seq_len`` parameter shadows the module-level constant of the
    same name.

    Args:
        seq_len: number of characters to return.

    Returns:
        A string of exactly ``seq_len`` characters.

    Raises:
        ValueError: if ``seq_len`` is not positive or exceeds the corpus length.
    """
    if not 0 < seq_len <= file_len:
        raise ValueError(
            'seq_len must be between 1 and the corpus length ({}), got {}'.format(file_len, seq_len))
    # randint's upper bound is exclusive, so add 1 to keep the final window
    # (the one ending on the last character of the corpus) reachable.
    start = np.random.randint(0, file_len - seq_len + 1)
    seq = file[start:start + seq_len]
    # Check against the requested length, not a hard-coded 200.
    assert len(seq) == seq_len
    return seq

def to_vector(seq, chars = all_characters):
    """Encode ``seq`` as a 1-D LongTensor of character ids.

    Each character is replaced by its position in ``chars``; a character that
    does not occur in ``chars`` raises ``ValueError`` (from ``str.index``).
    """
    char_ids = list(map(chars.index, seq))
    encoded = torch.LongTensor(char_ids)
    return autograd.Variable(encoded)

def generate_training_set():
    """Sample one corpus sequence and build a next-character prediction pair.

    Returns:
        ``(inputs, labels)``: the encoded sequence without its last character,
        and the encoded sequence without its first character, so ``labels[i]``
        is the character that follows ``inputs[i]``.
    """
    chunk = get_seq()
    # Targets are the inputs shifted one position to the left.
    return to_vector(chunk[:-1]), to_vector(chunk[1:])

In [8]:

def evaluate(prime_str='A', predict_len=100, temperature=0.8):
    """Sample text from the module-level ``rnn``, seeded with ``prime_str``.

    Args:
        prime_str: non-empty string fed through the model first to build up
            the hidden state; it is also the prefix of the returned text.
        predict_len: number of characters to generate after the prime string.
        temperature: positive divisor applied to the logits before sampling;
            smaller values make the sampling more concentrated on the
            highest-scoring characters.

    Returns:
        ``prime_str`` followed by ``predict_len`` sampled characters.

    Raises:
        ValueError: if ``prime_str`` is empty or ``temperature`` is not positive.
    """
    if not prime_str:
        raise ValueError('prime_str must contain at least one character')
    if temperature <= 0:
        raise ValueError('temperature must be positive')

    hidden = rnn.init_hidden()
    prime_input = to_vector(prime_str)
    predicted = prime_str

    # Use priming string to "build up" hidden state
    for p in range(len(prime_str) - 1):
        _, hidden, _ = rnn(prime_input[p], hidden)
    inp = prime_input[-1]

    for _ in range(predict_len):
        output, hidden, _ = rnn(inp, hidden)

        # Sample from the network as a multinomial distribution. Subtracting
        # the maximum before exp() leaves the sampling probabilities unchanged
        # (multinomial only needs relative weights) but keeps large logits
        # from overflowing to inf, which multinomial rejects.
        scaled = output.data.view(-1).div(temperature)
        output_dist = (scaled - scaled.max()).exp()

        top_i = int(torch.multinomial(output_dist, 1)[0])

        # Add predicted character to string and use as next input
        predicted_char = all_characters[top_i]
        predicted += predicted_char
        inp = to_vector(predicted_char)

    return predicted

In [None]:
# Number of training iterations; each one trains on a single randomly sampled sequence.
n_epochs = 5000
# Width of the LSTM hidden state.
hidden_size = 200
# Width of each character embedding.
embedding_size = 100
# Number of stacked LSTM layers.
n_layers = 4

def my_evaluate(start_char, hidden, predict_len = 100):
    """Greedily decode ``predict_len`` characters from the module-level ``rnn``.

    At every step the highest-scoring character is chosen (argmax over the
    softmax scores) and fed back in as the next input.

    Args:
        start_char: encoded first input, in the form ``to_vector`` produces.
        hidden: LSTM state to start decoding from.
        predict_len: number of characters to generate.

    Returns:
        The generated characters joined into one string (``start_char`` itself
        is not included).
    """
    predicted_chars = []
    for _ in range(predict_len):
        _, hidden, scores = rnn(start_char, hidden)
        idx = int(np.argmax(scores.data.numpy()))
        predicted_char = all_characters[idx]
        predicted_chars.append(predicted_char)
        start_char = to_vector(predicted_char)
    # Check against the requested length, not a hard-coded 100.
    assert len(predicted_chars) == predict_len
    return "".join(predicted_chars)
        
# Character-level model: the vocabulary size is used both as the number of
# input ids and as the width of the output layer.
rnn = RNN(
    input_size=n_characters,
    embedding_size=embedding_size,
    hidden_size=hidden_size,
    output_size=n_characters,
    n_layers=n_layers,
)

# Cross-entropy on the raw logits, optimised with Adam over all model parameters.
criterion = nn.CrossEntropyLoss()
optim = torch.optim.Adam(rnn.parameters(), lr=0.005)

# One mean-loss entry is appended per training iteration.
all_losses = list()

# Train on one randomly sampled sequence per iteration (requires PyTorch >= 0.4
# for Tensor.item()).
for epoch in range(n_epochs):
    # re-init hidden and zero the grads
    hidden = rnn.init_hidden()
    rnn.zero_grad()
    inputs, labels = generate_training_set()
    # Take the step count from the data itself so it cannot drift from the
    # length get_seq() actually returned.
    n_steps = len(inputs)
    # run through the inputs one by one, accumulating a loss
    loss = 0
    for c in range(n_steps):
        output, hidden, scores = rnn(inputs[c], hidden)
        # The logits have shape (1, n_classes), so the target must have shape
        # (1,); indexing a 1-D tensor yields a 0-dim tensor on current PyTorch.
        loss += criterion(output, labels[c].view(1))
    # The graph is rebuilt from a fresh hidden state every iteration, so there
    # is no reason to retain it after the backward pass.
    loss.backward()
    optim.step()
    # Average over the number of terms actually summed; .item() extracts the
    # Python float (loss.data[0] fails on a 0-dim tensor).
    mean_loss = loss.item() / n_steps
    all_losses.append(mean_loss)
    if epoch % 50 == 0:
        print('EPOCH: {}'.format(epoch))
        predicted = evaluate()
        print(predicted)
        print('LOSS: {}'.format(mean_loss))




EPOCH: 0
A
)JQV ^;reH}CwGS6}'*	T*:@"DEdZG|Kh){&0c^|h}8.s.;H<y~;8syYxd!r|.K/
Wf}qy_(\ikMSvPFpxrS
9l^T0OqsX@_(
LOSS: 4.6016323852539065
