# Assignment 2

Use this notebook to write your code for assignment 2

# Dependencies

In [1]:
import math
import string
import time
import unidecode
import random
import re

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

import torch
import torch.nn as nn
from torch.autograd import Variable

%matplotlib inline

# Hyperparameters

Scavenger hunts are fun but I like putting my hyperparameters in one place.

In [2]:
ALL_CHARACTERS = ' abcdefghijklmnopqrstuvwxyz'
HP = {
    
    # Data pre-processing
    
    "all_chars": ALL_CHARACTERS,
    "char_set": set(ALL_CHARACTERS),
    "random_seed": 42,
    "n_chars": 0, # will need to update
    "chunk_len": 40,
    
    # Training model
    
    "n_epochs": 2000,
    "hidden_size": 100,
    "n_layers": 1,
    "lr": 0.005,
    "temperature": 0.8,
    "prime_str": "A",
    "predict_len": 100,
    
    # Model reporting
    
    "print_every": 100,
    "plot_every": 10,
    
}

# Pre-processing data

Read the data file of `shakespeare.txt` and prepare the inputs/outputs to your model.

## Data pre-processing helper functions

In [3]:
def clean_data(data):
    chars = list(HP["char_set"])
    lines = []
    for line in data:
        line = ''.join([i for i in line.lower().strip() if i in chars])
        if len(line) > 0:
            lines.append(line)
    return lines

In [4]:
def random_chunk():
    random.seed(HP["random_seed"])
    start_index = random.randint(0, HP["n_chars"] - HP["chunk_len"])
    end_index = start_index + HP["chunk_len"] + 1
    return FILE[start_index:end_index]

In [5]:
# Turn string into list of longs
def char_tensor(string):
    tensor = torch.zeros(len(string)).long()
    for c in range(len(string)):
        tensor[c] = HP["all_chars"].index(string[c])
    return Variable(tensor)

In [6]:
def random_training_set():    
    chunk = random_chunk()
    inp = char_tensor(chunk[:-1])
    target = char_tensor(chunk[1:])
    return inp, target

## Data ingest



In [7]:
with open("data/shakespeare.txt", "r") as infile:
    data = infile.readlines()
    
data = clean_data(data)
FILE = ''.join(data)
HP['n_chars'] = len(FILE)

In [8]:
print('There are %d total characters and %d unique characters in your data.' % (HP['n_chars'], len(HP['char_set'])))

print("\nExample file chunk: '{}'".format(FILE[:HP['chunk_len']]))

There are 88382 total characters and 27 unique characters in your data.

Example file chunk: 'from fairest creatures we desire increas'


In [9]:
rchunk = random_chunk()
rutens = char_tensor(rchunk)

print("Random chunk:\n{}\n\nTransforms to char tensor:\n{}\n".format(rchunk, rutens))

Random chunk:
e dying thenmy love is as a fever longing

Transforms to char tensor:
tensor([ 5,  0,  4, 25,  9, 14,  7,  0, 20,  8,  5, 14, 13, 25,  0, 12, 15, 22,
         5,  0,  9, 19,  0,  1, 19,  0,  1,  0,  6,  5, 22,  5, 18,  0, 12, 15,
        14,  7,  9, 14,  7])



# Build the Model

Implement a character-based LSTM model 

In [10]:
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, n_layers=1):
        super(RNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        
        self.encoder = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers)
        self.decoder = nn.Linear(hidden_size, output_size)
    
    def forward(self, input, hidden):
        input = self.encoder(input.view(1, -1))
        output, hidden = self.gru(input.view(1, 1, -1), hidden)
        output = self.decoder(output.view(1, -1))
        return output, hidden

    def init_hidden(self):
        return Variable(torch.zeros(self.n_layers, 1, self.hidden_size))

In [11]:
def train(inp, target):
    hidden = decoder.init_hidden()
    decoder.zero_grad()
    loss = 0

    for c in range(HP['chunk_len']):
        output, hidden = decoder(inp[c], hidden)
        label = target[c]
        label = label.unsqueeze(0)
        loss += criterion(output, label)

    loss.backward()
    decoder_optimizer.step()

    return loss.data.item() / HP['chunk_len']

In [15]:
def evaluate(prime_str='a', predict_len=100, temperature=0.8):
    hidden = decoder.init_hidden()
    prime_input = char_tensor(prime_str)
    predicted = prime_str

    # Use priming string to "build up" hidden state
    for p in range(len(prime_str) - 1):
        _, hidden = decoder(prime_input[p], hidden)
    inp = prime_input[-1]
    
    for p in range(predict_len):
        output, hidden = decoder(inp, hidden)
        
        # Sample from the network as a multinomial distribution
        output_dist = output.data.view(-1).div(temperature).exp()
        top_i = torch.multinomial(output_dist, 1)[0]
        
        # Add predicted character to string and use as next input
        predicted_char = HP['all_chars'][top_i]
        predicted += predicted_char
        inp = char_tensor(predicted_char)

    return predicted

In [16]:
def time_since(since):
    s = time.time() - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

# Training

The main training routine

In [None]:

decoder = RNN(HP['n_chars'], HP['hidden_size'], HP['n_chars'], HP['n_layers'])
decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=HP['lr'])
criterion = nn.CrossEntropyLoss()

start = time.time()
all_losses = []
loss_avg = 0

for epoch in range(1, HP['n_epochs'] + 1):
    loss = train(*random_training_set())       
    loss_avg += loss

    if epoch % HP['print_every'] == 0:
        print('[%s (%d %d%%) %.4f]' % (time_since(start), epoch, epoch / HP['n_epochs'] * 100, loss))
        print(evaluate('wh', 100), '\n')

    if epoch % HP['plot_every'] == 0:
        all_losses.append(loss_avg / HP['plot_every'])
        loss_avg = 0

[3m 20s (100 5%) 0.0494]
whe dying thenmy love is as as a feve is as a fever as a fever longing thenmy love is as a fever longi 

[6m 42s (200 10%) 0.0099]
whe dying thenmy love is as a fever longing thenmy love is as a fever longing thenmy love is as a feve 

[10m 5s (300 15%) 0.0047]
whenmy love is as a fever longing thenmy love is as a fever longing thenmy love is as a fever longing  



# Plotting the Training Losses

Plotting the historical loss during training:

In [None]:
plt.figure()
plt.plot(all_losses)

# Evaluating at different "temperatures"

Changing the `temperature` argument (variance) for 1.5, 0.75. 0.25 and sample outputs from the trained model.

In [None]:
###