# Assignment 2

Use this notebook to write your code for assignment 2

# Dependencies

In [1]:
!pip install unidecode



In [2]:
import math
import string
import time
import random
import re
import sys

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import unidecode

import torch
import torch.nn as nn
from torch.autograd import Variable

%matplotlib inline

## Package Versions

In [3]:
print("Python version: {}\n".format(sys.version))
print("Numpy version: {}".format(np.__version__))
print("Torch version: {}".format(torch.__version__))

if torch.cuda.is_available():
    print("CUDA available")
    print("Decvice name: {}".format(torch.cuda.get_device_name(0)))
    print("Torch CUDNN Version: {}".format(torch.backends.cudnn.version()))

Python version: 3.6.8 |Anaconda, Inc.| (default, Dec 29 2018, 19:04:46) 
[GCC 4.2.1 Compatible Clang 4.0.1 (tags/RELEASE_401/final)]

Numpy version: 1.15.4
Torch version: 1.0.0


# Hyperparameters

Scavenger hunts are fun but I like putting my hyperparameters in one place.

In [4]:
#ALL_CHARACTERS = ' abcdefghijklmnopqrstuvwxyz'
ALL_CHARACTERS = string.printable
HP = {
    
    # Data pre-processing
    
    "all_chars": ALL_CHARACTERS,
    "char_set": set(ALL_CHARACTERS),
    "random_seed": 42,
    "n_chars": 0, # will need to update
    "chunk_len": 40,
    
    # Training model
    
    "n_epochs": 3000,
    "hidden_size": 200,
    "n_layers": 1,
    "lr": 0.005,
    "temperature": 0.8,
    "prime_str": "A",
    "predict_len": 100,
    
    # Model reporting
    
    "print_every": 500,
    "plot_every": 10,
    
}

# Pre-processing data

Read the data file of `shakespeare.txt` and prepare the inputs/outputs to your model.

In [5]:
text = unidecode.unidecode(open('./data/shakespeare.txt').read())
text = re.sub("\d|\n{2,}|\s{2,}","",text)
    
FILE = ''.join(text)
HP['n_chars'] = len(FILE)
print('corpus length:', len(text))

corpus length: 93366


In [6]:
def random_chunk():
    random.seed(HP["random_seed"])
    start_index = random.randint(0, HP["n_chars"] - HP["chunk_len"])
    end_index = start_index + HP["chunk_len"] + 1
    return FILE[start_index:end_index]

In [7]:
# Turn string into list of longs
def char_tensor(string):
    tensor = torch.zeros(len(string)).long()
    for c in range(len(string)):
        tensor[c] = HP['all_chars'].index(string[c])
    return Variable(tensor)

In [8]:
print('There are %d total characters and %d unique characters in your data.' % (HP['n_chars'], len(HP['char_set'])))

print("\nExample file chunk: '{}'".format(FILE[:HP['chunk_len']]))

There are 93366 total characters and 100 unique characters in your data.

Example file chunk: '
From fairest creatures we desire increa'


In [9]:
rchunk = random_chunk()
rutens = char_tensor(rchunk)

print("Random chunk:\n{}\n\nTransforms to char tensor:\n{}\n".format(rchunk, rutens))

Random chunk:
ustify the wrong,
That thy unkindness lay

Transforms to char tensor:
tensor([30, 28, 29, 18, 15, 34, 94, 29, 17, 14, 94, 32, 27, 24, 23, 16, 73, 96,
        55, 17, 10, 29, 94, 29, 17, 34, 94, 30, 23, 20, 18, 23, 13, 23, 14, 28,
        28, 94, 21, 10, 34])



# Build the Model

Implement a character-based LSTM model 

In [10]:
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super(RNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        
        self.encoder = nn.Embedding(input_size, hidden_size)
        
        self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers)
        
        self.decoder = nn.Linear(hidden_size, output_size)
    
    def forward(self, input, hidden):
        input = self.encoder(input.view(1, -1))
        output, hidden = self.lstm(input.view(1, 1, -1), hidden)
        output = self.decoder(output.view(1, -1))
        return output, hidden

    def init_hidden(self): 
        return (torch.zeros(1, 1, self.hidden_size),
                torch.zeros(1, 1, self.hidden_size))

# Training

The main training routine

In [11]:
def random_training_set():    
    chunk = random_chunk()
    inp = char_tensor(chunk[:-1])
    target = char_tensor(chunk[1:])
    return inp, target

In [12]:
def evaluate(prime_str='A', predict_len=100, temperature=0.8):
    hidden = decoder.init_hidden()
    prime_input = char_tensor(prime_str)
    predicted = prime_str

    # Use priming string to "build up" hidden state
    for p in range(len(prime_str) - 1):
        _, hidden = decoder(prime_input[p], hidden)
    inp = prime_input[-1]
    
    for p in range(predict_len):
        output, hidden = decoder(inp, hidden)
        
        # Sample from the network as a multinomial distribution
        output_dist = output.data.view(-1).div(temperature).exp()
        top_i = torch.multinomial(output_dist, 1)[0]
        
        # Add predicted character to string and use as next input
        predicted_char = HP['all_chars'][top_i]
        predicted += predicted_char
        inp = char_tensor(predicted_char)

    return predicted

In [13]:
def train(inp, target):
    hidden = decoder.init_hidden()
    decoder.zero_grad()

    loss = 0

    for c in range(HP['chunk_len']):
        output, hidden = decoder(inp[c], hidden)
        loss += criterion(output, torch.LongTensor([target[c]]))

    loss.backward()
    decoder_optimizer.step()

    return loss.data.item() / HP['chunk_len']

In [14]:
def time_since(since):
    s = time.time() - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

In [15]:
decoder = RNN(HP['n_chars'], HP['hidden_size'], HP['n_chars'], HP['n_layers'])
decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=HP['lr'])
criterion = nn.CrossEntropyLoss()

start = time.time()
all_losses = []
loss_avg = 0

for epoch in range(1, HP['n_epochs'] + 1):
    loss = train(*random_training_set())       
    loss_avg += loss

    if epoch % HP['print_every'] == 0:
        print('[%s (%d %d%%) %.4f]' % (time_since(start), epoch, epoch / HP['n_epochs'] * 100, loss))
        print(evaluate('Wh', HP["predict_len"]), '\n')

    if epoch % HP['plot_every'] == 0:
        all_losses.append(loss_avg / HP['plot_every'])
        loss_avg = 0

KeyboardInterrupt: 

# Plotting the Training Losses

Plotting the historical loss during training:

In [None]:
plt.figure()
plt.plot(all_losses)

# Evaluating at different "temperatures"

Changing the `temperature` argument (variance) for 1.5, 0.75. 0.25 and sample outputs from the trained model.

In [None]:
temps = [1.5, 0.75, 0.25]

print(evaluate('Shall I compare thee to a summer''s day?\n', 300, temperature=1.5))