In [2]:
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
from torch.autograd import Variable

from utils import one_hot_encode, get_batches, get_lookup_tables
from model import CharRNN, sample

In [3]:
with open('../../data/text/shakespeare.txt', 'r', encoding="utf-8") as f:
    text = f.read()

Tokenize characters based on the passed in text corpus/data set

In [4]:
chars = tuple(set(text))
int2char, char2int = get_lookup_tables(text)
encoded = np.array([char2int[ch] for ch in text])

Define training function along with hyper-parameters for model-tuning

In [16]:
def train(net, data, epochs=10, n_seqs=10, n_steps=50, lr=0.001, clip=5, val_frac=0.1, cuda=False, print_every=10):
    ''' Traing a network 
    
        Arguments
        ---------
        
        net: CharRNN network
        data: text data to train the network
        epochs: Number of epochs to train
        n_seqs: Number of mini-sequences per mini-batch, aka batch size
        n_steps: Number of character steps per mini-batch
        lr: learning rate
        clip: gradient clipping
        val_frac: Fraction of data to hold out for validation
        cuda: Train with CUDA on a GPU
        print_every: Number of steps for printing training and validation loss
    
    '''
    
    net.train()
    opt = torch.optim.Adam(net.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    
    # create training and validation data
    val_idx = int(len(data)*(1-val_frac))
    data, val_data = data[:val_idx], data[val_idx:]
    print("Cuda: ", cuda)
    if cuda:
        net.to("cuda:0")
    
    counter = 0
    n_chars = len(net.chars)
    for e in range(epochs):
        h = net.init_hidden(n_seqs)
        for x, y in get_batches(data, n_seqs, n_steps):
            counter += 1
            
            # One-hot encode our data and make them Torch tensors
            x = one_hot_encode(x, n_chars)
            x, y = torch.from_numpy(x), torch.from_numpy(y)
            
            inputs, targets = Variable(x), Variable(y)
            if cuda:
                inputs, targets = inputs.to("cuda:0"), targets.to("cuda:0")
            targets = targets.type(torch.LongTensor)

            # Creating new variables for the hidden state, otherwise
            # we'd backprop through the entire training history
            h = tuple([Variable(each.data) for each in h])

            net.zero_grad()
            
            output, h = net.forward(inputs, h)
            temp =  targets.view(n_seqs*n_steps).to("cuda:0") if cuda else targets.view(n_seqs*n_steps)
            if cuda:
                output.to("cuda:0")
            
            loss = criterion(output, temp)

            loss.backward()
            
            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            nn.utils.clip_grad_norm(net.parameters(), clip)

            opt.step()
            
            if counter % print_every == 0:
                
                # Get validation loss
                val_h = net.init_hidden(n_seqs)
                val_losses = []
                for x, y in get_batches(val_data, n_seqs, n_steps):
                    # One-hot encode our data and make them Torch tensors
                    x = one_hot_encode(x, n_chars)
                    x, y = torch.from_numpy(x), torch.from_numpy(y)
                    
                    # Creating new variables for the hidden state, otherwise
                    # we'd backprop through the entire training history
                    val_h = tuple([Variable(each.data, volatile=True) for each in val_h])
                    
                    inputs, targets = Variable(x, volatile=True), Variable(y, volatile=True)
                    if cuda:
                        inputs, targets = inputs.to("cuda:0"), targets.to("cuda:0")
                    targets = targets.type(torch.LongTensor)

                    output, val_h = net.forward(inputs, val_h)
                    temp2 = targets.view(n_seqs*n_steps).to("cuda:0") if cuda else targets.view(n_seqs*n_steps)
                    if cuda:
                        output.to("cuda:0")
                    
                    val_loss = criterion(output, temp2)
                
                    val_losses.append(val_loss.data.item())
                
                print("Epoch: {}/{}...".format(e+1, epochs),
                      "Step: {}...".format(counter),
                      "Loss: {:.4f}...".format(loss.data.item()),
                      "Val Loss: {:.4f}".format(np.mean(val_losses)))
    
    return np.mean(val_losses)

    

In [26]:
use_cuda = False

In [28]:
net = CharRNN(chars, n_hidden=512, n_layers=2)
if use_cuda:
    net.to("cuda:0")
else:
    net.to("cpu:0")

In [22]:

n_seqs, n_steps = 128, 100
train(net, encoded, epochs=10, n_seqs=n_seqs, n_steps=n_steps, lr=0.001, cuda=use_cuda, print_every=10)

!!!!!!!!!!!!!!!Cuda:  False


  nn.utils.clip_grad_norm(net.parameters(), clip)
  val_h = tuple([Variable(each.data, volatile=True) for each in val_h])
  inputs, targets = Variable(x, volatile=True), Variable(y, volatile=True)


Epoch: 1/10... Step: 10... Loss: 3.3908... Val Loss: 3.4246
Epoch: 1/10... Step: 20... Loss: 3.2710... Val Loss: 3.3025


KeyboardInterrupt: 

In [None]:
checkpoint = {'n_hidden': net.n_hidden,
              'n_layers': net.n_layers,
              'state_dict': net.state_dict(),
              'tokens': net.chars}
with open('rnn.net', 'wb') as f:
    torch.save(checkpoint, f)

In [None]:
# Output sample after-training model
haiku = (sample(loaded, 75, cuda=True, top_k=10, prime="roses"))


In [None]:
haiku_syllables = [syllables.estimate(w) for w in haiku.split(" ")]
haiku_syllable

NameError: name 'syllables' is not defined

In [None]:
import syllables
syllables.estimate("estimate")

: 

In [None]:
h = "whose hiding white heart of the"
[syllables.estimate(w) for w in h.split(" ")]


: 

## Loading a checkpoint

In [None]:
with open('./checkpoints/rnn (haikus).net', 'rb') as f:
    checkpoint = torch.load(f)
    
loaded = CharRNN(checkpoint['tokens'], n_hidden=checkpoint['n_hidden'], n_layers=checkpoint['n_layers'])
loaded.load_state_dict(checkpoint['state_dict'])

<All keys matched successfully>

In [None]:
print(sample(loaded, 75, cuda=True, top_k=10, prime="roses"))

roses our hair and watches the conficture like he ain't such a guard was the trea
