In [4]:
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
from torch.autograd import Variable

In [5]:
# Location of the lyrics corpus
path = 'data_album/aesop_rock.txt'

# Read through a context manager so the file handle is closed as soon as the
# text has been loaded, instead of being left open until garbage collection.
with open(path) as corpus_file:
    text = corpus_file.read().lower()

print('Corpus length: {}'.format(len(text)))

Corpus length: 407756


In [6]:
# Distinct characters of the corpus, in sorted order
chars = sorted(set(text))
print('Total Characters: {}'.format(len(chars)))

Total Characters: 68


In [7]:
# Read the corpus as-is (no lower-casing is applied here)
with open('data_album/aesop_rock.txt', mode = 'r') as lyrics_file:
    text = lyrics_file.read()

In [8]:
# Build the character vocabulary and the lookup tables between characters and
# integer ids. The vocabulary is sorted because iterating a bare set of strings
# follows hash order, which is not stable between interpreter sessions; sorting
# keeps the character <-> id mapping identical across kernel restarts.
chars = tuple(sorted(set(text)))
int_to_char = dict(enumerate(chars))
char_to_int = {character : index for index, character in int_to_char.items()}

# The whole corpus as an array of integer ids, one id per character
encoded = np.array([char_to_int[character] for character in text])

## Preprocessing the Data

In [9]:
def one_hot_encode(arr, n_labels):
    '''
    One-hot encode an array of integer labels.

    Arguments
    ---------
    arr: integer array (or array-like) of any shape; every entry must be a
         valid label index in [0, n_labels)
    n_labels: length of the one-hot axis that is appended to the result

    Returns
    -------
    A float32 numpy array of shape (*arr.shape, n_labels) holding a single 1
    per label position and 0 everywhere else.
    '''
    arr = np.asarray(arr)

    # One row per label. arr.size works for any number of dimensions, whereas
    # multiplying the unpacked shape only works for exactly two.
    one_hot = np.zeros((arr.size, n_labels), dtype = np.float32)

    # Fill the appropriate elements with ones
    one_hot[np.arange(arr.size), arr.ravel()] = 1

    # Restore the original leading dimensions, with the one-hot axis last
    return one_hot.reshape((*arr.shape, n_labels))

In [10]:
def get_batches(arr, n_seqs, n_steps):
    '''
    Generate (x, y) mini-batches of shape n_seqs x n_steps from arr.

    arr is truncated to a whole number of batches and laid out as n_seqs rows.
    Each y is x shifted one position to the left; the last column of y is the
    element that follows the window, or the first column of the rows when the
    window is the final one.
    '''
    chars_per_batch = n_seqs * n_steps
    full_batches = len(arr) // chars_per_batch

    # drop the tail that does not fill a batch, then fold into n_seqs rows
    arr = arr[:full_batches * chars_per_batch].reshape((n_seqs, -1))
    row_length = arr.shape[1]

    for start in range(0, row_length, n_steps):
        stop = start + n_steps

        # input window
        x = arr[:, start:stop]

        # pick the column that follows the window; wrap to column 0 at the end
        if stop < row_length:
            following = arr[:, stop]
        else:
            following = arr[:, 0]

        # targets: the window shifted by one step
        y = np.zeros_like(x)
        y[:, :-1] = x[:, 1:]
        y[:, -1] = following

        yield x, y

## Define network with PyTorch

In [11]:
class CharRNN(nn.Module):
    '''
    Character-level language model: a multi-layer LSTM over one-hot encoded
    characters, followed by dropout and a fully connected layer that produces
    one score per character of the vocabulary.
    '''
    def __init__(self, 
                 tokens, 
                 n_steps = 100, 
                 n_hidden = 256, 
                 n_layers = 2, 
                 drop_prob = 0.5, 
                 lr = 0.001):
        '''
        Arguments
        ---------
        tokens: sequence of the distinct characters; a character's position
                in the sequence is its integer id
        n_steps: accepted but not used by this class
        n_hidden: size of the LSTM hidden state
        n_layers: number of stacked LSTM layers
        drop_prob: dropout probability, used between LSTM layers and on the
                   LSTM output
        lr: stored on the instance; not used by this class itself
        '''
        super().__init__()
        self.n_hidden = n_hidden
        self.n_layers = n_layers
        self.drop_prob = drop_prob
        self.lr = lr
        
        # vocabulary and the two lookup tables derived from it
        self.chars = tokens
        self.int_to_char = dict(enumerate(self.chars))
        self.char_to_int = {character:index for index, character in self.int_to_char.items()}
        
        self.dropout = nn.Dropout(drop_prob)
        self.lstm = nn.LSTM(len(self.chars),
                           n_hidden,
                           n_layers,
                           dropout = drop_prob,
                           batch_first = True)
        self.fc = nn.Linear(n_hidden,
                           len(self.chars))
        
        self.init_weights()
        
    def forward(self,
               x,
               hc):
        '''
        Forward pass through the network.

        x is a batch-first float tensor (batch, steps, len(self.chars)) and
        hc the (hidden, cell) state tuple. Returns the scores as a 2-D tensor
        of shape (batch * steps, len(self.chars)) and the new (hidden, cell)
        state.
        '''
        
        x,(h,c) = self.lstm(x,hc)
        x = self.dropout(x)
        
        # Stack up LSTM outputs. The batch-first LSTM output is not guaranteed
        # to be contiguous in memory, in which case .view() raises; reshape
        # gives the same result and copies only when it has to.
        x = x.reshape(-1, self.n_hidden)
        
        x = self.fc(x)
        
        return x, (h,c)
        
    def predict(self,
               char,
               h = None,
               cuda = False,
               top_k = None):
        '''
        Given a character, predict the next character.

        The model is moved to the GPU when cuda is True and to the CPU
        otherwise. When top_k is given, the next character is sampled from
        the top_k most probable characters only; otherwise from the whole
        vocabulary.

        Returns the predicted character and the hidden state.
        '''
        if cuda:
            self.cuda()
        else:
            self.cpu()
        
        if h is None:
            h = self.init_hidden(1)
        
        # a batch holding one sequence of one character
        x = np.array([[self.char_to_int[char]]])
        x = one_hot_encode(x, len(self.chars))
        inputs = torch.from_numpy(x)
        if cuda:
            inputs = inputs.cuda()
        
        # Inference only: no autograd graph is needed. (The former
        # `volatile = True` flag no longer has any effect in PyTorch.)
        with torch.no_grad():
            h = tuple(each.detach() for each in h)
            out, h = self.forward(inputs, h)
            # out is (1, n_chars); normalise over the character axis
            p = F.softmax(out, dim = 1)
        
        if cuda:
            p = p.cpu()
        
        if top_k is None:
            top_ch = np.arange(len(self.chars))
        else:
            p, top_ch = p.topk(top_k)
            # ravel (not squeeze) so top_k = 1 still yields a 1-D array
            top_ch = top_ch.numpy().ravel()
        
        p = p.numpy().ravel()
        char = np.random.choice(top_ch, p = p/p.sum())
        
        return self.int_to_char[int(char)], h
        
    
    def init_weights(self):
        '''
        Initialize weights for fully connected layer
        '''
        # set bias tensor to all zeros
        self.fc.bias.data.fill_(0)
        
        # FC weights as random uniform in [-1, 1]
        self.fc.weight.data.uniform_(-1,1)
        
    def init_hidden(self,
                   n_seqs):
        '''
        Initialize hidden state.

        Returns a (hidden, cell) tuple of zero tensors, each of size
        n_layers x n_seqs x n_hidden, with the dtype and device of the
        model parameters.
        '''
        weight = next(self.parameters())
        return (weight.new_zeros(self.n_layers, n_seqs, self.n_hidden),
                weight.new_zeros(self.n_layers, n_seqs, self.n_hidden))

In [12]:
def train(net,
         data,
         epochs = 10,
         n_seqs = 10,
         n_steps = 50,
         lr = 0.001,
         clip = 5,
         val_frac = 0.1,
         cuda = False,
         print_every = 10):
    '''
    Train a character-level network with Adam and cross-entropy loss.

    Arguments
    ---------
    net: network to train; must provide chars, init_hidden and forward
    data: 1-D array of integer-encoded characters
    epochs: number of passes over the training split
    n_seqs: number of sequences per mini-batch
    n_steps: number of character steps per sequence
    lr: learning rate for Adam
    clip: maximum gradient norm
    val_frac: fraction of data (taken from the end) held out for validation
    cuda: train on the GPU when True
    print_every: run validation and print losses every this many steps
    '''
    
    net.train()
    opt = torch.optim.Adam(net.parameters(), lr = lr)
    criterion = nn.CrossEntropyLoss()
    
    # create training and validation data
    val_index = int(len(data)*(1-val_frac))
    data, val_data = data[:val_index], data[val_index:]
    
    if cuda:
        net.cuda()
    
    counter = 0
    n_characters = len(net.chars)
    for epoch in range(epochs):
        h = net.init_hidden(n_seqs)
        for x, y in get_batches(data, n_seqs, n_steps):
            counter += 1
            
            # One hot encode our data and make them Torch tensors.
            # CrossEntropyLoss needs int64 class indices, whatever the
            # platform's default numpy integer width is.
            inputs = torch.from_numpy(one_hot_encode(x, n_characters))
            targets = torch.from_numpy(y).long()
            if cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            
            # Detach the hidden state from its history,
            # otherwise we'd backprop through the entire training history
            h = tuple(each.detach() for each in h)
            
            net.zero_grad()
            
            output, h = net.forward(inputs, h)
            loss = criterion(output, targets.reshape(-1))
            
            loss.backward()
            
            # clip_grad_norm_ helps prevent the exploding gradient problem in RNNs
            nn.utils.clip_grad_norm_(net.parameters(), clip)
            
            opt.step()
            
            if counter % print_every == 0:
                # Get validation loss: dropout off, no autograd bookkeeping
                val_h = net.init_hidden(n_seqs)
                val_losses = []
                net.eval()
                with torch.no_grad():
                    for val_x, val_y in get_batches(val_data, n_seqs, n_steps):
                        val_inputs = torch.from_numpy(one_hot_encode(val_x, n_characters))
                        val_targets = torch.from_numpy(val_y).long()
                        if cuda:
                            val_inputs, val_targets = val_inputs.cuda(), val_targets.cuda()
                        
                        val_output, val_h = net.forward(val_inputs, val_h)
                        val_loss = criterion(val_output, val_targets.reshape(-1))
                        
                        val_losses.append(val_loss.item())
                # back to training mode for the next optimisation step
                net.train()
                
                # the validation split may be too small to form a single batch
                mean_val_loss = np.mean(val_losses) if val_losses else float('nan')
                
                print('====================',
                      'Epoch: {}/{} ...'.format(epoch+1, epochs),
                      'Step: {}...'.format(counter),
                      'Loss: {:.4f}...'.format(loss.item()),
                      'Validation Loss: {:.4f}...'.format(mean_val_loss),
                      '====================') 

## Train the network

In [13]:
# Discard any network left over from an earlier run of the cells below
try:
    del net
except NameError:
    pass

In [14]:
# Two LSTM layers with 512 hidden units each, over the corpus vocabulary
net = CharRNN(tokens = chars, n_hidden = 512, n_layers = 2)

In [15]:
# Training hyperparameters
epochs, n_seqs, n_steps = 50, 128, 100
lr = 0.001
print_every_2 = 2

train(net, encoded,
      epochs = epochs,
      n_seqs = n_seqs,
      n_steps = n_steps,
      lr = lr,
      cuda = False, # CUDA support
      print_every = print_every_2)









KeyboardInterrupt: 

## Getting the Best Model

In [31]:
# Everything needed to rebuild the network later: its size, weights and vocabulary
checkpoint = dict(n_hidden = net.n_hidden,
                  n_layers = net.n_layers,
                  state_dict = net.state_dict(),
                  tokens = net.chars)

with open('rnn.net', 'wb') as checkpoint_file:
    torch.save(checkpoint, checkpoint_file)

## Sample the model

In [32]:
def sample(net,
          size,
          prime = 'The',
          top_k = None,
          cuda = False):
    '''
    Generate text from a trained network.

    Arguments
    ---------
    net: network providing cuda/cpu/eval, init_hidden and predict
    size: number of characters to generate after the prime text
    prime: non-empty seed text used to warm up the hidden state; it is
           included at the start of the returned string
    top_k: forwarded to net.predict
    cuda: run on the GPU when True, otherwise on the CPU

    Returns
    -------
    prime followed by exactly size generated characters.

    Raises
    ------
    ValueError: if prime is empty, since there is no character to start from.
    '''
    if not prime:
        raise ValueError('prime must contain at least one character')
    
    if cuda:
        net.cuda()
    else:
        net.cpu()
    
    net.eval()
    
    chars = list(prime)
    h = net.init_hidden(1)
    
    # Warm up the hidden state on every prime character but the last; the
    # last one is the input of the first generation step below.
    for character in prime[:-1]:
        _, h = net.predict(character,
                           h,
                           cuda = cuda,
                           top_k = top_k)
    
    # Feed each newest character back in to get the next one. Running this
    # exactly size times keeps the output at len(prime) + size characters.
    for _ in range(size):
        char, h = net.predict(chars[-1],
                              h,
                              cuda = cuda,
                              top_k = top_k)
        chars.append(char)
    
    return ''.join(chars)

In [33]:
# Generate text from the trained network, seeded with 'Aesop'
print(sample(net, size = 5000, prime = 'Aesop', top_k = 5, cuda = False))

Aesop
That's be arrugul sparting tages a surrain battommers and but the mers on the cortes is only of brocks this battering ship it
Ass I see the little outs of dince
Sen the dream draging dreams of the mart on that through the pigh and bring strubble
And the pilling themest carrotor traised
The sittomerid that trangion and sticks, better my spirted sected, and the serent call pose
I save they are to adlight a sticks of my still tradge to bull of the pore and with the city faciou to make these me in a blum or the stareth
We the bash of the pins it asson the coust and secred
I crung allow the strough themild
That's the simple offariss
I ame cimalint first toother pactic ant merting
And a thisked to ship a brake pigate
I spon that along mill that climp ant market to the match
I see the pley out of the black frem
So I can a bod babage breedingershel out a pire asking the burlies and basiding
She tones, the clossed to turn the break of burn in the break
I'm a canny, I spread on the day the

## Loading checkpoint

In [34]:
# Map every stored tensor to the CPU while loading, so a checkpoint written
# from a GPU model can still be restored on a machine without CUDA.
with open('rnn.net', 'rb') as f:
    checkpoint = torch.load(f, map_location = 'cpu')
    
# Rebuild a network with the saved architecture, then copy in the trained weights
loaded = CharRNN(checkpoint['tokens'], n_hidden=checkpoint['n_hidden'], n_layers=checkpoint['n_layers'])
loaded.load_state_dict(checkpoint['state_dict'])

In [35]:
# Generate text from the restored network to check the checkpoint round-trip
print(sample(loaded, size = 2000, prime = "New York", top_k = 5, cuda = False))

New York those back and a stan and spreading outsed my servess tellors and sticks and steres
To beline the callioned speting
It's left she to damake to hand his as out a marty tricks
To my starting the pire of all spirit stalling that spete the muttle from but it's settle me wentert
Till it blind the bleaded teets bell the potal to masters
And the pation of the pathages
We themearn's brooder spensing fuck the shook and well
Aed a the blood on a shin off internives
Attic alove the sprung common the batch in
Aed the mother fung it's the beak for the motime
Ster in the stop
Anto they drug of the buck on a crom of the clunch
Bathon a shank and traped bad a mine and spon a stink bottle pasting ousto the morting the produmes
It's let the prin a purnen achuse my channies
That's the see a stolar some and turn to pull it
Low that hung a beak and head the bast
And I'm an the beff stop
And I'm a fing that walked then compoter
And a still me a so day, and dank, the cat apop the same
To the can to 