In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F


In [2]:
with open("data/anna.txt", 'r') as f:
    data = f.read()



In [3]:
chars = tuple(set(data)) # unique chars set
int2char = dict(enumerate(chars))
char2int = {ch:ii for ii, ch in int2char.items()}
encoded = np.array([char2int[ch] for ch in data])

In [4]:
data[:100]

'Chapter 1\n\n\nHappy families are all alike; every unhappy family is unhappy in its own\nway.\n\nEverythin'

In [5]:
encoded[:100]

array([ 4, 36,  1, 52, 66, 21, 14, 69,  6, 78, 78, 78, 74,  1, 52, 52, 61,
       69, 56,  1,  8, 55, 32, 55, 21, 72, 69,  1, 14, 21, 69,  1, 32, 32,
       69,  1, 32, 55, 51, 21, 41, 69, 21,  3, 21, 14, 61, 69, 13, 59, 36,
        1, 52, 52, 61, 69, 56,  1,  8, 55, 32, 61, 69, 55, 72, 69, 13, 59,
       36,  1, 52, 52, 61, 69, 55, 59, 69, 55, 66, 72, 69, 35, 47, 59, 78,
       47,  1, 61, 37, 78, 78, 40,  3, 21, 14, 61, 66, 36, 55, 59])

In [6]:
def one_hot(arr, n_labels):
    one_hot = np.zeros((np.multiply(*arr.shape), n_labels), dtype=np.float32)
    one_hot[np.arange(one_hot.shape[0]), arr.flatten()] = 1.
    one_hot = one_hot.reshape((*arr.shape, n_labels))
    return one_hot

In [7]:
def get_batches(arr, n_seqs, n_steps):
    '''Create a generator that returns batches of size
       n_seqs x n_steps from arr.
       
       Arguments
       ---------
       arr: Array you want to make batches from
       n_seqs: Batch size, the number of sequences per batch
       n_steps: Number of sequence steps per batch
    '''
    
    # Get the number of characters per batch
    batch_size = n_seqs * n_steps
    
    ## Get the number of batches we can make
    n_batches = len(arr)//batch_size

    ## Keep only enough characters to make full batches
    arr = arr[:batch_size*n_batches]

    ## Reshape into batch_size rows
    arr = arr.reshape((n_seqs, -1))
    
    for n in range(0, arr.shape[1], n_steps):
        # The features
        x = arr[:, n:n+n_steps]
        # The targets, shifted by one
        y = np.zeros_like(x)
        try:
            y[:, :-1], y[:, -1] = x[:, 1:], x[:, n+n_steps]
        except IndexError:
            y[:, :-1], y[:, -1] = x[:, 1:], x[:, 0]
        yield x, y

In [8]:
batches = get_batches(encoded, 10, 50)
x, y = next(batches)
print("x\n", x[:10, :10])
print("\ny\n", y[:10, :10])

x
 [[ 4 36  1 52 66 21 14 69  6 78]
 [69  1  8 69 59 35 66 69 17 35]
 [ 3 55 59 37 78 78 33  5 21 72]
 [59 69 24 13 14 55 59 17 69 36]
 [69 55 66 69 55 72 70 69 72 55]
 [69 31 66 69 47  1 72 78 35 59]
 [36 21 59 69 57 35  8 21 69 56]
 [41 69 50 13 66 69 59 35 47 69]
 [66 69 55 72 59 53 66 37 69 54]
 [69 72  1 55 24 69 66 35 69 36]]

y
 [[36  1 52 66 21 14 69  6 78 78]
 [ 1  8 69 59 35 66 69 17 35 55]
 [55 59 37 78 78 33  5 21 72 70]
 [69 24 13 14 55 59 17 69 36 55]
 [55 66 69 55 72 70 69 72 55 14]
 [31 66 69 47  1 72 78 35 59 32]
 [21 59 69 57 35  8 21 69 56 35]
 [69 50 13 66 69 59 35 47 69 72]
 [69 55 72 59 53 66 37 69 54 36]
 [72  1 55 24 69 66 35 69 36 21]]


In [9]:
class CharRNN(nn.Module):
    def __init__(self, tokens, n_steps=100, n_hidden=256, n_layers=2, drop_prob=0.5, lr=0.001):
        super().__init__()
        self.n_steps = n_steps
        self.n_hidden = n_hidden
        self.n_layers = n_layers
        self.drop_prob = drop_prob

        self.chars = tokens
        self.int2char = dict(enumerate(self.chars))
        self.char2int = {ch:ii for ii, ch in self.int2char.items()}

        self.lstm = nn.LSTM(len(chars), n_hidden, n_layers, dropout=drop_prob, batch_first=True)
        self.dropout = nn.Dropout(p=drop_prob)
        self.fc = nn.Linear(n_hidden, len(self.chars))

        self.init_weights()

    def forward(self, x, hc):
        x, (h, c) = self.lstm(x, hc)
        x = self.dropout(x)
        x = x.view(x.size()[0]*x.size()[1], self.n_hidden)
        x = self.fc(x)
        return x, (h, c)

    def predict(self, char, h=None, cuda=True, top_k=None):
        if cuda:
            self.cuda()
        else:
            self.cpu()

        if h is None:
            h = self.init_hidden(1)
        x = np.array([[self.char2int[char]]])
        x = one_hot(x, len(self.chars))
        inputs = torch.from_numpy(x)
        if cuda:
            inputs = inputs.cuda()

        h = tuple([each.data for each in h])

        out, h = self.forward(inputs, h)

        p = F.softmax(out, dim=1).data
        if cuda:
            p = p.cpu()

        if top_k is None:
            top_ch = self.arange(len(self.chars))
        else:
            p, top_ch = p.topk(top_k)
            top_ch = top_ch.numpy().squeeze()

        p = p.numpy().squeeze()
        char = np.random.choice(top_ch, p=p/p.sum())
            
        return self.int2char[char], h
        

    def init_weights(self):
        self.fc.bias.data.fill_(0)
        self.fc.weight.data.uniform_(-1, 1)
    
    def init_hidden(self, n_seqs):
        weight = next(self.parameters()).data
        return (weight.new(self.n_layers, n_seqs, self.n_hidden).zero_().cuda(),
                weight.new(self.n_layers, n_seqs, self.n_hidden).zero_().cuda())

In [10]:
import os
import time
import requests

def train(net, data, epochs=10, n_seqs=10, n_steps=50, lr=0.001, clip=5, val_frac=0.1, cuda=True, print_every=10):
    net.train()
    optimizer = torch.optim.Adam(net.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    val_idx = int(len(data)*(1-val_frac))
    data, val_data = data[:val_idx], data[val_idx:]

    if cuda: net.cuda()

    counter = 0
    n_chars = len(net.chars)

    for epoch in range(epochs):
        h = net.init_hidden(n_seqs)
        for x, y in get_batches(data, n_seqs, n_steps):
            counter += 1
            
            x = one_hot(x, n_chars)
            inputs, targets = torch.from_numpy(x), torch.from_numpy(y)

            if cuda:
                inputs, targets = inputs.cuda(), targets.cuda()

            h = tuple([each.data for each in h])
            net.zero_grad()

            output, h = net.forward(inputs, h)
            loss = criterion(output, targets.view(n_seqs*n_steps))

            loss.backward()

            nn.utils.clip_grad_norm_(net.parameters(), clip)

            optimizer.step()
            
            if counter % print_every == 0:
                
                # Get validation loss
                val_h = net.init_hidden(n_seqs)
                val_losses = []
                for x, y in get_batches(val_data, n_seqs, n_steps):
                    # One-hot encode our data and make them Torch tensors
                    x = one_hot(x, n_chars)
                    x, y = torch.from_numpy(x), torch.from_numpy(y)
                    
                    # Creating new variables for the hidden state, otherwise
                    # we'd backprop through the entire training history
                    val_h = tuple([each.data for each in val_h])
                    
                    inputs, targets = x, y
                    if cuda:
                        inputs, targets = inputs.cuda(), targets.cuda()

                    output, val_h = net.forward(inputs, val_h)
                    val_loss = criterion(output, targets.view(n_seqs*n_steps))
                
                    val_losses.append(val_loss.item())
                
                print("Epoch: {}/{}...".format(epoch+1, epochs),
                      "Step: {}...".format(counter),
                      "Loss: {:.4f}...".format(loss.item()),
                      "Val Loss: {:.4f}".format(np.mean(val_losses)))


In [11]:
net = CharRNN(chars, n_hidden=512, n_layers=2)
print(net)

CharRNN(
  (lstm): LSTM(83, 512, num_layers=2, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc): Linear(in_features=512, out_features=83, bias=True)
)


In [12]:
n_seqs, n_steps = 128, 100
train(net, encoded, epochs=25, n_seqs=n_seqs, n_steps=n_steps, lr=0.001, cuda=True, print_every=10)


s: 2.0892
Epoch: 3/25... Step: 280... Loss: 2.0230... Val Loss: 2.0764
Epoch: 3/25... Step: 290... Loss: 2.0142... Val Loss: 2.0715
Epoch: 3/25... Step: 300... Loss: 1.9707... Val Loss: 2.0479
Epoch: 3/25... Step: 310... Loss: 1.9550... Val Loss: 2.0414
Epoch: 3/25... Step: 320... Loss: 1.9291... Val Loss: 2.0328
Epoch: 3/25... Step: 330... Loss: 1.9045... Val Loss: 2.0214
Epoch: 3/25... Step: 340... Loss: 1.9532... Val Loss: 2.0127
Epoch: 3/25... Step: 350... Loss: 1.9102... Val Loss: 1.9982
Epoch: 3/25... Step: 360... Loss: 1.8532... Val Loss: 2.0246
Epoch: 3/25... Step: 370... Loss: 1.8925... Val Loss: 1.9748
Epoch: 3/25... Step: 380... Loss: 1.8950... Val Loss: 2.0188
Epoch: 3/25... Step: 390... Loss: 1.8572... Val Loss: 2.0122
Epoch: 3/25... Step: 400... Loss: 1.8464... Val Loss: 1.9541
Epoch: 3/25... Step: 410... Loss: 1.8473... Val Loss: 1.9934
Epoch: 4/25... Step: 420... Loss: 1.8510... Val Loss: 1.9375
Epoch: 4/25... Step: 430... Loss: 1.8339... Val Loss: 1.9280
Epoch: 4/25...

In [13]:
model_name = 'rnn_25_epoch.net'

checkpoint = {'n_hidden': net.n_hidden,
              'n_layers': net.n_layers,
              'state_dict': net.state_dict(),
              'tokens': net.chars}

with open(model_name, 'wb') as f:
    torch.save(checkpoint, f)

In [14]:
def sample(net, size, prime="The", top_k=None, cuda=True):
    if cuda: net.cuda()
    net.eval()
    chars = [ch for ch in prime]
    h = net.init_hidden(1)

    for ch in prime:
        char, h = net.predict(ch, h, cuda=cuda, top_k=top_k)

    chars.append(char)

    for i in range(size):
        char, h = net.predict(chars[-1], h, cuda=cuda, top_k=top_k)
        chars.append(char)

    return "".join(chars)

In [15]:
print(sample(net, 2000, prime='Anna', top_k=5, cuda=True))

Anna.

And the singly weathere were as though as he heard her husband he went
up, and was so awkward to his brother, who sat surrounded by the sick
man, and he could not have told some more, boining that the moter
sincered and the sense of whom he had been a long while there
she had thought for the state. He was not a child. Before he stood
beginning to her that he was simply, what had been a party
still at the committee that was a sort of stelling on a lot that
tricked at the sacious accesting on his singer with his humiliations
with her talking at the words.

The depression had been done, well, so he could not ask them at the same
time, before his beauty; to be still asterlious weare with his
brother that she went to his wife.

"I shall be, I can't go away," answered Levin, shaming her their head, the
daughter so as thinking to her, and the same talking of any official
significance. And wish that she was said to him.

"Yes, it's all at announce in this candue, that's the most
conditi