In [2]:
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F

In [3]:
file = './deep-learning-v2-pytorch/recurrent-neural-networks/char-rnn/data/anna.txt'
with open(file) as f:
    text = f.read()

In [4]:
text[:200]

"Chapter 1\n\n\nHappy families are all alike; every unhappy family is unhappy in its own\nway.\n\nEverything was in confusion in the Oblonskys' house. The wife had\ndiscovered that the husband was carrying on"

In [5]:
chars = tuple(set(text))
int2char = dict(enumerate(chars))
char2int = {c:i for i, c in int2char.items()}


In [6]:
encoded = np.array([char2int[c] for c in text])

In [7]:
encoded.shape

(1985223,)

In [8]:
def one_hot_encode(arr, n_labels):
    
    # Initialize the the encoded array
    one_hot = np.zeros((np.multiply(*arr.shape), n_labels), dtype=np.float32)
    
    # Fill the appropriate elements with ones
    one_hot[np.arange(one_hot.shape[0]), arr.flatten()] = 1.
    
    # Finally reshape it to get back to the original array
    one_hot = one_hot.reshape((*arr.shape, n_labels))
    
    return one_hot


test_seq = np.array([[3, 5, 1]])
one_hot = one_hot_encode(test_seq, 8)

print(test_seq.shape)
print(one_hot.shape)
print(one_hot)
    

(1, 3)
(1, 3, 8)
[[[0. 0. 0. 1. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 1. 0. 0.]
  [0. 1. 0. 0. 0. 0. 0. 0.]]]


In [9]:
def get_batches(arr, batch_size, seq_length):
    '''Create a generator that returns batches of size
       batch_size x seq_length from arr.
       
       Arguments
       ---------
       arr: Array you want to make batches from
       batch_size: Batch size, the number of sequences per batch
       seq_length: Number of encoded chars in a sequence
    '''
    
    batch_size_total = batch_size * seq_length
    # total number of batches we can make
    n_batches = len(arr)//batch_size_total
    
    # Keep only enough characters to make full batches
    arr = arr[:n_batches * batch_size_total]
    # Reshape into batch_size rows
    arr = arr.reshape((batch_size, -1))
    
    # iterate through the array, one sequence at a time
    for n in range(0, arr.shape[1], seq_length):
        # The features
        x = arr[:, n:n+seq_length]
        # The targets, shifted by one
        y = np.zeros_like(x)
        try:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, n+seq_length]
        except IndexError:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, 0]
        yield x, y

In [10]:
batches = get_batches(encoded, 8, 50)

In [11]:
x, y = next(batches)

In [12]:
# printing out the first 10 items in a sequence
print('x\n', x[:10, :10])
print('\ny\n', y[:10, :10])

x
 [[52 15 14 69 58 68 39 57 56 80]
 [38 24 30 57 58 15 14 58 57 14]
 [68 30 63 57 24 39 57 14 57  6]
 [38 57 58 15 68 57 32 15  1 68]
 [57 38 14 70 57 15 68 39 57 58]
 [32 61 38 38  1 24 30 57 14 30]
 [57 36 30 30 14 57 15 14 63 57]
 [71  9 48 24 30 38 33 54 79 57]]

y
 [[15 14 69 58 68 39 57 56 80 80]
 [24 30 57 58 15 14 58 57 14 58]
 [30 63 57 24 39 57 14 57  6 24]
 [57 58 15 68 57 32 15  1 68  6]
 [38 14 70 57 15 68 39 57 58 68]
 [61 38 38  1 24 30 57 14 30 63]
 [36 30 30 14 57 15 14 63 57 38]
 [ 9 48 24 30 38 33 54 79 57 37]]


In [13]:
class CharNN(nn.Module):
    def __init__(self, 
                tokens,
                n_hidden=256,
                n_layers=2,
                drop_prob=0.5,
                lr=0.001):
        
        super().__init__()
        self.drop_prob = drop_prob
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.lr = lr
        
        self.chars = tokens
        self.int2char = dict(enumerate(self.chars))
        self.char2int = {c:i for i, c in self.int2char.items()}
        
        self.lstm = nn.LSTM(len(self.chars), n_hidden, n_layers,
                           dropout=drop_prob, batch_first=True)
        
        self.dropout = nn.Dropout(drop_prob)
        
        self.fc = nn.Linear(n_hidden, len(self.chars))
        
        
    def forward(self, x, hidden):
        r_output, hidden = self.lstm(x, hidden)
        
        out = self.dropout(r_output)
        
        out = out.contiguous().view(-1, self.n_hidden)
        
        out = self.fc(out)
        
        return out, hidden
    
    
    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data
        
        hidden = (
            weight.new(self.n_layers, batch_size, self.n_hidden).zero_(),
            weight.new(self.n_layers, batch_size, self.n_hidden).zero_()            
        )
        
        return hidden
        
         

In [16]:
test = CharNN(chars, 512, 2)
print(type(test.init_hidden(8)[0]))
print(type(test.init_hidden(8)[0].data))

<class 'torch.Tensor'>
<class 'torch.Tensor'>


In [1]:
val_frac = 0.1
lr = 0.001
batch_size = 128
seq_length = 100
epochs = 20
n_chars = len(chars)
net = CharNN(chars, 512, 2)
print(net)



NameError: name 'chars' is not defined

In [None]:
net.train()

opt = torch.optim.Adam(net.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()

# create training and validation data
val_idx = int(len(encoded)*(1-val_frac))
data, val_data = encoded[:val_idx], encoded[val_idx:]


counter = 0
n_chars = len(net.chars)
for e in range(epochs):
    # initialize hidden state
    h = net.init_hidden(batch_size)

    for x, y in get_batches(data, batch_size, seq_length):
        counter += 1

        # One-hot encode our data and make them Torch tensors
        x = one_hot_encode(x, n_chars)
        inputs, targets = torch.from_numpy(x), torch.from_numpy(y)

        # Creating new variables for the hidden state, otherwise
        # we'd backprop through the entire training history
        h = tuple([each.data for each in h])

        # zero accumulated gradients
        net.zero_grad()

        # get the output from the model
        output, h = net(inputs, h)

        # calculate the loss and perform backprop
        loss = criterion(output, targets.view(batch_size*seq_length))
        loss.backward()
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        nn.utils.clip_grad_norm_(net.parameters(), 5)
        opt.step()

        # loss stats
        if counter % 10 == 0:
            # Get validation loss
            val_h = net.init_hidden(batch_size)
            val_losses = []
            net.eval()
            for x, y in get_batches(val_data, batch_size, seq_length):
                # One-hot encode our data and make them Torch tensors
                x = one_hot_encode(x, n_chars)
                x, y = torch.from_numpy(x), torch.from_numpy(y)

                # Creating new variables for the hidden state, otherwise
                # we'd backprop through the entire training history
                val_h = tuple([each.data for each in val_h])

                inputs, targets = x, y

                output, val_h = net(inputs, val_h)
                val_loss = criterion(output, targets.view(batch_size*seq_length))

                val_losses.append(val_loss.item())

            net.train() # reset to train mode after iterationg through validation data

            print("Epoch: {}/{}...".format(e+1, epochs),
                  "Step: {}...".format(counter),
                  "Loss: {:.4f}...".format(loss.item()),
                  "Val Loss: {:.4f}".format(np.mean(val_losses)))

Epoch: 1/20... Step: 10... Loss: 3.1562... Val Loss: 3.1214
Epoch: 1/20... Step: 20... Loss: 3.1067... Val Loss: 3.1185
Epoch: 1/20... Step: 30... Loss: 3.1182... Val Loss: 3.1147
Epoch: 1/20... Step: 40... Loss: 3.0950... Val Loss: 3.1089
Epoch: 1/20... Step: 50... Loss: 3.1033... Val Loss: 3.0808
Epoch: 1/20... Step: 60... Loss: 2.9854... Val Loss: 2.9922
Epoch: 1/20... Step: 70... Loss: 2.8349... Val Loss: 2.8192
Epoch: 1/20... Step: 80... Loss: 2.7043... Val Loss: 2.7176
Epoch: 1/20... Step: 90... Loss: 2.6229... Val Loss: 2.5770
Epoch: 1/20... Step: 100... Loss: 2.5284... Val Loss: 2.5078
Epoch: 1/20... Step: 110... Loss: 2.4741... Val Loss: 2.4544
Epoch: 1/20... Step: 120... Loss: 2.3964... Val Loss: 2.4043
Epoch: 1/20... Step: 130... Loss: 2.4090... Val Loss: 2.3725
Epoch: 2/20... Step: 140... Loss: 2.3701... Val Loss: 2.3338
Epoch: 2/20... Step: 150... Loss: 2.3318... Val Loss: 2.2935
Epoch: 2/20... Step: 160... Loss: 2.2989... Val Loss: 2.2599
Epoch: 2/20... Step: 170... Loss: