<a href="https://colab.research.google.com/github/purvasingh96/Deep-learning-with-neural-networks/blob/master/Chapter-wise%20code/Code%20-%20PyTorch/3.%20Recurrent%20Neural%20Networks/Harry_Potter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import torch
import numpy as np
from torch import nn
import torch.nn.functional as F

In [0]:
with open('harry_potter_1.txt', 'r') as f:
  text = f.read()

In [188]:
text[:100]

'1 CHAPTER ONE The Boy Who Lived M r and Mrs Dursley, of number four, Privet Drive, were proud to say'

In [0]:
chars = tuple(set(text))
int2char = dict(enumerate(chars))
char2int = {ch : ii for ii, ch in int2char.items()}
encoded = np.array([char2int[ch] for ch in text])

In [0]:
def one_hot_encode(arr, n_labels):
    
    # Initialize the the encoded array
    one_hot = np.zeros((arr.size, n_labels), dtype=np.float32)
    
    # Fill the appropriate elements with ones
    one_hot[np.arange(one_hot.shape[0]), arr.flatten()] = 1.
    
    # Finally reshape it to get back to the original array
    one_hot = one_hot.reshape((*arr.shape, n_labels))
    
    return one_hot

In [191]:
test = np.array([[3, 4, 6]])
one_hot = one_hot_encode(test, 8)
print(one_hot)

[[[0. 0. 0. 1. 0. 0. 0. 0.]
  [0. 0. 0. 0. 1. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 1. 0.]]]


In [0]:
def get_batches(arr, batch_size, seq_length):
  batch_size_total = batch_size*seq_length
  n_batches = len(arr)//batch_size_total
  arr = arr[:n_batches*batch_size_total]
  arr = arr.reshape((batch_size, -1))

  for n in range(0, arr.shape[1], seq_length):
      # The features
      x = arr[:, n:n+seq_length]
      # The targets, shifted by one
      y = np.zeros_like(x)
      try:
        y[:, :-1], y[:, -1] = x[:, 1:], arr[:, n+seq_length]
      except IndexError:
          y[:, :-1], y[:, -1] = x[:, 1:], arr[:, 0]
      yield x, y


In [193]:
batches = generate_batches(encoded, 8, 50)
x, y = next(batches)

print('x\n', x[:10, :10])
print('\ny\n', y[:10, :10])

x
 [[29 63 64 60 37  9 53 59 73 63]
 [42 76 16 26 33 67 12 63 35  6]
 [65 63 22  6 30 16  4 11  6 26]
 [12  4 33  1 63 11 12 30 22 11]
 [63 34 42 40 56  6 12 44 63 68]
 [30 11 63 76 33 11 12  4 40 56]
 [40 30 33 67 12 63 43  6 30 33]
 [28 68 11 63 12 35 30 12 63  7]]

y
 [[63 64 60 37  9 53 59 73 63 25]
 [76 16 26 33 67 12 63 35  6 16]
 [63 22  6 30 16  4 11  6 26 63]
 [ 4 33  1 63 11 12 30 22 11 63]
 [34 42 40 56  6 12 44 63 68 12]
 [11 63 76 33 11 12  4 40 56  4]
 [30 33 67 12 63 43  6 30 33 63]
 [68 11 63 12 35 30 12 63  7 35]]


In [0]:
class CharRNN(nn.Module):
  def __init__(self, tokens, n_hidden=256, n_layers=2, drop_prob=0.5, lr=0.001):
    super().__init__()
    self.drop_prob = drop_prob
    self.n_layers = n_layers
    self.n_hidden = n_hidden
    self.lr = lr

    self.chars = tokens
    self.int2char = dict(enumerate(self.chars))
    self.char2int = {ch: ii for ii, ch in self.int2char.items()}

    self.lstm = nn.LSTM(len(self.chars), n_hidden, n_layers, dropout=drop_prob, batch_first=True)

    self.dropout = nn.Dropout(drop_prob)

    self.fc = nn.Linear(n_hidden, len(self.chars))
  
  def forward(self, x, hidden):
    r_output, hidden = self.lstm(x, hidden)
    out = self.dropout(r_output)
    out = out.contiguous().view(-1, self.n_hidden)
    out = self.fc(out)

    return out, hidden

  def init_hidden(self, batch_size):
    weight = next(self.parameters()).data
    hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda(),
              weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda())
    return hidden


In [195]:
# check if GPU is available
train_on_gpu = torch.cuda.is_available()
if(train_on_gpu):
    print('Training on GPU!')
else: 
    print('No GPU available, training on CPU; consider making n_epochs very small.')

Training on GPU!


In [0]:
def train(net, data, epochs=10, batch_size=10, seq_length=50, lr=0.001, clip=5, val_frac=0.1, print_every=10):
    ''' Training a network 
    
        Arguments
        ---------
        
        net: CharRNN network
        data: text data to train the network
        epochs: Number of epochs to train
        batch_size: Number of mini-sequences per mini-batch, aka batch size
        seq_length: Number of character steps per mini-batch
        lr: learning rate
        clip: gradient clipping
        val_frac: Fraction of data to hold out for validation
        print_every: Number of steps for printing training and validation loss
    
    '''
    net.train()
    
    opt = torch.optim.Adam(net.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    
    # create training and validation data
    val_idx = int(len(data)*(1-val_frac))
    data, val_data = data[:val_idx], data[val_idx:]
    
    if(train_on_gpu):
        net.cuda()
    
    counter = 0
    n_chars = len(net.chars)
    for e in range(epochs):
        # initialize hidden state
        h = net.init_hidden(batch_size)
        
        for x, y in get_batches(data, batch_size, seq_length):
            counter += 1
            
            # One-hot encode our data and make them Torch tensors
            x = one_hot_encode(x, n_chars)
            inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
            
            if(train_on_gpu):
                inputs, targets = inputs.cuda(), targets.cuda()

            # Creating new variables for the hidden state, otherwise
            # we'd backprop through the entire training history
            h = tuple([each.data for each in h])

            # zero accumulated gradients
            net.zero_grad()
            
            # get the output from the model
            output, h = net(inputs, h)
            
            # calculate the loss and perform backprop
            loss = criterion(output, targets.view(batch_size*seq_length).long())
            loss.backward()
            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            nn.utils.clip_grad_norm_(net.parameters(), clip)
            opt.step()
            
            # loss stats
            if counter % print_every == 0:
                # Get validation loss
                val_h = net.init_hidden(batch_size)
                val_losses = []
                net.eval()
                for x, y in get_batches(val_data, batch_size, seq_length):
                    # One-hot encode our data and make them Torch tensors
                    x = one_hot_encode(x, n_chars)
                    x, y = torch.from_numpy(x), torch.from_numpy(y)
                    
                    # Creating new variables for the hidden state, otherwise
                    # we'd backprop through the entire training history
                    val_h = tuple([each.data for each in val_h])
                    
                    inputs, targets = x, y
                    if(train_on_gpu):
                        inputs, targets = inputs.cuda(), targets.cuda()

                    output, val_h = net(inputs, val_h)
                    val_loss = criterion(output, targets.view(batch_size*seq_length).long())
                
                    val_losses.append(val_loss.item())
                
                net.train() # reset to train mode after iterationg through validation data
                
                print("Epoch: {}/{}...".format(e+1, epochs),
                      "Step: {}...".format(counter),
                      "Loss: {:.4f}...".format(loss.item()),
                      "Val Loss: {:.4f}".format(np.mean(val_losses)))

In [198]:
n_hidden = 512
n_layers = 3

net = CharRNN(chars, n_hidden, n_layers)
print(net)
batch_size = 10
seq_length = 10
n_epochs = 40
train(net, encoded, epochs=n_epochs, batch_size=batch_size, seq_length=seq_length, lr=0.001, print_every=10)

CharRNN(
  (lstm): LSTM(77, 512, num_layers=3, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc): Linear(in_features=512, out_features=77, bias=True)
)
Epoch: 1/40... Step: 10... Loss: 3.1513... Val Loss: 3.3527
Epoch: 1/40... Step: 20... Loss: 3.4311... Val Loss: 3.3249
Epoch: 1/40... Step: 30... Loss: 3.5290... Val Loss: 3.2952
Epoch: 1/40... Step: 40... Loss: 3.2703... Val Loss: 3.2745
Epoch: 1/40... Step: 50... Loss: 3.0551... Val Loss: 3.2843
Epoch: 1/40... Step: 60... Loss: 3.4411... Val Loss: 3.2833
Epoch: 1/40... Step: 70... Loss: 3.2058... Val Loss: 3.2719
Epoch: 1/40... Step: 80... Loss: 3.2626... Val Loss: 3.2796
Epoch: 1/40... Step: 90... Loss: 3.2019... Val Loss: 3.2726
Epoch: 1/40... Step: 100... Loss: 3.3569... Val Loss: 3.2725
Epoch: 1/40... Step: 110... Loss: 3.1359... Val Loss: 3.2685
Epoch: 1/40... Step: 120... Loss: 3.2607... Val Loss: 3.2667
Epoch: 1/40... Step: 130... Loss: 3.3359... Val Loss: 3.2553
Epoch: 1/40... Step: 140... Loss:

In [0]:
def predict(net, char, h=None, top_k=None):
  x = np.array([[net.char2int[char]]])
  x = one_hot_encode(x, len(net.chars))
  inputs = torch.from_numpy(x)
  if(train_on_gpu):
    inputs = inputs.cuda()
  h = tuple([each.data for each in h])
  out, h = net(inputs, h)

  p = F.softmax(out, dim=1).data
  if(train_on_gpu):
    p = p.cpu()
  
  if top_k is None:
    top_ch = np.arange(len(net.chars))
  else:
      p, top_ch = p.topk(top_k)
      top_ch = top_ch.numpy().squeeze()
        
  # select the likely next character with some element of randomness
  p = p.numpy().squeeze()
  char = np.random.choice(top_ch, p=p/p.sum())
        
  # return the encoded value of the predicted char and the hidden state
  return net.int2char[char], h

In [0]:
def sample(net, size, prime='The', top_k=None):
        
    if(train_on_gpu):
        net.cuda()
    else:
        net.cpu()
    
    net.eval() # eval mode
    
    # First off, run through the prime characters
    chars = [ch for ch in prime]
    h = net.init_hidden(1)
    for ch in prime:
        char, h = predict(net, ch, h, top_k=top_k)

    chars.append(char)
    
    # Now pass in the previous character and get a new one
    for ii in range(size):
        char, h = predict(net, chars[-1], h, top_k=top_k)
        chars.append(char)

    return ''.join(chars)

In [201]:
print(sample(net, 1000, prime='Harry', top_k=5))

Harry Potter, he was still staring to the wall. ‘It’s – theme people to tolk and Mrs Dursley gossepibed shanps into a sight of diskbing. The Dursleys shudderly, but they was nothing how is the bashroom, Mr Dursley picked up his brigh twat when he dad seemed Potter who had a small son, head. Ne, you could have been crinking our over the monn. He had seen a lattle swoke and sat aistore and Mrs Dursley stopped Mrs Dursley. ‘Well, He couldn’t kill Harry Potter cake the Put-Outer almost twice as quobas arose his freat the weother of showing on the people he’s kissed and stared at the window. ‘Shhohid never know … her sister, buckus it was shousing a map. How very walk havion a smopet and patted her he tanted to the spot. He dad noticed something as it, the Potters … Hapry, the thought before he can homing a little come sear as she wis now and Mrs Dursley stupped 28493.indb 13 8493.indb 14 18/07/2014 16:36 8/07/2014 16:36 8/07/2014 16:36 5 THE BOY WHO LIVED to notice something as inetead wha