In [104]:
#importing libraries
import numpy as np
import torch
from torch import nn 
import torch.nn.functional as F

#### Data Encoding

In [105]:
#importing the text file
with open('data/shakespear.txt','r') as f: 
    text = f.read()

In [106]:
# encoding text by mapping characters to integers and vice versa 
chars = tuple(set(text))
int2char = dict(enumerate(chars))
char2int = {c:i for i,c in int2char.items()}

# econding text 
encoded_text = np.array([char2int[c] for c in text])

In [107]:
len(text),len(encoded_text)

(1835008, 1835008)

In [108]:
# creating one-hot vectors
def one_hot_encoding(arr,labels):
    
    # initalize
    one_hot = np.zeros((np.multiply(*arr.shape), labels), dtype=np.float32)
    
    # fill in with ones
    one_hot[np.arange(one_hot.shape[0]), arr.flatten()] = 1.
    
    # reshape back to the original array
    one_hot = one_hot.reshape((*arr.shape, labels))
    
    return one_hot

#### Mini-batches 
> speparating the file into mini batches to speed up training

In [109]:
# Defining method to make mini-batches for training
def get_batches(arr, batch_size, seq_length):
    '''Create a generator that returns batches of size
       batch_size x seq_length from arr.
       
       Arguments
       ---------
       arr: Array you want to make batches from
       batch_size: Batch size, the number of sequences per batch
       seq_length: Number of encoded chars in a sequence
    '''
    
    batch_size_total = batch_size * seq_length
    
    # total number of batches we can make
    n_batches = len(arr)//batch_size_total
    
    # Keep only enough characters to make full batches
    arr = arr[:n_batches * batch_size_total]
    # Reshape into batch_size rows
    arr = arr.reshape((batch_size, -1))
    
    # iterate through the array, one sequence at a time
    for n in range(0, arr.shape[1], seq_length):
        # The features
        x = arr[:, n:n+seq_length]
        # The targets, shifted by one
        y = np.zeros_like(x)
        try:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, n+seq_length]
        except IndexError:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, 0]
        yield x, y

#### Building Model 

In [110]:
train_on_gpu = torch.cuda.is_available()
if(train_on_gpu):
    print('Training on GPU!')
else: 
    print('No GPU available, training on CPU; consider making n_epochs very small.')

No GPU available, training on CPU; consider making n_epochs very small.


##### Defining the RNN network

In [111]:
class CharRNN(nn.Module):

    def __init__(self, tokens, n_hidden=512, n_layers=2, drop_prob=0.5, lr=0.001):
            
            super().__init__()
            self.drop_prob = drop_prob
            self.n_layers = n_layers
            self.n_hidden = n_hidden
            self.lr = lr
            
            # creating dictionaries
            self.chars = tokens
            self.int2char = dict(enumerate(chars))
            self.char2int = {c:i for i,c in int2char.items()}
            
            # LSTM - https://pytorch.org/docs/stable/nn.html
            self.lstm = nn.LSTM(len(self.chars),n_hidden,n_layers,dropout = drop_prob,batch_first = True)
            
            # drop out layer
            self.dropout = nn.Dropout(drop_prob)
            
            # fully connected layer - final layer
            self.fc = nn.Linear(n_hidden,len(self.chars))
            
    # forward function - propagating input and memory values 
    def forward (self, x, hidden):

        # output of new hidden state from lstm model 
        r_output, hidden = self.lstm(x, hidden)
        
        # go through the drop out layer 
        out = self.dropout(r_output)

        # stack lstm outputs 
        out = out.contiguous().view(-1,self.n_hidden)

        # inputs pass through the fully connected layer
        out = self.fc(out)

        return out,hidden
    
    
    # initializing the hidden value for the correct batch size if you’re using mini-batches.
    def init_hidden(self, batch_size):
        ''' Initializes hidden state '''
        # Create two new tensors with sizes n_layers x batch_size x n_hidden,
        # initialized to zero, for hidden state and cell state of LSTM
        weight = next(self.parameters()).data

        if (train_on_gpu):
            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda(),
                  weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda())
        else:
            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_(),
                      weight.new(self.n_layers, batch_size, self.n_hidden).zero_())

        return hidden

#### Training

In [112]:
def train(net, data, epochs=10, batch_size=10, seq_length=50, lr=0.001, clip=5, val_frac=0.1, print_every=10):
    
    ''' Training a network 
    
        Arguments
        ---------
        
        net: CharRNN network
        data: text data to train the network
        epochs: Number of epochs to train
        batch_size: Number of mini-sequences per mini-batch, aka batch size
        seq_length: Number of character steps per mini-batch
        lr: learning rate
        clip: gradient clipping
        val_frac: Fraction of data to hold out for validation
        print_every: Number of steps for printing training and validation loss
    
    '''
    
    net.train()
    
    opt = torch.optim.Adam(net.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    
    #creating training & validating data
    val_idx = int(len(data)*(1-val_frac))
    data, val_data = data[:val_idx], data[val_idx:]
    
    if(train_on_gpu):
        net.cuda()
        
    counter = 0 
    n_chars = len(net.chars)
    
    for e in range(epochs):
        
        #hidden state
        h = net.init_hidden(batch_size)
        
        for x,y in get_batches(data,batch_size,seq_length):
            counter += 1
            
            #one hot encode
            x = one_hot_encoding(x,n_chars)
            inputs,targets = torch.from_numpy(x),torch.from_numpy(y)
            
            
            if(train_on_gpu):
                inputs, targets = inputs.cuda(), targets.cuda()

            # Creating new variables for the hidden state
            h = tuple([each.data for each in h])
            
            #setting gradient 
            net.zero_grad()
            
            #model output 
            output,h = net(inputs,h)
            
            #calculate loss, then backpropagate
            loss = criterion(output,targets.view(batch_size*seq_length).long()) 
            loss.backward()
            
            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            # https://pytorch.org/docs/stable/nn.html#clip-grad-norm
            nn.utils.clip_grad_norm_(net.parameters(), clip)
            opt.step()
            
            # loss stats
            if counter % print_every == 0 :
                #get_valid loss
                val_h = net.init_hidden(batch_size)
                val_losses = []
                net.eval()
                
                for x,y in get_batches(data,batch_size,seq_length):
                    # one hot encode
                    x = one_hot_encoding(x,n_chars)
                    x,y = torch.from_numpy(x),torch.from_numpy(y)
        
                    val_h = tuple([each.data for each in h])
            
                    if(train_on_gpu):
                        x, y = inputs.cuda(), targets.cuda()
                    
                    output, val_h = net(x, val_h)
                    val_loss = criterion(output, y.view(batch_size*seq_length).long())
                    
                    val_losses.append(val_loss.item())
                    
                net.train()
                
                
                print("Epoch: {}/{}...".format(e+1, epochs),
                      "Step: {}...".format(counter),
                      "Loss: {:.4f}...".format(loss.item()),
                      "Val Loss: {:.4f}".format(np.mean(val_losses)))

In [113]:
net = CharRNN(chars)
print(net)


CharRNN(
  (lstm): LSTM(82, 512, num_layers=2, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5)
  (fc): Linear(in_features=512, out_features=82, bias=True)
)


In [114]:
batch_size = 128
seq_length = 100
n_epochs = 20

# train the model
train(net, encoded_text, epochs=n_epochs, batch_size=batch_size, seq_length=seq_length, lr=0.001, print_every=50)

Epoch: 1/20... Step: 50... Loss: 3.2098... Val Loss: 3.1827
Epoch: 1/20... Step: 100... Loss: 3.1826... Val Loss: 3.1484
Epoch: 2/20... Step: 150... Loss: 2.8858... Val Loss: 2.8569
Epoch: 2/20... Step: 200... Loss: 2.4888... Val Loss: 2.4428
Epoch: 2/20... Step: 250... Loss: 2.2761... Val Loss: 2.2537
Epoch: 3/20... Step: 300... Loss: 2.1876... Val Loss: 2.1382
Epoch: 3/20... Step: 350... Loss: 2.0618... Val Loss: 2.0525
Epoch: 4/20... Step: 400... Loss: 2.0175... Val Loss: 1.9881
Epoch: 4/20... Step: 450... Loss: 1.9803... Val Loss: 1.9265
Epoch: 4/20... Step: 500... Loss: 1.9049... Val Loss: 1.8730
Epoch: 5/20... Step: 550... Loss: 1.8851... Val Loss: 1.8300
Epoch: 5/20... Step: 600... Loss: 1.8483... Val Loss: 1.7866
Epoch: 6/20... Step: 650... Loss: 1.7554... Val Loss: 1.7484
Epoch: 6/20... Step: 700... Loss: 1.7312... Val Loss: 1.7189
Epoch: 6/20... Step: 750... Loss: 1.7019... Val Loss: 1.6901
Epoch: 7/20... Step: 800... Loss: 1.6864... Val Loss: 1.6582
Epoch: 7/20... Step: 850.

#### Prediction

In [127]:
# predict the next character from the trained RNN with forward propagation.
def predict(net, char, h=None, top_k=None):
    'Given a character, predict the next character'
    'returns predicted character and the hidden state'
    
    #tensor inputs
    x = np.array([[net.char2int[char]]])
    x = one_hot_encoding(x, len(net.chars))
    inputs = torch.from_numpy(x)
    
    if(train_on_gpu):
        inputs = inputs.cuda()
    h = tuple([each.data for each in h])
    
    # get output in model
    out,h = net(inputs,h)
    
    #get char possibility
    p = F.softmax(out,dim=1).data
    
    if(train_on_gpu):
        p = p.cpu()
    
    if top_k is None:
        top_ch = np.arrange(len(net.chars))
    else:
        p,top_ch = p.topk(top_k)
        top_ch = top_ch.numpy().squeeze()
    
    p = p.numpy().squeeze()
    char = np.random.choice(top_ch, p=p/p.sum())
    
    return net.int2char[char],h

#### Sample

In [134]:
def sample(net,size,prime="Flower", top_k=None):
    
    if(train_on_gpu):
        net.cuda()
    else:
        net.cpu()
    
    net.eval() # eval mode
    
    # going through prime characters
    chars = [ch for ch in prime]
    h  = net.init_hidden(1)
    
    for ch in prime:
        char,h = predict(net,ch,h, top_k=top_k)
        
    chars.append(char)
    
    #pass in previous characters, and get new ones 
    for ii in range(size):
        char,h = predict(net,char[-1],h,top_k=top_k)
        chars.append(char)
        
    return ''.join(chars)

In [139]:
print(sample(net, 1000, prime='A', top_k=10))

ADY.
  My lord, sir!
  Laer. No find his house, for me, too bear than milist
    That then what minite my howald speak
    Befind you it to a bareot which he must
    Or thear thou douse me sparing him alast,
    The countrys of some lang. This for his poor
    As would the serve of mischers, there is a poor
    To do which shall-blow and mine, famous,
    As I heaven to anone about the field
    Will break to this fire this heart to think,
    In arries this consul. The that can feed
    As I have bland to him, but I am male;
    To hold him for once; what thering base bettor you,
    Who which we were well how dear here.
    If he can ad fool what many the winder
    Which in the this thoughts in ourselves in thy wife,
    The bitter of the twroted.
  ORLANDO. I am give our heart.
  SAUDUS. I have stan's on; bick it.
  PAROLLES. As yes well suck to-morrow. He't a woman that I ars not wait.

                           Exeunt




SCENE III.
Which and with a possip
Enter and the LODD


