**Import libraries**

In [0]:
%matplotlib inline
import torch
from torch import nn, optim
import numpy as np
import torch.nn.functional as F

In [2]:
from google.colab import drive
drive.mount('/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /gdrive


In [0]:
with open('/gdrive/My Drive/colab_data/anna.txt', 'r') as f:
  data=f.read()

**Create dictionaries for converting Int to Char**

In [0]:
int2char=dict(enumerate(set(data)))

**Char to int**

In [0]:
char2int={x:y for y,x in int2char.items()}

In [0]:
encoded=np.array([char2int[x]for x in data])

**Generator for getting Batches and Sequences during training**

In [0]:
def create_sequences(arr, batch_size, seq_length):
    batch_size_total = batch_size * seq_length
    # total number of batches we can make
    n_batches = len(arr)//batch_size_total
    
    # Keep only enough characters to make full batches
    arr = arr[:n_batches * batch_size_total]
    # Reshape into batch_size rows
    arr = arr.reshape((batch_size, -1))
    
    # iterate through the array, one sequence at a time
    for n in range(0, arr.shape[1], seq_length):
        # The features
        x = arr[:, n:n+seq_length]
        # The targets, shifted by one
        y = np.zeros_like(x)
        try:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, n+seq_length]
        except IndexError:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, 0]
        yield x, y

In [0]:
# print(len(encoded))
x,y=next(create_sequences(encoded,8,50))
print(x.shape,y.shape)
print("x : ",x[:10,:10])

print("y : ",y[:10,:10])

(8, 50) (8, 50)
x :  [[13  8 18 75 10 50 29 69 70 25]
 [16 65 60 69 10  8 18 10 69 18]
 [50 60 32 69 65 29 69 18 69 71]
 [16 69 10  8 50 69 23  8 58 50]
 [69 16 18 34 69  8 50 29 69 10]
 [23  2 16 16 58 65 60 69 18 60]
 [69  3 60 60 18 69  8 18 32 69]
 [51 53 64 65 60 16  1 72  4 69]]
y :  [[ 8 18 75 10 50 29 69 70 25 25]
 [65 60 69 10  8 18 10 69 18 10]
 [60 32 69 65 29 69 18 69 71 65]
 [69 10  8 50 69 23  8 58 50 71]
 [16 18 34 69  8 50 29 69 10 50]
 [ 2 16 16 58 65 60 69 18 60 32]
 [ 3 60 60 18 69  8 18 32 69 16]
 [53 64 65 60 16  1 72  4 69 68]]


**Function for converting numbers to One Hot Encoder**

In [0]:
def one_hot_encode(arr, n_labels):
    
    # Initialize the the encoded array
    one_hot = np.zeros((np.multiply(*arr.shape), n_labels), dtype=np.float32)
    
    # Fill the appropriate elements with ones
    one_hot[np.arange(one_hot.shape[0]), arr.flatten()] = 1.
    
    # Finally reshape it to get back to the original array
    one_hot = one_hot.reshape((*arr.shape, n_labels))
    
    return one_hot

**Character LSTM Model**

In [0]:
class CharRNN(nn.Module):
  def __init__(self,n_char,n_hidden=256,n_layers=2,drop_prob=0.3,lr=0.001):
    super().__init__()
    
    self.int2char=dict(enumerate(set(data)))
    self.char2int={x:y for y,x in int2char.items()}
    self.lr=lr
    self.n_char=n_char
    self.n_hidden=n_hidden
    self.n_layers=n_layers
    self.drop_prob=drop_prob
    
    
    self.lstm=nn.LSTM(self.n_char,self.n_hidden,self.n_layers,dropout=self.drop_prob,batch_first=True)
    self.dropout=nn.Dropout(self.drop_prob)
    self.fc=nn.Linear(self.n_hidden,self.n_char)
    
    self.softmax=nn.Softmax()
    
  def forward(self,x,hidden):
    x,hidden=self.lstm(x,hidden)
#     print("LSTM : ",x.shape)|
    
    x=self.dropout(x)
#     print("Dropout : ",x.shape)
    
    x = x.contiguous().view(-1, self.n_hidden)
    
    x=self.fc(x)
#     print("FC : ",x.shape)
    
#     x=self.softmax(x)
    
    return x,hidden
  
  def init_hidden(self,batch_size):
    weight=next(self.parameters()).data
    hidden=(weight.new(self.n_layers,batch_size,self.n_hidden).zero_().cuda(),
            weight.new(self.n_layers,batch_size,self.n_hidden).zero_().cuda())
    return hidden
    

**Model Object**

In [0]:
model=CharRNN(n_char=len(tuple(set(data))), n_hidden=512, n_layers=2, drop_prob=0.2, lr=0.001)
model.cuda()

CharRNN(
  (lstm): LSTM(83, 512, num_layers=2, batch_first=True, dropout=0.2)
  (dropout): Dropout(p=0.2)
  (fc): Linear(in_features=512, out_features=83, bias=True)
  (softmax): Softmax()
)

**Get initial weights for input**

In [0]:
hidden=model.init_hidden(128)
print((len(hidden[0][0][0]),len(hidden[1][0][0])))

(512, 512)


**Training objects**

In [0]:
criterion=nn.CrossEntropyLoss()
optimizer=optim.Adam(model.parameters(),lr=model.lr)

**Train**

In [0]:
epochs=35
batch_size=128
for e in range(epochs):
  running_loss=0
  h=model.init_hidden(batch_size)
  for x,y in create_sequences(encoded,128,100):
    x = one_hot_encode(x, len(set(data)))
#     y = one_hot_encode(y, len(set(data)))
    x=torch.from_numpy(x).cuda()
    y=torch.from_numpy(y).cuda()
    
    model.zero_grad()
    logits,h=model(x,h)
    y=y.view(batch_size*100).long()
#     print(logits.shape,y.shape)
    loss=criterion(logits,y)
    loss.backward()
    
    optimizer.step()
    
    running_loss+=loss.item()
  else:
    print('Training loss : ',running_loss/12800)
    
    

Training loss :  0.015535097299143673
Training loss :  0.015235918238759041
Training loss :  0.014978739526122808
Training loss :  0.014724350916221738
Training loss :  0.014478517239913344
Training loss :  0.014271963750943542
Training loss :  0.014086047038435936
Training loss :  0.013908609487116337
Training loss :  0.013744011344388128
Training loss :  0.013590997578576207
Training loss :  0.013450111886486411
Training loss :  0.013325350722298027
Training loss :  0.013185132704675197
Training loss :  0.013056178614497184
Training loss :  0.012949336934834719
Training loss :  0.012832386232912541
Training loss :  0.01273253588937223
Training loss :  0.012637988375499844
Training loss :  0.01253236404620111
Training loss :  0.012436707555316388
Training loss :  0.012345033269375562
Training loss :  0.012243977687321604
Training loss :  0.012165154232643544
Training loss :  0.012071078973822296
Training loss :  0.012003877540118993
Training loss :  0.011921610250137746
Training loss 

**Prediction function**

In [0]:
def predict(net, char, h=None, top_k=None):
        ''' Given a character, predict the next character.
            Returns the predicted character and the hidden state.
        '''
        
        # tensor inputs
        x = np.array([[net.char2int[char]]])
        x = one_hot_encode(x, net.n_char)
        inputs = torch.from_numpy(x)
        
        inputs = inputs.cuda()
        
        # detach hidden state from history
        h = tuple([each.data for each in h])
        # get the output of the model
        out, h = net(inputs, h)

        # get the character probabilities
        p = F.softmax(out, dim=1).data
        p = p.cpu() # move to cpu
        
        # get top characters
        if top_k is None:
            top_ch = np.arange(len(net.chars))
        else:
            p, top_ch = p.topk(top_k)
            top_ch = top_ch.numpy().squeeze()
        
        # select the likely next character with some element of randomness
        p = p.numpy().squeeze()
        char = np.random.choice(top_ch, p=p/p.sum())
        
        # return the encoded value of the predicted char and the hidden state
        return net.int2char[char], h

In [0]:
def sample(net, size, prime='The', top_k=None):
        
    net.cuda()
    
    net.eval() # eval mode
    
    # First off, run through the prime characters
    chars = [ch for ch in prime]
    h = net.init_hidden(1)
    for ch in prime:
        char, h = predict(net, ch, h, top_k=top_k)

    chars.append(char)
    
    # Now pass in the previous character and get a new one
    for ii in range(size):
        char, h = predict(net, chars[-1], h, top_k=top_k)
        chars.append(char)

    return ''.join(chars)

**Print Output**

In [0]:
print(sample(model, 1000, prime='Anna', top_k=5))

Anna Arkadyevna's children sat down and
seemed to him an impression of his life, who could not help straight
into her hoofe, where the crown words of his part, still less for the face,
her sister had told her to the low of the room, and with a thurder
signested his hands, and, smiling, and her eyes like her. And she
did not hust any considerate from the old cressey that a subjuct and some wink of
agreement, he had not reached the table, stopped something at the spot. And
again had not the society chest of their conversation alone with her,
as it were, as though he did not let them fall, he could not attent to
something.

Anna said her hat. "He did not conceive that I don't care," said Stepan
Arkadyevitch, "that which was a woman who did not know."

"No, it's all right, that I can can never have saying at home, as I have come for a
moment an impossible to any fanly diverted, and I'm all about it, that
I have been enginged by me to arrive with him. And tell me as a touch
of who has been 