# **Character Prediction Engine**

This is the heart of the smart-keys agent.

Its a LSTM that will take in some representation of the input and return the next character in the sequence.

In [0]:
# Code to check the resources available Ex. GPU RAM
# memory footprint support libraries/code
!ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi
!pip install gputil
!pip install psutil
!pip install humanize
import psutil
import humanize
import os
import GPUtil as GPU
GPUs = GPU.getGPUs()
# XXX: only one GPU on Colab and isn’t guaranteed
gpu = GPUs[0]
def printm():
 process = psutil.Process(os.getpid())
 print("Gen RAM Free: " + humanize.naturalsize( psutil.virtual_memory().available ), " | Proc size: " + humanize.naturalsize( process.memory_info().rss))
 print("GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB".format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))
printm()



Collecting gputil
  Downloading https://files.pythonhosted.org/packages/45/99/837428d26b47ebd6b66d6e1b180e98ec4a557767a93a81a02ea9d6242611/GPUtil-1.3.0.tar.gz
Building wheels for collected packages: gputil
  Running setup.py bdist_wheel for gputil ... [?25l- done
[?25h  Stored in directory: /root/.cache/pip/wheels/17/0f/04/b79c006972335e35472c0b835ed52bfc0815258d409f560108
Successfully built gputil
Installing collected packages: gputil
Successfully installed gputil-1.3.0
Gen RAM Free: 11.3 GB  | Proc size: 141.8 MB
GPU RAM Free: 8295MB | Used: 3146MB | Util  27% | Total 11441MB


#STEPS:

Libraries & Dependencies:
1. Install PyTorch
2. Install any other dependencies/ libraries
3. Import all required libraries including PyTorch

Getting, loading and preparing data:
4. Get text_corpus file from drive to colab instance
5. Read text_corpus file 
6. Prepare the data from text_corpus file for training and testing 
(Find set of characters in corpus, one-hot encode characters depending on vocabulary etc.)

Create, Train and Validate the model:
7. Split the encoded text_corpus into train and validation data
8. Create the LSTM model
9. Train the data using the training encoded text_corpus
10. Validate the model using the validation text_corpus


### **Installing required libraries and dependencies**

In [0]:
# Install pytorch
# http://pytorch.org/
from os import path
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())

accelerator = 'cu80' if path.exists('/opt/bin/nvidia-smi') else 'cpu'

!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.4.0-{platform}-linux_x86_64.whl torchvision

In [0]:
#Check install and version
import torch
print(torch.__version__)

# Check cuda avaialability
if torch.cuda.is_available():
   print("Yay!!")
    

0.4.0
Yay!!


In [0]:
# Installing PyDrive
!pip install PyDrive



### **Importing required libraries and dependencies**

In [0]:
# NumPy & Pytorch

import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.utils.data as data

from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader

In [0]:
# Set_trace is used for debugging
from IPython.core.debugger import set_trace

In [0]:
# PyDrive
# For importing data-set from Google Drive -> Colab VM 

from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client.
# This only needs to be done once in a notebook.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

### **Loading text_corpus into the VM**

In [0]:
fileId = drive.CreateFile({'id': '1stICwWLnHkBeQ60pksTH5IQMgXBmPaAV'}) # id of the file being uploaded
print (fileId['title'])  # outcasts.txt
file = fileId['title']
fileId.GetContentFile(file)  # Save Drive file on Colab VM as a local file

outcasts.txt


In [0]:
# Check if text_corlus is present in the current working directory
!ls

adc.json  outcasts.txt	sample_data  text_corpus


### **Read text_corpus and prepare for training**

In [0]:
with open('outcasts.txt', 'r') as f:
    text_corpus = f.read()

# print(len(text_corpus))

In [0]:
# Set of all unique characters in the corpus, will give us size of vocabulary 
# Will have a-z, A-Z, special characters and numbers present in the text_corpus
characters = set(text_corpus)

print(len(characters))

79


In [0]:
# Creating a dictionary with integers as keys and charactes as values
integer_to_character = dict(enumerate(characters))

print(integer_to_character)

# Swapping the keys with the values to create a character to integer mapping
# Characters as keys and integers as values 
character_to_integer = dict()
for index, char in integer_to_character.items():
  character_to_integer[char] = index

print(len(character_to_integer))

{0: 'f', 1: 'P', 2: 'M', 3: '4', 4: 'c', 5: "'", 6: 'j', 7: 'F', 8: 'U', 9: 'y', 10: 'z', 11: 'ô', 12: '"', 13: 'u', 14: 'k', 15: 'a', 16: 'V', 17: 'Y', 18: 'h', 19: '0', 20: 'o', 21: 'e', 22: '_', 23: '6', 24: '7', 25: '9', 26: ',', 27: 'v', 28: '?', 29: ']', 30: 'W', 31: '\n', 32: 'C', 33: 'i', 34: 'B', 35: 'N', 36: '-', 37: 'd', 38: 'r', 39: 'n', 40: 'K', 41: 'D', 42: 'T', 43: 'E', 44: 'H', 45: '2', 46: ':', 47: '5', 48: '.', 49: '[', 50: 'Z', 51: '1', 52: 'J', 53: 'q', 54: 'X', 55: 'l', 56: 'm', 57: '8', 58: 'O', 59: 'w', 60: '3', 61: 'G', 62: 's', 63: 'b', 64: 'I', 65: 'x', 66: 'S', 67: 't', 68: 'A', 69: ';', 70: 'é', 71: 'ï', 72: '!', 73: ' ', 74: 'L', 75: 'R', 76: 'p', 77: 'g', 78: 'Q'}
79


In [0]:
# Encoding the entire text_corpus using the character to integer mapping

text_corpus_encoded = []

for char in text_corpus:
  text_corpus_encoded.append(character_to_integer[char])
  
text_corpus_encoded = np.array(text_corpus_encoded)


### **Custom Data-batches for training & testing**
The goal is to generate batches with features : stream of characters and targets : same stream of characters but shifted by one character sequentially 

The input to the data loader is the text_corpus but in encoded form (using character to integer mapping)

In [0]:
class TextDataLoader:
  def __init__(self, text_corpus_encoded, no_sequences_per_batch, no_of_characters):
    
    
    self.no_sequences = no_sequences_per_batch
    self.no_characters = no_of_characters
    
    self.batch_size = self.no_sequences * self.no_characters
    self.no_of_batches = len(text_corpus_encoded)//self.batch_size
    
    #print(f"No. of Batches generated from given corpus = {self.no_of_batches}")
    
    # remove extra characters, so we get full batches
    self.text_corpus_encoded = text_corpus_encoded[:self.batch_size * self.no_of_batches]
    
    # Arrange data in no_sequence_per_batch rows and rest columns
    self.text_corpus_encoded = self.text_corpus_encoded.reshape((self.no_sequences,-1))
    
  def make_batches(self):
    # Each step is number of characters apart
    for chars in range(0, self.text_corpus_encoded.shape[1], self.no_characters):
      
      features = self.text_corpus_encoded[:, chars:chars+self.no_characters]
      # Array of zeros with shape similar to features
      lables = np.zeros_like(features) 
      
      try:
        lables[:, :-1] = features[:, 1:] 
        lables[:, -1] = self.text_corpus_encoded[:, chars+self.no_characters]
      except IndexError:
        #print("in except")
        lables[:, :-1] = features[:, 1:] 
        lables[:, -1] = self.text_corpus_encoded[:, 0]
      
      yield features, lables
      
    
  
  def __len__(self):
    '''Return size of the data set'''
    return len(self.text_corpus_encoded)
  
  def __getitem__(self, idx):
    pass
  

In [0]:
# Testing dataLoader and the generator
T = TextDataLoader(text_corpus_encoded, 128,128)
x,y = next(T.make_batches())
print(x[0])
print("*******")
print(y[0])

[42 44 43 73 58  8 42 32 68 66 42 66 31 31 31 64 31 31 42 18 21 73 44 33
 77 18 73 66 67 38 21 21 67 73  4 20 39 62 33 62 67 62 73 20  0 73 67 59
 20 73 38 20 59 62 73 20  0 73 20 39 21 36 62 67 20 38 21  9 21 37 73 18
 20 27 21 55 62 26 73 62 53 13 21 21 10 21 37 31  4 55 20 62 21 73 20 39
 21 73 15 77 15 33 39 62 67 73 15 39 20 67 18 21 38 69 73 20 55 37 73 18
 20 27 21 55 62 73 59 33]
*******
[44 43 73 58  8 42 32 68 66 42 66 31 31 31 64 31 31 42 18 21 73 44 33 77
 18 73 66 67 38 21 21 67 73  4 20 39 62 33 62 67 62 73 20  0 73 67 59 20
 73 38 20 59 62 73 20  0 73 20 39 21 36 62 67 20 38 21  9 21 37 73 18 20
 27 21 55 62 26 73 62 53 13 21 21 10 21 37 31  4 55 20 62 21 73 20 39 21
 73 15 77 15 33 39 62 67 73 15 39 20 67 18 21 38 69 73 20 55 37 73 18 20
 27 21 55 62 73 59 33 67]


### **The LongShortTermMemory Network **

In [0]:
# LSTM net with 4 LSTM layers, 2 Fully Connected layer and 2 dropout layers
# Each LSTM layer is a LSTM cell that takes hidden state and cell state as input 
# and produces new hidden state and cell state as output

# input_size = Vocabulary size
# Output of the network is also of Vocabulary size

class LSTM(nn.ModuleList): 
  def __init__(self, sequence_length, input_size, hidden_size, batch_size):
    super(LSTM,self).__init__()
    
    # used for live debugging, use next command to move to next line
    # set_trace()
    
    self.sequence_length = sequence_length
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.batch_size = batch_size
    
    # LSTMCell 1
    self.lstm1 = nn.LSTMCell(input_size = self.input_size, 
                             hidden_size = self.hidden_size, 
                             bias = True)
    
    # LSTMCell 2
    self.lstm2 = nn.LSTMCell(input_size = self.hidden_size, 
                             hidden_size = self.hidden_size, 
                             bias = True)
    
    # LSTMCell 3
    self.lstm3 = nn.LSTMCell(input_size = self.hidden_size , 
                             hidden_size = self.hidden_size, 
                             bias = True)
    
    # LSTMCell 4
    self.lstm4 = nn.LSTMCell(input_size = self.hidden_size, 
                             hidden_size = self.hidden_size, 
                             bias = True)
    
    # LSTMCell 5
    self.lstm5 = nn.LSTMCell(input_size = self.hidden_size, 
                             hidden_size = self.hidden_size, 
                             bias = True)
    
    # Dropout Layer 1
    self.drop1 = nn.Dropout(p=0.2)
    
    # Fully-connected Layer, out_features = input_size = vocab_size
    self.fc1 = nn.Linear(in_features = self.hidden_size,
                        out_features = self.hidden_size,
                        bias = True)

    # Dropout Layer 2
    self.drop2 = nn.Dropout(p=0.2)
    
    self.fc2 = nn.Linear(in_features = self.hidden_size,
                        out_features = self.input_size,
                        bias = True)
    
    
  def forward(self,inputs, initial_states):
    '''
    Function responsible for the forward pass through the network
    
    Args:
      inputs         : Tuple of batch, input_size at each timestep t
      initail_states : Tuple of hidden state and cell state, initailly set to 0s
    
    Returns:
      Collection of output sequences at each timestep t
    '''
    
    # More debugguing
    # set_trace()

    # Initialize empty output sequence
    output_sequence = torch.empty((self.sequence_length,
                         self.batch_size,
                         self.input_size))
    
    
    # For every time step in the sequence
    for t in range(self.sequence_length):
      
      
      
      # Passing the input sequentially through all LSTMCells
      initial_hidden_state, initial_cell_state = initial_states
      
      initial_hidden_state.requires_grad_()
      initial_hidden_state.cuda()
      
      initial_cell_state.requires_grad_()
      initial_cell_state.cuda()
      
      state1 = self.lstm1(inputs[t], (initial_hidden_state, initial_cell_state))
      hidden_state1, cell_state1 = state1
      
      state2 = self.lstm2(hidden_state1, (initial_hidden_state, initial_cell_state))
      hidden_state2, cell_state2 = state2
      
      '''
      state3 = self.lstm3(hidden_state2, (initial_hidden_state, initial_cell_state))
      hidden_state3, cell_state3 = state3
      
      state4 = self.lstm4(hidden_state3, (initial_hidden_state, initial_cell_state))
      hidden_state4, cell_state4 = state4
      
      state5 = self.lstm5(hidden_state3, (initial_hidden_state, initial_cell_state))
      hidden_state5, cell_state5 = state5
      '''
      
      # Passing output of LSTMCells through dropout and fc layers
      output = self.fc1(self.drop1(hidden_state2))
      
      # add final output to output sequence
      #output_sequence[t] = self.fc2(self.drop2(output))
      output_sequence[t] = self.fc2(output)
      
    # returning output sequence
    return output_sequence.view((self.sequence_length * self.batch_size, -1))
  
  def initialize_hidden_and_cell_states(self):
    # The hidden and cell state at the start are all zeros
    return (torch.zeros(self.batch_size, self.hidden_size, device=device),
            torch.zeros(self.batch_size, self.hidden_size, device=device))

    
    
    

In [0]:
# Setting the device that will be used for training.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

# Creating the model and load it into the GPU
model = LSTM(sequence_length=128, 
             input_size=len(character_to_integer), 
             hidden_size=512, 
             batch_size=128)
model = model.to(device)

NameError: ignored

In [0]:
# One-Hot encoding a tensor
def to_categorical(input_instance, num_classes):
  """
  Function responsible for one-hot encoding a given tensor
  
  Args:
    input_instance : np.ndarray which is one instance of input that is to 
                     be one-hot encoded
    num_classes    : total no. of classes
  """
  return np.eye(num_classes, dtype='uint8')[input_instance]


### **Optimizer & Loss function**

In [0]:
# Using Adam as Optimizer and CrossEntropyLoss as the loss function

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = 0.01)

### **Training & Validating**

In [0]:
# Separating validation and training set from the encoded text corpus
# First 90% data used for training and last 10% data used for validation

validation_index = int(len(text_corpus_encoded) *(1-0.1))

validation_set = text_corpus_encoded[validation_index:]
training_set = text_corpus_encoded[:validation_index]

In [0]:
def predict_next_char(model, input_char, sequence_length = 128):
  
  model.eval # set model to eval mode
  
  # Set initail hidden and cell state to 0, but now batch size is 1
  initial_hidden_and_cell_state = (torch.zeros(1, model.hidden_size, device = device),
                                      torch.zeros(1, model.hidden_size, device = device))
  
  
  # placeholder for the generated text
  seq = np.empty(sequence_length+1)
  seq[0] = character_to_integer[input_char]
  
  # Encode the input character, (1, input_size)
  input_char = torch.from_numpy(to_categorical(character_to_integer[input_char], 
                                               num_classes=model.input_size))
  input_char = input_char.float()
  
  input_char.requires_grad_()
  input_char = input_char.to(device)
  
  # Add fake dimension, as input to network needs to be batches
  input_char = input_char.unsqueeze(0)
  
  # Forward pass, without dropout layers 
  for t in range(sequence_length):
    out = model.lstm1(input_char, initial_hidden_and_cell_state)
    hidden1, cell1 = out
    
    out = model.lstm2(hidden1, initial_hidden_and_cell_state)
    hidden2, cell2 = out


    out = model.lstm3(hidden2, initial_hidden_and_cell_state)
    hidden3, cell3 = out

    out = model.lstm4(hidden3, initial_hidden_and_cell_state)
    hidden4, cell4 = out

    # Passing output of LSTMCells through fc layers
    out = model.fc1(hidden4)

    out = model.fc2(out)

    # Applying softmax, to get probabilities
    out = F.softmax(out, dim=1)
    
    # Out is now a vector of (1, input_size)
    # Get top 5 best predictions based on softmax probabilities
    prob,top_characters = out.topk(5)
    
    top_characters =top_characters.to("cpu")
    prob =prob.to("cpu")
                                      
    top_characters = top_characters.squeeze().numpy()
    prob = prob.detach().squeeze().numpy()


    
    char = np.random.choice(top_characters, p = prob/prob.sum())

    # append the character to the output sequence
    seq[t+1] = char

    # prepare the character to be fed to the next LSTM cell
    char = to_categorical(char, num_classes=model.input_size)
    char = torch.from_numpy(char).unsqueeze(0)
    
    # Type conversion before feeding the new char into LSTM
    #input_char = char.byte()
    input_char = char.float()
    #input_char.requires_grad_()
    input_char = input_char.to(device)
    
  return seq

In [0]:
# empty list to store model output
model_output_list = []

# Training the model
for epoch in range(50):
  
  # Initialize hidden and cell state to zero at the beginning
  hidden_and_cell_state = model.initialize_hidden_and_cell_states()
  
  
  # Creating instance of dataloader class
  T = TextDataLoader(training_set, 128,128)
  
  # To find average loss per epoch
  batch_count = 0
  loss_sum = 0
  
  for i, (feature, label) in enumerate(T.make_batches()):
    
    # One-hot encode inputs, convert into tensor and transpose them
    feature = torch.from_numpy(to_categorical(feature, num_classes=model.input_size).transpose([1, 0, 2]))
    # Converting input to FloatTensor
    feature = feature.float()
    
    # Transpose labels and convert into LongTensor
    label = torch.from_numpy(label.T).type(torch.LongTensor)
    
    # Wrap features and labels in PyTorch Variable
    feature, label = Variable(feature),Variable(label)
    
    # Convert features and labels to type torch.cuda and load them into the GPU
    feature.requires_grad_()
    feature = feature.to(device)
   
    #label.requires_grad_()
    #label = feature.to(device)
    
    optimizer.zero_grad() # Makes sure the gradients are initially zero
    
    out = model(feature, hidden_and_cell_state) # Forward pass
    
    loss = criterion(out,label.contiguous().view(128*128)) # Computing loss
    
    loss.backward() # Back-Prop the loss / Backward Pass

    optimizer.step() # Update the gradients
    
    # Validating every 10 epochs
    if i%10 == 0:
        # Initialize hidden and cell state to zero at the beginning
        hidden_and_cell_state = model.initialize_hidden_and_cell_states()

        # Creating instance of dataloader class
        V = TextDataLoader(validation_set, 128,128)

        for i, (val_feature, val_label) in enumerate(V.make_batches()):

          # One-hot encode inputs, convert into tensor and transpose them
          val_feature = torch.from_numpy(to_categorical(val_feature, num_classes=model.input_size).transpose([1, 0, 2]))
          # Converting input to FloatTensor
          val_feature = val_feature.float()
          val_feature.requires_grad_()
          val_feature = val_feature.to(device)

          # Transpose labels and convert into LongTensor
          val_label = torch.from_numpy(val_label.T).type(torch.LongTensor)

          val_out = model(val_feature, hidden_and_cell_state) # Forward pass

          loss = criterion(val_out,val_label.contiguous().view(128*128)) # Computing loss

          model_output_list.append(''.join([integer_to_character[k] for k in predict_next_char(model, 'A', sequence_length = 1024)]))

        print(f'Model output for epoch {epoch+1} :')
        print(model_output_list[epoch])

    print(f'Batch : {i+1} Loss : {loss}')
    
    loss_sum +=loss
    batch_count +=1

  print(f'Epoch : {epoch+1} Average Loss : {loss_sum/batch_count}')

    
    
    
    
  

In [0]:
print(model_output_list[-1])

Aa issasi sitsitaa sa its  sss   aisa  s s ai aisiassttstata  tatat isstisitstttitsaaiaaatsaitatattaa ts  i tt atatt a aiia s tats attiit i  aa asa ss t a  t  istiatii  iitaiis aia aiaatata   it tsstiasatiaiit atasia  iii  sst  st aa  aat iiii ttits  attatst saiattttt si aa   asti assi t t iai   ti tiistaiit i   t astss i isa aiasiai sisasattastts tt aaaa statatasistiasitts i itasiiit saaat iast i a tis  atsi  sts ttiai issta   ii  tsatasttsaassttai is    ats ii   atssstaisa asaatsatsststs ttts   ai t tt  ita ita tsassaststisaiiaia ia  sts   ttsstatstastais t isst ss  aaa ata sttts is a aaaai isita atsatiaastisa  itit ai sta ss sttasisas iaat atta tsiii tstaitiit s ttst  sita tsasi a  ss tt t ats sitt ts ittiitat ataiasaa saitst ti  a isa istit iaii i s saistttai a iissttsisias i iaitissss  s  tta a aistiaistias  s at    stts a i sstat sttsa s t stitt tti ati ti st a  sa  ss  i  a t ttt i tt itiaaisissaa attt iaa asaa tas  s iitiit t  itsitist s saaaaiti tttst   aiia   sist i  t ati i 

### **Saving model data to google drive and fetching it for future use**

In [0]:
# Saving the model
torch.save(model.state_dict(), 'model_data_initial_state_100.pt')

# Check size of model state dictionary
!ls -l

total 41700
-rw-r--r-- 1 root root     2578 Nov 23 18:02 adc.json
-rw-r--r-- 1 root root 39744985 Nov 24 03:03 model_data_initial_state_100.pt
drwxr-xr-x 2 root root     4096 Nov 20 18:17 sample_data
-rw-r--r-- 1 root root  2943701 Nov 23 18:02 text_corpus


In [0]:
# Upload to drive and work with it
from google.colab import files
files.download('model_data_initial_state_100.pt')

In [0]:
# Fetching the model_data file from google drive
fileId = drive.CreateFile({'id': ''}) # id of model_data file
file = fileId['title']
fileId.GetContentFile(file)  # Save Drive file as a local file

In [0]:
# Creating a model and Loading the state data saved earlier

# model = LSTM(sequence_length=128, 
#             input_size=len(character_to_integer), 
#             hidden_size=512, 
#             batch_size=128).to(device) # If running on GPU

model = LSTM(sequence_length=128, 
             input_size=len(character_to_integer), 
             hidden_size=512, 
             batch_size=128) # if running on CPU

model.load_state_dict(torch.load('model_data.pt'))

### **Validating the model on the validation data set**

In [0]:
# Validation loop
model.eval()

for epoch in range(10):
  
  # Initialize hidden and cell state to zero at the beginning
  hidden_and_cell_state = model.initialize_hidden_and_cell_states()
   
  # Creating instance of dataloader class
  T = TextDataLoader(validation_set, 128,128)

  for i, (feature, label) in enumerate(T.make_batches()):
    
    # One-hot encode inputs, convert into tensor and transpose them
    feature = torch.from_numpy(to_categorical(feature, num_classes=model.input_size).transpose([1, 0, 2]))
    # Converting input to FloatTensor
    feature = feature.float()
    feature.requires_grad_()
    feature = feature.to(device)
    
    # Transpose labels and convert into LongTensor
    label = torch.from_numpy(label.T).type(torch.LongTensor)
       
    out = model(feature, hidden_and_cell_state) # Forward pass
    
    loss = criterion(out,label.contiguous().view(128*128)) # Computing loss
    
    print(f'Batch : {i+1} Loss : {loss}')

  print(f'Epoch : {epoch+1} Loss : {loss}')

No. of Batches generated from given corpus = 1
in except
Batch : 1 Loss : 2.555915117263794
Epoch : 1 Loss : 2.555915117263794
No. of Batches generated from given corpus = 1
in except
Batch : 1 Loss : 2.555915117263794
Epoch : 2 Loss : 2.555915117263794
No. of Batches generated from given corpus = 1
in except
Batch : 1 Loss : 2.555915117263794
Epoch : 3 Loss : 2.555915117263794
No. of Batches generated from given corpus = 1
in except
Batch : 1 Loss : 2.555915117263794
Epoch : 4 Loss : 2.555915117263794
No. of Batches generated from given corpus = 1
in except
Batch : 1 Loss : 2.555915117263794
Epoch : 5 Loss : 2.555915117263794
No. of Batches generated from given corpus = 1
in except
Batch : 1 Loss : 2.555915117263794
Epoch : 6 Loss : 2.555915117263794
No. of Batches generated from given corpus = 1
in except
Batch : 1 Loss : 2.555915117263794
Epoch : 7 Loss : 2.555915117263794
No. of Batches generated from given corpus = 1
in except
Batch : 1 Loss : 2.555915117263794
Epoch : 8 Loss : 2.

Validation loop achieves loss values similar to that of a converged network 

### **Predict output**

In [0]:
# Testing on smaller sequence length
seq = predict_next_char(model, 'A', sequence_length = 128)

# Convert sequence elements type to int, 
# else we wont be able to use them as keys for integer_to_character dictionary 
seq = seq.astype(int)
print(seq)

[71 69  3 69 31 31 57 48 31  3 57 48 69 31 48 31 69 31 57 31 48  3 69 31
 48 48 57 31 69 57 48 48 31 69 48 48 48 31 57 31 48 48 31 31 31 57  3 48
 69 69  3 31 48 57 57 31 69 31 57 48  3 48 69 31 57 69 57  3 69  3  3 31
 57 48 57 69 31 48 31 31  3 48 31  3 48 57 48 31  3 31 57  3 69 48  3 31
  3  3 48 48 48 57  3 69 31  3  3 48 57 69 31 57 69 31 57 57  3 69  3 48
 69  3 31  3 31  3 69 69 57]


In [0]:
character_sequence = [integer_to_character[i] for i in seq]
print(character_sequence)

['A', 'o', 's', 'o', 'a', 'a', ' ', 't', 'a', 's', ' ', 't', 'o', 'a', 't', 'a', 'o', 'a', ' ', 'a', 't', 's', 'o', 'a', 't', 't', ' ', 'a', 'o', ' ', 't', 't', 'a', 'o', 't', 't', 't', 'a', ' ', 'a', 't', 't', 'a', 'a', 'a', ' ', 's', 't', 'o', 'o', 's', 'a', 't', ' ', ' ', 'a', 'o', 'a', ' ', 't', 's', 't', 'o', 'a', ' ', 'o', ' ', 's', 'o', 's', 's', 'a', ' ', 't', ' ', 'o', 'a', 't', 'a', 'a', 's', 't', 'a', 's', 't', ' ', 't', 'a', 's', 'a', ' ', 's', 'o', 't', 's', 'a', 's', 's', 't', 't', 't', ' ', 's', 'o', 'a', 's', 's', 't', ' ', 'o', 'a', ' ', 'o', 'a', ' ', ' ', 's', 'o', 's', 't', 'o', 's', 'a', 's', 'a', 's', 'o', 'o', ' ']


In [0]:
res = ''.join(character_sequence)
print(res)

Aosoaa tas toataoa atsoatt ao ttaottta attaaa stoosat  aoa tstoa o sossa t oataastast tasa sotsassttt soasst oa oa  sostosasasoo 


### **Make the model write stuff from a seed character**

In [0]:
# Run model on validation data and see what model writes
# Starting seed is a
model.eval()

# empty list to store model output
model_output_list = []

for epoch in range(50):
  
  # Initialize hidden and cell state to zero at the beginning
  hidden_and_cell_state = model.initialize_hidden_and_cell_states()
   
  # Creating instance of dataloader class
  T = TextDataLoader(validation_set, 128,128)

  for i, (feature, label) in enumerate(T.make_batches()):
    
    # One-hot encode inputs, convert into tensor and transpose them
    feature = torch.from_numpy(to_categorical(feature, num_classes=model.input_size).transpose([1, 0, 2]))
    # Converting input to FloatTensor
    feature = feature.float()
    feature.requires_grad_()
    feature = feature.to(device)
    
    # Transpose labels and convert into LongTensor
    label = torch.from_numpy(label.T).type(torch.LongTensor)
       
    out = model(feature, hidden_and_cell_state) # Forward pass
    
    loss = criterion(out,label.contiguous().view(128*128)) # Computing loss
    
    model_output_list.append(''.join([integer_to_character[k] for k in predict_next_char(model, 'A', sequence_length = 1024)]))

  print(f'Model output for epoch {epoch+1} :')
  print(model_output_list[epoch])

No. of Batches generated from given corpus = 1
in except
Model output for epoch 1 :
Aaaos ts oataastast  stat saot sssaaota  os soott tsottaatsa  ot  tsasts osstsotsooattttao t otstsoo otoooto aotatooaaoat satttsssaootaaaao aoassotssasa ooaa sttoattasto tota oo aoat aas taotoaoaooatosaaoatassastt ttat astastoa asatasttaooosaos  o oootoootat ttaso tttsaoassoos  sattsst aostotatsaatsoss o s ttststsaos saas taaoastto tsss otaosoaaoos atossttatoooooosoa aasa at  at taaso tossoo   o atto oot ot stas  sst so ta assattt staotoao otastas ott oasstostso ts tots too  tasooaattassoooto  to otsttos oaossta oott tsota o o  soaoat  tatoo   s sttstasottssstotooo ooo ao aat ssotoa a ttttaas  otsoaoo ssostta attts taaataa  ss s o tstoooott o oao taotttoss   oasoa ttst sossao a otas ot  ssa  aoaaa t a oao  o tasostoaos  oaassoo taaot t ootas s o  aootsasaotaoassttata asaatstto o a ta oaos o  aataooot saataaa a attsaa tattaotota ass ss  to taoss ss    tso tttt stoto a  aa ao ssossatsasttaoootaaao  ts  ot