# **Character Prediction Engine**

This is the heart of the smart-keys agent.

Its a LSTM that will take in some representation of the input and return the next character in the sequence.

In [0]:
# Code to check the resources available Ex. GPU RAM
# memory footprint support libraries/code
!ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi
!pip install gputil
!pip install psutil
!pip install humanize
import psutil
import humanize
import os
import GPUtil as GPU
GPUs = GPU.getGPUs()
# XXX: only one GPU on Colab and isn’t guaranteed
gpu = GPUs[0]
def printm():
 process = psutil.Process(os.getpid())
 print("Gen RAM Free: " + humanize.naturalsize( psutil.virtual_memory().available ), " | Proc size: " + humanize.naturalsize( process.memory_info().rss))
 print("GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB".format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))
printm()



Collecting gputil
  Downloading https://files.pythonhosted.org/packages/45/99/837428d26b47ebd6b66d6e1b180e98ec4a557767a93a81a02ea9d6242611/GPUtil-1.3.0.tar.gz
Building wheels for collected packages: gputil
  Running setup.py bdist_wheel for gputil ... [?25l- done
[?25h  Stored in directory: /root/.cache/pip/wheels/17/0f/04/b79c006972335e35472c0b835ed52bfc0815258d409f560108
Successfully built gputil
Installing collected packages: gputil
Successfully installed gputil-1.3.0
Gen RAM Free: 12.9 GB  | Proc size: 141.9 MB
GPU RAM Free: 11441MB | Used: 0MB | Util   0% | Total 11441MB


#STEPS:

Libraries & Dependencies:
1. Install PyTorch
2. Install any other dependencies/ libraries
3. Import all required libraries including PyTorch

Getting, loading and preparing data:
4. Get text_corpus file from drive to colab instance
5. Read text_corpus file 
6. Prepare the data from text_corpus file for training and testing 
(Find set of characters in corpus, one-hot encode characters depending on vocabulary etc.)

Create, Train and Validate the model:
7. Split the encoded text_corpus into train and validation data
8. Create the LSTM model
9. Train the data using the training encoded text_corpus
10. Validate the model using the validation text_corpus


### **Installing required libraries and dependencies**

In [1]:
# Install pytorch
# http://pytorch.org/
from os import path
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())

accelerator = 'cu80' if path.exists('/opt/bin/nvidia-smi') else 'cpu'

!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.4.0-{platform}-linux_x86_64.whl torchvision

tcmalloc: large alloc 1073750016 bytes == 0x5c32a000 @  0x7f110f0312a4 0x591a07 0x5b5d56 0x502e9a 0x506859 0x502209 0x502f3d 0x506859 0x504c28 0x502540 0x502f3d 0x506859 0x504c28 0x502540 0x502f3d 0x506859 0x504c28 0x502540 0x502f3d 0x507641 0x502209 0x502f3d 0x506859 0x504c28 0x502540 0x502f3d 0x507641 0x504c28 0x502540 0x502f3d 0x507641


In [2]:
#Check install and version
import torch
print(torch.__version__)

# Check cuda avaialability
if torch.cuda.is_available():
   print("Yay!!")
    

0.4.0
Yay!!


In [7]:
# Installing PyDrive
!pip install PyDrive

Collecting PyDrive
[?25l  Downloading https://files.pythonhosted.org/packages/52/e0/0e64788e5dd58ce2d6934549676243dc69d982f198524be9b99e9c2a4fd5/PyDrive-1.3.1.tar.gz (987kB)
[K    100% |████████████████████████████████| 993kB 20.1MB/s 
Building wheels for collected packages: PyDrive
  Running setup.py bdist_wheel for PyDrive ... [?25l- \ done
[?25h  Stored in directory: /root/.cache/pip/wheels/fa/d2/9a/d3b6b506c2da98289e5d417215ce34b696db856643bad779f4
Successfully built PyDrive
Installing collected packages: PyDrive
Successfully installed PyDrive-1.3.1


### **Importing required libraries and dependencies**

In [0]:
# NumPy & Pytorch

import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.utils.data as data

from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader

In [0]:
# Set_trace is used for debugging
from IPython.core.debugger import set_trace

In [0]:
# PyDrive
# For importing data-set from Google Drive -> Colab VM 

from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client.
# This only needs to be done once in a notebook.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

### **Loading text_corpus into the VM**

In [9]:
fileId = drive.CreateFile({'id': '1ZHd8wJx1o-O4O_mUxM8pfJKK7gX5SXZT'}) # id of the file being uploaded
print (fileId['title'])  # text_corpus
file = fileId['title']
fileId.GetContentFile(file)  # Save Drive file on Colab VM as a local file

text_corpus


In [10]:
# Check if text_corlus is present in the current working directory
!ls

adc.json  sample_data  text_corpus


### **Read text_corpus and prepare for training**

In [0]:
with open('text_corpus', 'r') as f:
    text_corpus = f.read()

# print(len(text_corpus))

In [12]:
# Set of all unique characters in the corpus, will give us size of vocabulary 
# Will have a-z, A-Z, special characters and numbers present in the text_corpus
characters = set(text_corpus)

print(len(characters))

103


In [13]:
# Creating a dictionary with integers as keys and charactes as values
integer_to_character = dict(enumerate(characters))

print(integer_to_character)

# Swapping the keys with the values to create a character to integer mapping
# Characters as keys and integers as values 
character_to_integer = dict()
for index, char in integer_to_character.items():
  character_to_integer[char] = index

print(len(character_to_integer))

{0: 'œ', 1: 'R', 2: '3', 3: 'x', 4: 'o', 5: 'è', 6: 'F', 7: '[', 8: 'm', 9: '8', 10: 'h', 11: ' ', 12: 'ë', 13: '\n', 14: 'B', 15: 'æ', 16: 'Q', 17: '‘', 18: 'c', 19: 'e', 20: 'k', 21: ';', 22: '1', 23: 'à', 24: 'G', 25: '£', 26: '!', 27: 'p', 28: 'P', 29: 'â', 30: '"', 31: 'N', 32: 'W', 33: 'O', 34: '6', 35: '}', 36: ',', 37: 'L', 38: 'u', 39: '“', 40: 't', 41: '’', 42: '$', 43: '-', 44: 'S', 45: 'I', 46: 'g', 47: 'v', 48: '*', 49: 'w', 50: 'é', 51: ')', 52: 'i', 53: 'ö', 54: 'J', 55: 'Z', 56: 'r', 57: 'l', 58: 'd', 59: 'M', 60: '2', 61: '7', 62: '(', 63: '.', 64: 'a', 65: 'f', 66: '9', 67: 'ï', 68: ']', 69: 'y', 70: 'Y', 71: 's', 72: 'A', 73: '5', 74: 'X', 75: 'ê', 76: '0', 77: 'b', 78: 'E', 79: 'U', 80: 'q', 81: 'n', 82: '”', 83: '&', 84: '`', 85: '?', 86: 'j', 87: ':', 88: 'á', 89: 'V', 90: 'D', 91: 'ô', 92: 'T', 93: 'K', 94: '{', 95: 'H', 96: "'", 97: '/', 98: '4', 99: 'z', 100: '_', 101: '—', 102: 'C'}
103


In [0]:
# Encoding the entire text_corpus using the character to integer mapping

text_corpus_encoded = []

for char in text_corpus:
  text_corpus_encoded.append(character_to_integer[char])
  
text_corpus_encoded = np.array(text_corpus_encoded)


### **Custom Data-batches for training & testing**
The goal is to generate batches with features : stream of characters and targets : same stream of characters but shifted by one character sequentially 

The input to the data loader is the text_corpus but in encoded form (using character to integer mapping)

In [0]:
class TextDataLoader:
  def __init__(self, text_corpus_encoded, no_sequences_per_batch, no_of_characters):
    
    
    self.no_sequences = no_sequences_per_batch
    self.no_characters = no_of_characters
    
    self.batch_size = self.no_sequences * self.no_characters
    self.no_of_batches = len(text_corpus_encoded)//self.batch_size
    
    #print(f"No. of Batches generated from given corpus = {self.no_of_batches}")
    
    # remove extra characters, so we get full batches
    self.text_corpus_encoded = text_corpus_encoded[:self.batch_size * self.no_of_batches]
    
    # Arrange data in no_sequence_per_batch rows and rest columns
    self.text_corpus_encoded = self.text_corpus_encoded.reshape((self.no_sequences,-1))
    
  def make_batches(self):
    # Each step is number of characters apart
    for chars in range(0, self.text_corpus_encoded.shape[1], self.no_characters):
      
      features = self.text_corpus_encoded[:, chars:chars+self.no_characters]
      # Array of zeros with shape similar to features
      lables = np.zeros_like(features) 
      
      try:
        lables[:, :-1] = features[:, 1:] 
        lables[:, -1] = self.text_corpus_encoded[:, chars+self.no_characters]
      except IndexError:
        #print("in except")
        lables[:, :-1] = features[:, 1:] 
        lables[:, -1] = self.text_corpus_encoded[:, 0]
      
      yield features, lables
      
    
  
  def __len__(self):
    '''Return size of the data set'''
    return len(self.text_corpus_encoded)
  
  def __getitem__(self, idx):
    pass
  

In [16]:
# Testing dataLoader and the generator
T = TextDataLoader(text_corpus_encoded, 128,128)
x,y = next(T.make_batches())
print(x[0])
print("*******")
print(y[0])

[92 10 19 11 31 19 57 57 52 19 36 11 64 11 18 56 38 52 71 52 81 46 11 69
 64 49 57 36 11 71 49 38 81 46 11 40  4 11 10 19 56 11 64 81 18 10  4 56
 11 49 52 40 10  4 38 40 11 64 11 65 57 38 40 40 19 56 11  4 65 13 40 10
 19 11 71 64 52 57 71 36 11 64 81 58 11 49 64 71 11 64 40 11 56 19 71 40
 63 11 92 10 19 11 65 57  4  4 58 11 10 64 58 11  8 64 58 19 36 11 40 10
 19 11 49 52 81 58 11 49]
*******
[10 19 11 31 19 57 57 52 19 36 11 64 11 18 56 38 52 71 52 81 46 11 69 64
 49 57 36 11 71 49 38 81 46 11 40  4 11 10 19 56 11 64 81 18 10  4 56 11
 49 52 40 10  4 38 40 11 64 11 65 57 38 40 40 19 56 11  4 65 13 40 10 19
 11 71 64 52 57 71 36 11 64 81 58 11 49 64 71 11 64 40 11 56 19 71 40 63
 11 92 10 19 11 65 57  4  4 58 11 10 64 58 11  8 64 58 19 36 11 40 10 19
 11 49 52 81 58 11 49 64]


### **The LongShortTermMemory Network **

In [0]:
# LSTM net with 4 LSTM layers, 2 Fully Connected layer and 2 dropout layers
# Each LSTM layer is a LSTM cell that takes hidden state and cell state as input 
# and produces new hidden state and cell state as output

# input_size = Vocabulary size
# Output of the network is also of Vocabulary size

class LSTM(nn.ModuleList): 
  def __init__(self, sequence_length, input_size, hidden_size, batch_size):
    super(LSTM,self).__init__()
    
    # used for live debugging, use next command to move to next line
    # set_trace()
    
    self.sequence_length = sequence_length
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.batch_size = batch_size
    
    # LSTMCell 1
    self.lstm1 = nn.LSTMCell(input_size = self.input_size, 
                             hidden_size = self.hidden_size, 
                             bias = True)
    
    # LSTMCell 2
    self.lstm2 = nn.LSTMCell(input_size = self.hidden_size, 
                             hidden_size = self.hidden_size, 
                             bias = True)
    
    # LSTMCell 3
    self.lstm3 = nn.LSTMCell(input_size = self.hidden_size , 
                             hidden_size = self.hidden_size, 
                             bias = True)
    
    # LSTMCell 4
    self.lstm4 = nn.LSTMCell(input_size = self.hidden_size, 
                             hidden_size = self.hidden_size, 
                             bias = True)
    
    # LSTMCell 5
    self.lstm5 = nn.LSTMCell(input_size = self.hidden_size, 
                             hidden_size = self.hidden_size, 
                             bias = True)
    
    # Dropout Layer 1
    self.drop1 = nn.Dropout(p=0.5)
    
    # Fully-connected Layer, out_features = input_size = vocab_size
    self.fc1 = nn.Linear(in_features = self.hidden_size,
                        out_features = self.hidden_size,
                        bias = True)

    # Dropout Layer 2
    self.drop2 = nn.Dropout(p=0.5)
    
    self.fc2 = nn.Linear(in_features = self.hidden_size,
                        out_features = self.input_size,
                        bias = True)
    
    
  def forward(self,inputs, initial_states):
    '''
    Function responsible for the forward pass through the network
    
    Args:
      inputs         : Tuple of batch, input_size at each timestep t
      initail_states : Tuple of hidden state and cell state, initailly set to 0s
    
    Returns:
      Collection of output sequences at each timestep t
    '''
    
    # More debugguing
    # set_trace()

    # Initialize empty output sequence
    output_sequence = torch.empty((self.sequence_length,
                         self.batch_size,
                         self.input_size))
    
    
    # For every time step in the sequence
    for t in range(self.sequence_length):
     
      # Passing the input sequentially through all LSTMCells
      initial_hidden_state, initial_cell_state = initial_states
      
      initial_hidden_state.requires_grad_()
      initial_hidden_state.cuda()
      
      initial_cell_state.requires_grad_()
      initial_cell_state.cuda()
      
      state1 = self.lstm1(inputs[t], (initial_hidden_state, initial_cell_state))
      hidden_state1, cell_state1 = state1
      
      state2 = self.lstm2(hidden_state1, (initial_hidden_state, initial_cell_state))
      hidden_state2, cell_state2 = state2
      
      state3 = self.lstm3(hidden_state2, (initial_hidden_state, initial_cell_state))
      hidden_state3, cell_state3 = state3
      
      state4 = self.lstm4(hidden_state3, (initial_hidden_state, initial_cell_state))
      hidden_state4, cell_state4 = state4
      
      state5 = self.lstm5(hidden_state3, (initial_hidden_state, initial_cell_state))
      hidden_state5, cell_state5 = state5
      
      # Passing output of LSTMCells through dropout and fc layers
      output = self.fc1(self.drop1(hidden_state5))
      
      # add final output to output sequence
      #output_sequence[t] = self.fc2(self.drop2(output))
      output_sequence[t] = self.fc2(output)
      
    # returning output sequence
    return output_sequence.view((self.sequence_length * self.batch_size, -1))
  
  def initialize_hidden_and_cell_states(self):
    # The hidden and cell state at the start are all zeros
    return (torch.zeros(self.batch_size, self.hidden_size, device=device),
            torch.zeros(self.batch_size, self.hidden_size, device=device))

    
    
    

In [18]:
# Setting the device that will be used for training.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

# Creating the model and load it into the GPU
model = LSTM(sequence_length=128, 
             input_size=len(character_to_integer), 
             hidden_size=512, 
             batch_size=128)
model = model.to(device)

cuda:0


In [0]:
# One-Hot encoding a tensor
def to_categorical(input_instance, num_classes):
  """
  Function responsible for one-hot encoding a given tensor
  
  Args:
    input_instance : np.ndarray which is one instance of input that is to 
                     be one-hot encoded
    num_classes    : total no. of classes
  """
  return np.eye(num_classes, dtype='uint8')[input_instance]


### **Optimizer & Loss function**

In [0]:
# Using Adam as Optimizer and CrossEntropyLoss as the loss function

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = 0.01)

### **Training & Validating**

In [0]:
# Separating validation and training set from the encoded text corpus
# First 80% data used for training and last 20% data used for validation

validation_index = int(len(text_corpus_encoded) *(1-0.2))

validation_set = text_corpus_encoded[validation_index:]
training_set = text_corpus_encoded[:validation_index]

In [22]:
# Training the model
for epoch in range(10):
  
  # Initialize hidden and cell state to zero at the beginning
  hidden_and_cell_state = model.initialize_hidden_and_cell_states()
  
  
  # Creating instance of dataloader class
  T = TextDataLoader(training_set, 128,128)
  
  # To find average loss per epoch
  batch_count = 0
  loss_sum = 0
  
  for i, (feature, label) in enumerate(T.make_batches()):
    
    # One-hot encode inputs, convert into tensor and transpose them
    feature = torch.from_numpy(to_categorical(feature, num_classes=model.input_size).transpose([1, 0, 2]))
    # Converting input to FloatTensor
    feature = feature.float()
    
    # Transpose labels and convert into LongTensor
    label = torch.from_numpy(label.T).type(torch.LongTensor)
    
    # Wrap features and labels in PyTorch Variable
    feature, label = Variable(feature),Variable(label)
    
    # Convert features and labels to type torch.cuda and load them into the GPU
    feature.requires_grad_()
    feature = feature.to(device)
   
    #label.requires_grad_()
    #label = feature.to(device)
    
    optimizer.zero_grad() # Makes sure the gradients are initially zero
    
    out = model(feature, hidden_and_cell_state) # Forward pass
    
    loss = criterion(out,label.contiguous().view(128*128)) # Computing loss
    
    loss.backward() # Back-Prop the loss / Backward Pass

    optimizer.step() # Update the gradients
    
    print(f'Batch : {i+1} Loss : {loss}')
    
    loss_sum +=loss
    batch_count +=1

  print(f'Epoch : {epoch+1} Average Loss : {loss_sum/batch_count}')

    
    
    
    
  

Batch : 1 Loss : 4.6424713134765625
Batch : 2 Loss : 4.361251354217529
Batch : 3 Loss : 3.287141799926758
Batch : 4 Loss : 6.865812301635742
Batch : 5 Loss : 3.685643196105957
Batch : 6 Loss : 3.3834519386291504
Batch : 7 Loss : 3.4586434364318848
Batch : 8 Loss : 3.4660463333129883
Batch : 9 Loss : 3.4003405570983887
Batch : 10 Loss : 3.293982744216919
Batch : 11 Loss : 3.1509416103363037
Batch : 12 Loss : 3.1774771213531494
Batch : 13 Loss : 3.185244083404541
Batch : 14 Loss : 3.214449882507324
Batch : 15 Loss : 3.3372490406036377
Batch : 16 Loss : 3.1381304264068604
Batch : 17 Loss : 3.1278157234191895
Batch : 18 Loss : 3.0291531085968018
Batch : 19 Loss : 3.148047924041748
Batch : 20 Loss : 3.209872245788574
Batch : 21 Loss : 5.432459354400635
Batch : 22 Loss : 3.4241104125976562
Batch : 23 Loss : 3.632106065750122
Batch : 24 Loss : 3.176801919937134
Batch : 25 Loss : 3.3661158084869385
Batch : 26 Loss : 3.221060276031494
Batch : 27 Loss : 3.3445496559143066
Batch : 28 Loss : 3.162

In [0]:
# Just to check if all data was loaded into GPU
print(next(model.parameters()).is_cuda)
print(feature.is_cuda)
print(label.is_cuda) # Labels dont need to be loaded

True
True
False


In [0]:
'''

# CPU based model and training, too slow Colab timing out
# Creating the model and run it on CPU

model_cpu = LSTM(sequence_length=128, 
             input_size=len(character_to_integer), 
             hidden_size=512, 
             batch_size=128)

# Using Adam as Optimizer and CrossEntropyLoss as the loss function

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_cpu.parameters(), lr = 0.01)

# Training loop

for epoch in range(10):
  
  # Initialize hidden and cell state to zero at the beginning
  hidden_and_cell_state = model_cpu.initialize_hidden_and_cell_states()
   
  # Creating instance of dataloader class
  T = TextDataLoader(training_set, 128,128)

  for i, (feature, label) in enumerate(T.make_batches()):
    
    # One-hot encode inputs, convert into tensor and transpose them
    feature = torch.from_numpy(to_categorical(feature, num_classes=model_cpu.input_size).transpose([1, 0, 2]))
    # Converting input to FloatTensor
    feature = feature.float()
    
    # Transpose labels and convert into LongTensor
    label = torch.from_numpy(label.T).type(torch.LongTensor)
      
    optimizer.zero_grad() # Makes sure the gradients are initially zero
    
    out = model_cpu(feature, hidden_and_cell_state) # Forward pass
    
    loss = criterion(out,label.contiguous().view(128*128)) # Computing loss
    
    loss.backward() # Back-Prop the loss / Backward Pass

    optimizer.step() # Update the gradients
    
    print(f'Batch : {i+1} Loss : {loss}')

  print(f'Epoch : {epoch+1} Loss : {loss}')

'''

### **Saving model data to google drive and fetching it for future use**

In [0]:
# Saving the model
torch.save(model.state_dict(), 'model_data_initial_state_100.pt')

# Check size of model state dictionary
!ls -l

total 41700
-rw-r--r-- 1 root root     2578 Nov 23 18:02 adc.json
-rw-r--r-- 1 root root 39744985 Nov 24 03:03 model_data_initial_state_100.pt
drwxr-xr-x 2 root root     4096 Nov 20 18:17 sample_data
-rw-r--r-- 1 root root  2943701 Nov 23 18:02 text_corpus


In [0]:
# Upload to drive and work with it
from google.colab import files
files.download('model_data_initial_state_100.pt')

In [0]:
# Fetching the model_data file from google drive
fileId = drive.CreateFile({'id': ''}) # id of model_data file
file = fileId['title']
fileId.GetContentFile(file)  # Save Drive file as a local file

In [0]:
# Creating a model and Loading the state data saved earlier

# model = LSTM(sequence_length=128, 
#             input_size=len(character_to_integer), 
#             hidden_size=512, 
#             batch_size=128).to(device) # If running on GPU

model = LSTM(sequence_length=128, 
             input_size=len(character_to_integer), 
             hidden_size=512, 
             batch_size=128) # if running on CPU

model.load_state_dict(torch.load('model_data.pt'))

### **Validating the model on the validation data set**

In [0]:
# Validation loop
model.eval()

for epoch in range(10):
  
  # Initialize hidden and cell state to zero at the beginning
  hidden_and_cell_state = model.initialize_hidden_and_cell_states()
   
  # Creating instance of dataloader class
  T = TextDataLoader(validation_set, 128,128)

  for i, (feature, label) in enumerate(T.make_batches()):
    
    # One-hot encode inputs, convert into tensor and transpose them
    feature = torch.from_numpy(to_categorical(feature, num_classes=model.input_size).transpose([1, 0, 2]))
    # Converting input to FloatTensor
    feature = feature.float()
    feature.requires_grad_()
    feature = feature.to(device)
    
    # Transpose labels and convert into LongTensor
    label = torch.from_numpy(label.T).type(torch.LongTensor)
       
    out = model(feature, hidden_and_cell_state) # Forward pass
    
    loss = criterion(out,label.contiguous().view(128*128)) # Computing loss
    
    print(f'Batch : {i+1} Loss : {loss}')

  print(f'Epoch : {epoch+1} Loss : {loss}')

Batch : 1 Loss : 2.547416925430298
Batch : 2 Loss : 2.532693862915039
Batch : 3 Loss : 2.529435634613037
Batch : 4 Loss : 2.5287697315216064
Batch : 5 Loss : 2.5254032611846924
Batch : 6 Loss : 2.5405335426330566
Batch : 7 Loss : 2.530756711959839
Batch : 8 Loss : 2.512718439102173
Batch : 9 Loss : 2.5271551609039307
Batch : 10 Loss : 2.520352602005005
Batch : 11 Loss : 2.532042980194092
Batch : 12 Loss : 2.5148468017578125
Batch : 13 Loss : 2.519270658493042
Batch : 14 Loss : 2.5226192474365234
Batch : 15 Loss : 2.532123565673828
Batch : 16 Loss : 2.516371965408325
Batch : 17 Loss : 2.5177488327026367
Batch : 18 Loss : 2.5201001167297363
Batch : 19 Loss : 2.5183284282684326
Batch : 20 Loss : 2.5283570289611816
Batch : 21 Loss : 2.546668291091919
Batch : 22 Loss : 2.5312142372131348
Batch : 23 Loss : 2.530093193054199
Batch : 24 Loss : 2.5212182998657227
Batch : 25 Loss : 2.497713565826416
Batch : 26 Loss : 2.524336814880371
Batch : 27 Loss : 2.5107977390289307
Batch : 28 Loss : 2.4991

Validation loop achieves loss values similar to that of a converged network 

### **Predict output**

In [0]:
def predict_next_char(model, input_char, sequence_length = 128):
  
  model.eval # set model to eval mode
  
  # Set initail hidden and cell state to 0, but now batch size is 1
  initial_hidden_and_cell_state = (torch.zeros(1, model.hidden_size, device = device),
                                      torch.zeros(1, model.hidden_size, device = device))
  
  
  # placeholder for the generated text
  seq = np.empty(sequence_length+1)
  seq[0] = character_to_integer[input_char]
  
  # Encode the input character, (1, input_size)
  input_char = torch.from_numpy(to_categorical(character_to_integer[input_char], 
                                               num_classes=model.input_size))
  input_char = input_char.float()
  
  input_char.requires_grad_()
  input_char = input_char.to(device)
  
  # Add fake dimension, as input to network needs to be batches
  input_char = input_char.unsqueeze(0)
  
  # Forward pass, without dropout layers 
  for t in range(sequence_length):
    out = model.lstm1(input_char, initial_hidden_and_cell_state)
    hidden1, cell1 = out
    
    out = model.lstm2(hidden1, initial_hidden_and_cell_state)
    hidden2, cell2 = out


    out = model.lstm3(hidden2, initial_hidden_and_cell_state)
    hidden3, cell3 = out

    out = model.lstm4(hidden3, initial_hidden_and_cell_state)
    hidden4, cell4 = out

    # Passing output of LSTMCells through fc layers
    out = model.fc1(hidden4)

    out = model.fc2(out)

    # Applying softmax, to get probabilities
    out = F.softmax(out, dim=1)
    
    # Out is now a vector of (1, input_size)
    # Get top 5 best predictions based on softmax probabilities
    prob,top_characters = out.topk(5)
    
    top_characters =top_characters.to("cpu")
    prob =prob.to("cpu")
                                      
    top_characters = top_characters.squeeze().numpy()
    prob = prob.detach().squeeze().numpy()


    
    char = np.random.choice(top_characters, p = prob/prob.sum())

    # append the character to the output sequence
    seq[t+1] = char

    # prepare the character to be fed to the next LSTM cell
    char = to_categorical(char, num_classes=model.input_size)
    char = torch.from_numpy(char).unsqueeze(0)
    
    # Type conversion before feeding the new char into LSTM
    #input_char = char.byte()
    input_char = char.float()
    #input_char.requires_grad_()
    input_char = input_char.to(device)
    
  return seq

In [52]:
# Testing on smaller sequence length
seq = predict_next_char(model, 'A', sequence_length = 128)

# Convert sequence elements type to int, 
# else we wont be able to use them as keys for integer_to_character dictionary 
seq = seq.astype(int)
print(seq)

[72 40 11 11 19 11 71 71 40 10 71 40 71 64 19 81 64 81 40 11 11 11 40 11
 11 40 11 19 40 71 40 10 71 40 19 40 19 19 19 40 10 19 71 40 40 10 71 19
 19 40 19 19 19 40 40 19 40 71 40 11 71 40 11 71 11 81 11 71 40 19 40 19
 71 19 19 71 11 40 10 40 11 11 19 71 40 19 40 11 81 64 81 11 71 71 64 40
 10 81 40 19 71 11 40 10 11 81 19 40 19 71 19 40 40 71 19 81 19 81 64 64
 81 40 19 40 71 64 71 19 11]


In [53]:
# Converting output from encoded numbers to letters to see what the model 
# actually predicted.
character_sequence = [integer_to_character[i] for i in seq]
print(character_sequence)

['A', 't', ' ', ' ', 'e', ' ', 's', 's', 't', 'h', 's', 't', 's', 'a', 'e', 'n', 'a', 'n', 't', ' ', ' ', ' ', 't', ' ', ' ', 't', ' ', 'e', 't', 's', 't', 'h', 's', 't', 'e', 't', 'e', 'e', 'e', 't', 'h', 'e', 's', 't', 't', 'h', 's', 'e', 'e', 't', 'e', 'e', 'e', 't', 't', 'e', 't', 's', 't', ' ', 's', 't', ' ', 's', ' ', 'n', ' ', 's', 't', 'e', 't', 'e', 's', 'e', 'e', 's', ' ', 't', 'h', 't', ' ', ' ', 'e', 's', 't', 'e', 't', ' ', 'n', 'a', 'n', ' ', 's', 's', 'a', 't', 'h', 'n', 't', 'e', 's', ' ', 't', 'h', ' ', 'n', 'e', 't', 'e', 's', 'e', 't', 't', 's', 'e', 'n', 'e', 'n', 'a', 'a', 'n', 't', 'e', 't', 's', 'a', 's', 'e', ' ']


In [54]:
# Single round test
res = ''.join(character_sequence)
print(res)

At  e ssthstsaenant   t  t etsthsteteeethestthseeteeettetst st s n stetesees tht  estet nan ssathntes th netesettsenenaantetsase 


### **Make the model write stuff from a seed character**

In [0]:
# Run model on validation data and see what model writes
# Starting seed is a
model.eval()

# empty list to store model output
model_output_list = []

for epoch in range(10):
  
  # Initialize hidden and cell state to zero at the beginning
  hidden_and_cell_state = model.initialize_hidden_and_cell_states()
   
  # Creating instance of dataloader class
  T = TextDataLoader(validation_set, 128,128)

  for i, (feature, label) in enumerate(T.make_batches()):
    
    # One-hot encode inputs, convert into tensor and transpose them
    feature = torch.from_numpy(to_categorical(feature, num_classes=model.input_size).transpose([1, 0, 2]))
    # Converting input to FloatTensor
    feature = feature.float()
    feature.requires_grad_()
    feature = feature.to(device)
    
    # Transpose labels and convert into LongTensor
    label = torch.from_numpy(label.T).type(torch.LongTensor)
       
    out = model(feature, hidden_and_cell_state) # Forward pass
    
    loss = criterion(out,label.contiguous().view(128*128)) # Computing loss
    
    model_output_list.append(''.join([integer_to_character[k] for k in predict_next_char(model, 'A', sequence_length = 1024)]))

  print(f'Model output for epoch {epoch+1} :')
  print(model_output_list[epoch])

Model output for epoch 1 :
Atht tthn  nsest enateetenstseteten naetsen    n nsen  t nttes t nanattthse tsentteesttesanttheestst  ees esetet tentesatheenststsenat tsttsaenat  eet  t nathss ths sss sstht tenasenans ene ttsttsatt  t te n tttet tt ssentttsttttttts t eetse thnsaathstee e  eseteetents settethssatth ssthethnettessth tt naette nes nt stet  naeensethses  nee en  nteeeeeeee  ethentetesats s t natetsat th tt ete  thee saees senatett san  eesethtt esstht  n ttetenssaesesstettestthsantttttt ss t ene e setesenenen   t stthte sananatt et  stttsenaessthethe nas tsaat sathtee n  en  t ntsseestths s enassaesaenatesaaeeesen tht ne th ettent   ees te ns sae th esth en ethts  nteettttetenaet ttsststhnt  sat  nssaaanstht  ts  n t tesanesat stsen t e enesaese ens tettt nenessatt ns  eensstt sas tsent th  thetsaaeneethtenth nth tst t s sae testte etsenttt nsstheet  ee esae stst seths ts eenthen sthetese eten  tttssee nenee etssat n s ssathnt tt sss nteset eee  tt ttttetessttsetthth st  thtsts