In [13]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Categorical
import numpy as np

In [14]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [15]:
device

device(type='cuda')

In [16]:
def count_true(a1, a2):
  e = torch.eq(a1, a2)
  e = e.cpu().numpy()
  c = np.count_nonzero(e)
  return c

In [17]:
def preprocess():
   # load the text file
    data = open("dataset.txt", 'r').read()
    chars = sorted(list(set(data)))
    data_size, vocab_size = len(data), len(chars)
    
    # char to index and index to char maps
   
    char_to_ix = { ch:i for i,ch in enumerate(chars) }
    ix_to_char = { i:ch for i,ch in enumerate(chars) }
    
    # convert data from chars to indices
    data = list(data)
    for i, ch in enumerate(data):
        data[i] = char_to_ix[ch]
    # data tensor on device
    data = torch.tensor(data).to(device)
    data = torch.unsqueeze(data, dim=1)
    
    return data , ix_to_char , data_size, vocab_size

In [18]:
class RNN(nn.Module):
    def __init__(self, input_size, output_size, hidden_size, num_layers):
        super(RNN, self).__init__()
        self.embedding = nn.Embedding(input_size, input_size)
        self.rnn = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers)
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, input_seq, hidden_state):
        embedding = self.embedding(input_seq)
        output, hidden_state = self.rnn(embedding, hidden_state)
        output = self.fc(output)
        return output, (hidden_state[0].detach(), hidden_state[1].detach())

In [19]:
def test(data,data_size,rnn,ix_to_char):

        data_ptr = 0
        hidden_state = None
        # random character
        rand_index = np.random.randint(data_size-1)
        input_seq = data[rand_index : rand_index+1]
        
        for i in range(400):
          
            # forward pass
            output, hidden_state = rnn(input_seq, hidden_state)
            
            # construct categorical distribution and sample a character
            output = F.softmax(torch.squeeze(output), dim=0)
            dist = Categorical(output)
            index = dist.sample()
            
            # print the sampled character
            print(ix_to_char[index.item()], end='')
            
            # next input is current output
            input_seq[0][0] = index.item()
            data_ptr += 1
          

In [20]:
def train(data , ix_to_char, data_size, vocab_size,rnn,epochs,seq_len,loss_fn,optimizer):
    acc = []
    loss_list = []
    for i_epoch in range(1, epochs+1):
        
        # random starting point (1st 100 chars) from data to begin
        data_ptr = np.random.randint(100)
        n = 0
        running_loss = 0
        hidden_state = None
        true_predicts = 0
        while data_ptr + seq_len + 1 < data_size:
          
            input_seq = data[data_ptr : data_ptr+seq_len]
            target_seq = data[data_ptr+1 : data_ptr+seq_len+1]
            target_seq_onehot = torch.squeeze(target_seq)
            
            # forward pass
            output, hidden_state = rnn(input_seq, hidden_state)
            # compute loss
            loss = loss_fn(F.softmax(torch.squeeze(output), dim=1), F.softmax(torch.squeeze(output), dim=1))
            running_loss += loss.item()
            
            output = F.softmax(torch.squeeze(output), dim=1)
            dist = Categorical(output)
            index = dist.sample()
            
            
            # compute gradients and take optimizer step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # compute true predicts
            true_predicts += count_true(torch.squeeze(target_seq), index)
            # update the data pointer
            data_ptr += seq_len
            n +=1
           
        acc.append(true_predicts*100/n)  
        loss_list.append(running_loss/n)  
        # print loss and save weights after every epoch
        print("Epoch: {0} \t Loss: {1:.4f} \t accuracy: {2:.4f}".format(i_epoch, running_loss/n,true_predicts*100/data_size))
        
    return acc,loss_list


# KLDivLoss

In [21]:
def runn(hidden_size, seq_len, num_layers, lr, epochs):
  # Hyperparameters
  hidden_size = hidden_size  
  # length of LSTM sequence 
  seq_len = seq_len
  # num of layers in LSTM layer stack      
  num_layers =num_layers      
  lr = lr   
  epochs = epochs
  data , ix_to_char, data_size, vocab_size =  preprocess()
  # model 
  rnn = RNN(vocab_size, vocab_size, hidden_size, num_layers).to(device)
  # loss function and optimizer
  loss_fn = nn.KLDivLoss()
  optimizer = torch.optim.Adam(rnn.parameters(), lr=lr)

  acc_list,loss_list = train(data , ix_to_char, data_size, vocab_size,rnn,epochs,seq_len,loss_fn,optimizer)
  print("generate text ------------------------------")
  test(data,data_size,rnn,ix_to_char)
  
  return acc_list,loss_list

In [22]:
acc_list,loss_list = runn(512, 250, 3, 0.001, 10)

  "reduction: 'mean' divides the total loss by both the batch size and the support size."


Epoch: 1 	 Loss: -0.0549 	 accuracy: 1.2434
Epoch: 2 	 Loss: -0.0549 	 accuracy: 1.2495
Epoch: 3 	 Loss: -0.0549 	 accuracy: 1.2457
Epoch: 4 	 Loss: -0.0549 	 accuracy: 1.2623
Epoch: 5 	 Loss: -0.0549 	 accuracy: 1.2197
Epoch: 6 	 Loss: -0.0549 	 accuracy: 1.2459
Epoch: 7 	 Loss: -0.0549 	 accuracy: 1.2457
Epoch: 8 	 Loss: -0.0549 	 accuracy: 1.2516
Epoch: 9 	 Loss: -0.0549 	 accuracy: 1.2443
Epoch: 10 	 Loss: -0.0549 	 accuracy: 1.2439
generate text ------------------------------
BKpS.M0c moVVjpvW_- )GS9aF3},J.;rAS0gdrWcKOLixS4k
-F1ghcCPSav0fT1vre)FjIdIT-gpJ4:L.y,A9m4"nYL.qWp.hx7TE}!G9NyK4D"O}^J"!}ZfN
nSqs•G•6!Sf'sMcZ!FbA' hDU7)f3}laZA_G	K(QD!r"r_^OBF3DIy/Q0E3,qf6XA(
}:•/s.?,ZFc!^) DRz}7cOLZa;0ZidJ_(Oi;d	ES!(C1/kTzat1fYjeuI36C)2K,U.ZaVqNDvi-. ;'r	h' sU.q3FcEYsR^Dde	9tOkZBLit)•^•kCXBfQ2nTrw•kj9cwEnCT))QUF?ny/dRfe0V_aCLrWye4gn-7.9eEwo0pHg	PE}UY)/L2JDue^SO7grqdE.i4"xzNIcpKT_d

# NLLLoss

In [23]:
def train(data , ix_to_char, data_size, vocab_size,rnn,epochs,seq_len,loss_fn,optimizer):
    acc = []
    loss_list = []
    for i_epoch in range(1, epochs+1):
        
        # random starting point (1st 100 chars) from data to begin
        data_ptr = np.random.randint(100)
        n = 0
        running_loss = 0
        hidden_state = None
        true_predicts = 0

        while data_ptr + seq_len + 1 < data_size:
          
            input_seq = data[data_ptr : data_ptr+seq_len]
            target_seq = data[data_ptr+1 : data_ptr+seq_len+1]
            target_seq = torch.squeeze(target_seq)
            
            # forward pass
            output, hidden_state = rnn(input_seq, hidden_state)
            loss = loss_fn(torch.squeeze(output), torch.squeeze(target_seq))
            running_loss += loss.item()
            # compute loss
            output = F.softmax(torch.squeeze(output), dim=1)
            dist = Categorical(output)
            index = dist.sample()
            
            
            # compute gradients and take optimizer step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # compute true predicts
            true_predicts += count_true(torch.squeeze(target_seq), index)
            # update the data pointer
            data_ptr += seq_len
            n +=1
           
        acc.append(true_predicts*100/n)  
        loss_list.append(running_loss/n)  
        # print loss and save weights after every epoch
        print("Epoch: {0} \t Loss: {1:.4f} \t accuracy: {2:.4f}".format(i_epoch, running_loss/n,true_predicts*100/data_size))
        
    return acc,loss_list


In [24]:
def runn(hidden_size, seq_len, num_layers, lr, epochs):
  # Hyperparameters
  hidden_size = hidden_size  
  # length of LSTM sequence 
  seq_len = seq_len
  # num of layers in LSTM layer stack      
  num_layers =num_layers      
  lr = lr   
  epochs = epochs
  data , ix_to_char, data_size, vocab_size =  preprocess()
  # model 
  rnn = RNN(vocab_size, vocab_size, hidden_size, num_layers).to(device)
  # loss function and optimizer
  loss_fn = nn.NLLLoss()
  optimizer = torch.optim.Adam(rnn.parameters(), lr=lr)

  acc_list,loss_list = train(data , ix_to_char, data_size, vocab_size,rnn,epochs,seq_len,loss_fn,optimizer)
  print("generate text ------------------------------")
  test(data,data_size,rnn,ix_to_char)
  
  return acc_list,loss_list

In [26]:
acc_list,loss_list = runn(512, 150, 3, 0.01, 5)

Epoch: 1 	 Loss: -9183.8288 	 accuracy: 17.6342
Epoch: 2 	 Loss: -27571.6156 	 accuracy: 17.7413
Epoch: 3 	 Loss: -46060.6591 	 accuracy: 17.7416
Epoch: 4 	 Loss: -64626.3289 	 accuracy: 17.7414
Epoch: 5 	 Loss: -83299.3643 	 accuracy: 17.7416
generate text ------------------------------
                                                                                                                                                                                                                                                                                                                                                                                                                