In [1]:
import string
import random
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


In [2]:
# Vocabulary: every character of string.printable. A character's position in
# this string is used as its class index by the encoding helpers.
all_chars       = string.printable
n_chars         = len(all_chars)

# Read the whole corpus into memory. The context manager closes the file
# handle as soon as the text is loaded instead of leaking it for the lifetime
# of the kernel.
with open('./cnus.txt') as corpus_file:
    file        = corpus_file.read()
file_len        = len(file)

print('Length of file: {}'.format(file_len))
print('All possible characters: {}'.format(all_chars))
print('Number of all possible characters: {}'.format(n_chars))

Length of file: 3381928
All possible characters: 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ 	

Number of all possible characters: 100


In [3]:
# Keep only the characters that belong to the vocabulary (all_chars); anything
# else is dropped so that every remaining character has a class index.
file = ''.join(ch for ch in file if ch in all_chars)

In [4]:
# Refresh the cached corpus length after filtering and report it.
file_len = len(file)
length_report = 'Length of file: {}'.format(file_len)
print(length_report)

Length of file: 3381831


In [5]:
# Get a random sequence of the Sherlock dataset.
def get_random_seq(seq_len=128):
    """Return a random window of ``seq_len + 1`` consecutive corpus characters.

    The extra character lets the caller split the window into an input
    (all but the last character) and a target (all but the first character)
    of ``seq_len`` characters each.

    seq_len: Length of the input sequence (default 128).
    Raises ValueError if the corpus is shorter than ``seq_len + 1``.
    """
    window = seq_len + 1
    if file_len < window:
        raise ValueError(
            'Corpus has {} characters, need at least {}.'.format(file_len, window))
    # random.randint is inclusive on both ends, so the largest valid start is
    # file_len - window; anything larger would yield a truncated slice.
    start_index = random.randint(0, file_len - window)
    return file[start_index:start_index + window]

# Convert the sequence to one-hot tensor.
def seq_to_onehot(seq):
    """Encode a character sequence as a one-hot tensor.

    Returns a tensor of shape (sequence length, 1, n_chars): the middle
    dimension is a batch of size 1 and the last dimension holds one slot per
    vocabulary character. Raises ValueError (from str.index) for a character
    that is not in all_chars.
    """
    onehot = torch.zeros(len(seq), 1, n_chars)
    class_ids = [all_chars.index(ch) for ch in seq]
    for position, class_id in enumerate(class_ids):
        # Switch on the slot of this character at this time step.
        onehot[position, 0, class_id] = 1
    return onehot

# Convert the sequence to index tensor.
def seq_to_index(seq):
    """Encode a character sequence as a column of vocabulary indices.

    Returns a tensor of shape (sequence length, 1), i.e. batch size 1, whose
    entries are the positions of the characters in all_chars. The tensor keeps
    the default dtype of torch.zeros; callers cast it as needed. Raises
    ValueError (from str.index) for a character that is not in all_chars.
    """
    indices = torch.zeros(len(seq), 1)
    class_ids = [all_chars.index(ch) for ch in seq]
    for position, class_id in enumerate(class_ids):
        indices[position, 0] = class_id
    return indices

# Sample a mini-batch including input tensor and target tensor.
def get_input_and_target():
    """Draw one random training pair from the corpus.

    The sampled window is split into two overlapping halves shifted by one
    character: the input (everything but the last character, one-hot encoded)
    and the target (everything but the first character, as long indices), so
    the target at step t is the character that follows the input at step t.
    """
    window = get_random_seq()
    context, following = window[:-1], window[1:]
    return seq_to_onehot(context), seq_to_index(following).long()

In [6]:
# Pick the compute device: the first CUDA GPU when one is available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)  # Prints 'cuda:0' when a GPU was selected.

cuda:0


In [7]:
class Net(nn.Module):
    """Character-level recurrent model: one recurrent cell plus a linear read-out.

    The cell is either an nn.RNNCell or an nn.LSTMCell with 100 hidden units;
    input and output sizes both equal n_chars (one slot per vocabulary
    character). The model processes one time step per forward call.
    """

    def __init__(self, activation='RNN'):
        """Build the model.

        activation: 'RNN' for a vanilla RNN cell or 'LSTM' for an LSTM cell.
        Raises ValueError for any other value, so a typo fails here rather
        than as a missing attribute on the first forward call.
        """
        super(Net, self).__init__()
        self.input_size  = n_chars   # Input size: Number of unique chars.
        self.hidden_size = 100       # Hidden size: 100.
        self.output_size = n_chars   # Output size: Number of unique chars.

        self.linear = nn.Linear(self.hidden_size, self.output_size)
        if activation == 'RNN':
            self.activation = nn.RNNCell(self.input_size, self.hidden_size)
        elif activation == 'LSTM':
            self.activation = nn.LSTMCell(self.input_size, self.hidden_size)
        else:
            raise ValueError(
                "activation must be 'RNN' or 'LSTM', got {!r}".format(activation))

        self.act = activation

    def forward(self, input, hidden):
        """Run a single time step.

        input:  One-hot input x_t of shape (1, n_chars).
        hidden: Previous state. For 'RNN' this is h_{t-1}; for 'LSTM' it is
                the tuple (h_{t-1}, c_{t-1}).
        Returns (output, hidden) for 'RNN' and (output, hidden, cell) for
        'LSTM', where output is the linear read-out y_t of the new hidden
        state h_t.
        """
        if self.act == 'RNN':
            hidden = self.activation(input, hidden)
            output = self.linear(hidden)
            return output, hidden
        else:
            hidden, cell = self.activation(input, hidden)
            output = self.linear(hidden)
            return output, hidden, cell

    def _zero_state(self):
        # All-zero state for batch size 1, created on the device the model's
        # parameters live on so it always matches the weights it is fed to.
        return torch.zeros(1, self.hidden_size, device=self.linear.weight.device)

    def init_hidden(self):
        """Return the initial hidden state (zeros, batch size 1)."""
        return self._zero_state()

    def init_cell(self):
        """Return the initial LSTM cell state (zeros, batch size 1)."""
        return self._zero_state()

In [8]:
# Training step function.
def train_step(net, opt, input, target, criterion=None):
    """ Run one optimisation step on a single sequence.
        net:       The network instance (its `act` attribute selects the
                   RNN or LSTM calling convention).
        opt:       The optimizer instance.
        input:     Input tensor.  Shape: [seq_len, 1, n_chars].
        target:    Target tensor. Shape: [seq_len, 1].
        criterion: Optional loss callable taking (output, target[t]). When
                   omitted, the module-level `loss_func` is used, as before.
        Returns the loss averaged over the sequence length, detached from the
        autograd graph. Raises ValueError for an empty sequence.
    """
    if criterion is None:
        criterion = loss_func   # Fall back to the loss defined by the training cell.

    seq_len = input.shape[0]    # Get the sequence length of current input.
    if seq_len == 0:
        raise ValueError('train_step needs a sequence with at least one step.')

    is_rnn = net.act == 'RNN'
    hidden = net.init_hidden()                      # Initial hidden state.
    cell   = None if is_rnn else net.init_cell()    # Cell state only exists for the LSTM.
    net.zero_grad()             # Clear the gradient.
    loss = 0                    # Initial loss.

    for t in range(seq_len):    # For each one in the input sequence.
        if is_rnn:
            output, hidden = net(input[t], hidden)
        else:
            output, hidden, cell = net(input[t], (hidden, cell))
        loss += criterion(output, target[t])

    loss.backward()             # Backward.
    opt.step()                  # Update the weights.

    # Detach so the reported value does not carry autograd history around.
    return loss.detach() / seq_len

In [9]:
# Evaluation step function.
def eval_step(net, init_seq='W', predicted_len=100):
    """Generate text by sampling from the network one character at a time.

    net:           The network instance (its `act` attribute selects the
                   RNN or LSTM calling convention).
    init_seq:      Non-empty seed string; all but its last character are used
                   to warm up the recurrent state, the last one is the first
                   input of the sampling loop.
    predicted_len: Number of characters to sample after the seed.
    Returns init_seq followed by the sampled characters.
    Raises ValueError if init_seq is empty.
    """
    if len(init_seq) == 0:
        raise ValueError('init_seq must contain at least one character.')

    is_rnn = net.act == 'RNN'

    def advance(step_input, hidden, cell):
        # One time step, hiding the difference between the two call shapes.
        if is_rnn:
            output, hidden = net(step_input, hidden)
        else:
            output, hidden, cell = net(step_input, (hidden, cell))
        return output, hidden, cell

    # Initialize the recurrent state, input and the predicted sequence.
    hidden        = net.init_hidden()
    cell          = None if is_rnn else net.init_cell()
    state_device  = hidden.device   # Keep inputs on the same device as the state.
    init_input    = seq_to_onehot(init_seq).to(state_device)
    predicted_seq = init_seq

    # Sampling needs no gradients; skipping them avoids building a graph
    # that grows with every generated character.
    with torch.no_grad():
        # Use initial string to "build up" hidden state.
        for t in range(len(init_seq) - 1):
            _, hidden, cell = advance(init_input[t], hidden, cell)
        # Set current input as the last character of the initial string.
        current_input = init_input[-1]

        # Predict more characters after the initial string.
        for _ in range(predicted_len):
            output, hidden, cell = advance(current_input, hidden, cell)

            # The network emits logits. Softmax yields the same distribution
            # as normalising exp(logits) but cannot overflow to inf.
            probs = torch.softmax(output.view(-1), dim=0)
            predicted_index = int(torch.multinomial(probs, 1).item())

            # Add predicted character to the sequence and use it as next input.
            predicted_char  = all_chars[predicted_index]
            predicted_seq  += predicted_char
            current_input   = seq_to_onehot(predicted_char)[0].to(state_device)

    return predicted_seq

### Experiment

In [10]:
# Perform n_trials each for standard RNN and LSTM.

n_trials    = 3      # Number of trials per model
iters       = 20000  # Number of training iterations per trial.
print_iters = 100    # Number of iterations for each log printing.
seed        = 0      # Seed for the random generators used by the experiment.

# Seed the generators the experiment draws from (sequence sampling uses
# `random`; weight initialisation and character sampling use torch) so that a
# fresh Restart & Run All starts from the same random state every time.
random.seed(seed)
torch.manual_seed(seed)

In [None]:
# The loss variables: one slot per logging window, summed over all trials.
all_losses_rnn = np.zeros(iters // print_iters)

# Training procedure.
for i in range(n_trials):
    net_rnn = Net()     # Create a new network instance.
    net_rnn.to(device)
    opt_rnn   = torch.optim.Adam(net_rnn.parameters(), lr=0.005)
    loss_func = nn.CrossEntropyLoss()   # Read as a global by train_step.
    losses    = []      # Windowed mean losses of this trial.
    loss_sum  = 0
    for j in range(iters):
        # Named seq_input (not `input`) so the builtin input() stays usable
        # in the rest of the notebook.
        seq_input, target = get_input_and_target()                  # Fetch input and target.
        seq_input, target = seq_input.to(device), target.to(device) # Move to GPU memory.
        loss      = train_step(net_rnn, opt_rnn, seq_input, target) # Calculate the loss.
        loss_sum += loss.item()                                     # Accumulate the loss.

        # Print the log.
        if j % print_iters == print_iters - 1:
            print('rnn_trial:{}, iter:{}/{} loss:{}'.format(i, j, iters, loss_sum / print_iters))
            print('generated sequence: {}\n'.format(eval_step(net_rnn)))

            # Track the loss (list append instead of re-allocating an array).
            losses.append(loss_sum / print_iters)
            loss_sum = 0
    all_losses_rnn += np.array(losses)

mean_losses_rnn = all_losses_rnn / n_trials

rnn_trial:0, iter:99/20000 loss:3.0149088311195373
generated sequence: WM .yotor ahmmro  heeaxig iT hotfim y'ers weorsl  othlry   e rkrn a a led iunoot dt  ose   s , p Im d

rnn_trial:0, iter:199/20000 loss:2.5957364106178282
generated sequence: Wvuedclis  tomerroty ts mefhintyuler cesont"ey dtknde?disdit  he houvkr, har Ting in.ore
 k
 monomeon

rnn_trial:0, iter:299/20000 loss:2.4143584609031676
generated sequence: Waveetraspeast to

     thep,
a   thacI bandis pmons the sas-ofpaseteYind beds soind in. the pudy..I


rnn_trial:0, iter:399/20000 loss:2.2768567776679993
generated sequence: Woon,
     "

     "Af the hacint te is Ire. at iad the chalk is is is ge'io, ing min? t
      Bad "W

rnn_trial:0, iter:499/20000 loss:2.1708909237384795
generated sequence: Wend ag heot ro fintre
     bre pod anosh-

     Lupnsoilure
     nicin or
     prerlen.

      Whanp

rnn_trial:0, iter:599/20000 loss:2.106221311092377
generated sequence: Whed ssaiins of
     the save brobreHnvert cucps siren 

rnn_trial:0, iter:4799/20000 loss:1.759950270652771
generated sequence: Weys. I sury in aghen ate usele.

     "But o to tter ther a sharmy mes, sho or our deay. He look sti

rnn_trial:0, iter:4899/20000 loss:1.7770441353321076
generated sequence: Will
     Cloident wand fure cone, he bent withwore! Wold, from if youlloct, rethe what eevers where 

rnn_trial:0, iter:4999/20000 loss:1.7556602215766908
generated sequence: Well whath herraidase, anded. I was sobly-sowarible.

 
     Vustestan. It ouls. If you vast had cwas

rnn_trial:0, iter:5099/20000 loss:1.7402511966228484
generated sequence: We bugan'ves on the was enou. I froppee mine. EA as no dinve outtalm supsiaded us no
     rome makfor

rnn_trial:0, iter:5199/20000 loss:1.7512087976932527
generated sequence: Wiod I is as as eave he was inter befere's upran. The sars
     shave I
    ."

     "I and yot
     

rnn_trial:0, iter:5299/20000 loss:1.7385446393489838
generated sequence: Whpor durkith insteannay he orling and wish and 

rnn_trial:0, iter:9499/20000 loss:1.7358119678497315
generated sequence: Way steprition con jow."

     "I's dole objenly who sught hads of you casting hirraghy own
     shac

rnn_trial:0, iter:9599/20000 loss:1.7003621995449065
generated sequence: We presuntigg younatet, at
     him, that yepruee this man up a
     came arregnonct--,
     not a Nu

rnn_trial:0, iter:9699/20000 loss:1.6898574149608612
generated sequence: Wens, fool of the laster a rooms heard as
     of the angenion the
     into Honkesorn, stapstions av

rnn_trial:0, iter:9799/20000 loss:1.6957083320617676
generated sequence: What's flayes. He every anev 'remance. Scervehhed his asked of husher.!"I, Whom my combono
     hill.

rnn_trial:0, iter:9899/20000 loss:1.7169000101089478
generated sequence: Wh the findon frome fill on the bloottames on must will."

     "What
     Dome to we these. No at ag

rnn_trial:0, iter:9999/20000 loss:1.6968615138530732
generated sequence: When this the liff nog to, with enstinatised to

rnn_trial:0, iter:14199/20000 loss:1.6579045009613038
generated sequence: Wet
     and the his had not, looks, sinngghthing, was, Proth save the stpacining
     excem in volle

rnn_trial:0, iter:14299/20000 loss:1.671894680261612
generated sequence: Wive beecters and shouvep efoullowed in my dowef urtien, to me the bound goifedured fissester. Acter 

rnn_trial:0, iter:14399/20000 loss:1.6748746132850647
generated sequence: Wey. The sury, it
     the his Cowers up, which Bepoutout old the santer the thirruss the tropperore 

rnn_trial:0, iter:14499/20000 loss:1.6547527194023133
generated sequence: Whis meswers direltist litts, and that this uttern, become.

     "Not of no usent Got St. She from h

rnn_trial:0, iter:14599/20000 loss:1.6410621297359467
generated sequence: Wit, his hall was I
     extless dyen in the moundce and I
     purners the menes, and he wough I vat

rnn_trial:0, iter:14699/20000 loss:1.636257246732712
generated sequence: Why man, treds teuding
     impleced had se

In [None]:
# The loss variables: one slot per logging window, summed over all trials.
all_losses_lstm = np.zeros(iters // print_iters)

# Training procedure.
for i in range(n_trials):
    net_lstm = Net('LSTM')    # Create a new network instance.
    net_lstm.to(device)
    opt_lstm  = torch.optim.Adam(net_lstm.parameters(), lr=0.005)
    loss_func = nn.CrossEntropyLoss()   # Read as a global by train_step.
    losses    = []      # Windowed mean losses of this trial.
    loss_sum  = 0
    for j in range(iters):
        # Named seq_input (not `input`) so the builtin input() stays usable
        # in the rest of the notebook.
        seq_input, target = get_input_and_target()                    # Fetch input and target.
        seq_input, target = seq_input.to(device), target.to(device)   # Move to GPU memory.
        loss      = train_step(net_lstm, opt_lstm, seq_input, target) # Calculate the loss.
        loss_sum += loss.item()                                       # Accumulate the loss.

        # Print the log.
        if j % print_iters == print_iters - 1:
            print('lstm_trial:{}, iter:{}/{} loss:{}'.format(i, j, iters, loss_sum / print_iters))
            print('generated sequence: {}\n'.format(eval_step(net_lstm)))

            # Track the loss (list append instead of re-allocating an array).
            losses.append(loss_sum / print_iters)
            loss_sum = 0
    all_losses_lstm += np.array(losses)

mean_losses_lstm = all_losses_lstm / n_trials

In [None]:
# Each stored loss is the mean over one logging window of print_iters training
# iterations, recorded at the end of that window. Plot against the actual
# iteration count so the 'iters' axis label matches the data.
rnn_log_iters  = np.arange(1, len(mean_losses_rnn) + 1) * print_iters
lstm_log_iters = np.arange(1, len(mean_losses_lstm) + 1) * print_iters

# Derive the title from the configured iteration count rather than hard-coding it.
plt.title("Average Loss Over {:,} Iterations".format(iters))
plt.xlabel('iters')
plt.ylabel('loss')
plt.plot(rnn_log_iters, mean_losses_rnn)
plt.plot(lstm_log_iters, mean_losses_lstm)
plt.legend(['RNN', 'LSTM'])
plt.show()

In [None]:
# Keep a handle on pyplot's current figure.
# NOTE(review): this runs in its own cell after plt.show(); with the inline
# backend that presumably yields a new, empty figure rather than the loss
# plot — confirm before saving or exporting `fig`.
fig = plt.gcf()

In [None]:
# Sample 600 characters from the RNN left over from the last training trial.
rnn_sample = eval_step(net_rnn, predicted_len=600)
print(rnn_sample)

In [None]:
# Sample 600 characters from the LSTM left over from the last training trial.
lstm_sample = eval_step(net_lstm, predicted_len=600)
print(lstm_sample)

In [None]:
# Skeleton of the loss-summary table, keyed by column name.
# NOTE(review): the columns have different lengths (5, 1 and 0 entries); the
# loss columns look like unfilled placeholders — fill them in before relying
# on this table.
d = {}
d['Iterations']   = [1, 5000, 10000, 15000, 20000]
d['Standard RNN'] = [1.0]
d['LSTM']         = []

In [None]:
# `d` is a plain dict, which has no reset_index()/to_markdown(); build a
# DataFrame from it first. Wrapping every column in a Series lets pandas pad
# columns of unequal length with NaN instead of raising.
loss_table = pd.DataFrame({column: pd.Series(values) for column, values in d.items()})
# index=False already hides the row index, so no reset_index() is needed.
print(loss_table.to_markdown(index=False))

In [None]:
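# NOTE: write_html() is a Plotly figure method; `fig` above comes from
# matplotlib's plt.gcf(), where the equivalent export would be fig.savefig(...).
# fig.write_html('fig2.html', include_plotlyjs='cdn')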