In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np

from collections import Counter

In [2]:
import os
# Show the contents of the current working directory (sanity check of the runtime).
print(os.listdir())

['.config', 'sample_data']


In [3]:
# Enable autograd anomaly detection, which helps in finding errors or
# unexpected behavior related to gradients (e.g. during training backprop).
# NOTE(review): this is a debugging aid and presumably adds overhead to every
# backward pass — confirm whether it should stay on for the full training run.
torch.autograd.set_detect_anomaly(True)

<torch.autograd.anomaly_mode.set_detect_anomaly at 0x79b0dda56890>

In [4]:
# Hyperparameters shared by the cells below.
batch_size = 64  # number of sequences per batch
seq_size = 100  # number of characters per sequence
embedding_size = 256  # width of the character embedding
lstm_size = 248  # LSTM hidden size  # NOTE(review): unusual value — confirm it is not a typo for 256
rnn_size=1024  # hidden size for the GRU-based RNNModule alternative
gradients_norm = 5  # max gradient norm passed to clip_grad_norm_ during training
# set device parameter
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
# Display whether PyTorch can see a CUDA device (this decides `device` above).
torch.cuda.is_available()

True

In [6]:
# Useful during multiple runs
# Clear gpu memory
# `device` is a torch.device object, which does not compare equal to the plain
# string 'cuda'; check its `.type` so the cache is actually cleared on GPU.
if device.type == 'cuda':
  torch.cuda.empty_cache()

In [7]:
# # read document
# with open('./sri_aurobindo_all_lines_final.txt', 'r') as f:
#     text = f.read()

In [8]:
import tensorflow as tf
# Fetch the Shakespeare corpus through Keras' get_file helper, which returns a local path.
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')
# Read through a context manager so the file handle is closed right away
# instead of being left for the garbage collector.
with open(path_to_file, 'rb') as f:
    text = f.read().decode(encoding='utf-8')

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt


In [9]:
# Vocabulary: every distinct character of the corpus, in sorted order.
vocab = list(set(text))
vocab.sort()
print('Num of chars: ', len(vocab))

Num of chars:  65


In [10]:
def vocab_map(chars_list):
    """Build the two lookup tables between characters and integer ids.

    Args:
        chars_list: iterable of characters; each character's id is its
            position in this iterable.

    Returns:
        (char_to_int, int_to_char): a dict mapping character -> id and the
        inverse dict mapping id -> character.
    """
    # Use the argument rather than the notebook-global `vocab`, so the function
    # maps whatever vocabulary it is actually given.
    char_to_int = {c: i for i, c in enumerate(chars_list)}
    int_to_char = {i: c for c, i in char_to_int.items()}
    return char_to_int, int_to_char


In [11]:
# Global lookup tables used by ids_from_chars / chars_from_ids below.
c_to_i, i_to_c = vocab_map(vocab)

def ids_from_chars(chars: str):
  """Translate each character of `chars` into its integer id via the global `c_to_i` table."""
  encoded = []
  for ch in chars:
    encoded.append(c_to_i[ch])
  return encoded

def chars_from_ids(ids):
  """Translate integer ids back into a single string via the global `i_to_c` table."""
  decoded = (i_to_c[idx] for idx in ids)
  return ''.join(decoded)


In [12]:
# Round-trip check: encode a sample string to ids and decode it back.
sample_seq = "hello"
print('sample: ', sample_seq)
sample_seq_int = ids_from_chars(sample_seq)
print('ids from chars: ', sample_seq_int)
sample_seq_char = chars_from_ids(sample_seq_int)
print('chars from ids: ', sample_seq_char)

sample:  hello
ids from chars:  [46, 43, 50, 50, 53]
chars from ids:  hello


In [13]:
def get_batches(full_text, ids_from_chars, batch_size, seq_size):
    """Yield (inputs, targets) batches for next-character prediction.

    The text is vectorized, truncated to a whole number of batches, and laid
    out as a (num_batches * batch_size, seq_size) matrix. Targets are the
    inputs shifted left by one character.

    Args:
        full_text: the text to batch.
        ids_from_chars: callable turning `full_text` into a sequence of int ids.
        batch_size: number of rows per yielded batch.
        seq_size: number of ids per row.

    Yields:
        Tuples (X, Y) of int64 numpy arrays, each of shape (batch_size, seq_size).
        Nothing is yielded when the text is shorter than one full batch.
    """
    # Explicit int64 so the dtype does not depend on the platform's default integer.
    text_vectorized = np.asarray(ids_from_chars(full_text), dtype=np.int64)
    ids_per_batch = batch_size * seq_size
    num_batches = len(text_vectorized) // ids_per_batch
    if num_batches == 0:
        # Not enough text for a single batch: produce an empty generator
        # instead of failing on the `Ys[-1]` assignment below.
        return

    usable = num_batches * ids_per_batch
    Xs = text_vectorized[:usable]
    Ys = np.empty_like(Xs)
    Ys[:-1] = Xs[1:]
    # The last target is the id following the truncated text, or wraps around
    # to the first id when the whole text was used.
    Ys[-1] = Xs[0] if usable == len(text_vectorized) else text_vectorized[usable]
    Xs = Xs.reshape(num_batches * batch_size, seq_size)
    Ys = Ys.reshape(num_batches * batch_size, seq_size)

    # iterate over rows of Xs and Ys to generate batches
    for start in range(0, num_batches * batch_size, batch_size):
        yield Xs[start:start + batch_size, :], Ys[start:start + batch_size, :]

In [14]:
# Batch generator over the whole corpus; later cells pull batches from it with next().
data_gen = get_batches(text, ids_from_chars, batch_size, seq_size)


In [15]:
# Pull the first batch and check that inputs and targets have matching shapes.
X,Y = next(data_gen)
print('X shape: ', X.shape, 'Y shape: ', Y.shape)

X shape:  (64, 100) Y shape:  (64, 100)


In [16]:
class RNNModule(nn.Module):
    """Character-level model: embedding -> single-layer GRU -> linear projection to the vocabulary."""

    def __init__(self, n_vocab, seq_size=100, embedding_size=64, rnn_size=1024):
        """
        Args:
            n_vocab: vocabulary size (embedding rows and output logits).
            seq_size: stored on the module; not used by the layers themselves.
            embedding_size: width of the embedding vectors.
            rnn_size: GRU hidden size.
        """
        super().__init__()
        self.seq_size = seq_size
        self.rnn_size = rnn_size
        self.embedding = nn.Embedding(n_vocab, embedding_size)
        self.rnn = nn.GRU(embedding_size,
                          rnn_size,
                          batch_first=True)
        self.dense = nn.Linear(rnn_size, n_vocab)

    def forward(self, x, prev_state):
        """Run ids `x` through the network starting from `prev_state`.

        Returns:
            (logits, state): per-position vocabulary scores and the GRU state
            after the last position.
        """
        embed = self.embedding(x)
        output, state = self.rnn(embed, prev_state)
        logits = self.dense(output)

        return logits, state

    def zero_state(self, batch_size):
        """Return an all-zero initial hidden state of shape (1, batch_size, rnn_size).

        The state is created with the same device and dtype as the module's
        parameters, so it can be fed to forward() without a manual transfer
        (an extra `.to(device)` by the caller is a harmless no-op).
        """
        ref = self.dense.weight
        return torch.zeros(1, batch_size, self.rnn_size, device=ref.device, dtype=ref.dtype)


In [17]:
class LSTMModule(nn.Module):
    """Character-level model: embedding -> single-layer LSTM -> linear projection to the vocabulary."""

    def __init__(self, n_vocab, seq_size=100, embedding_size=64, lstm_size=248):
        """
        Args:
            n_vocab: vocabulary size (embedding rows and output logits).
            seq_size: stored on the module; not used by the layers themselves.
            embedding_size: width of the embedding vectors.
            lstm_size: LSTM hidden size.
        """
        super().__init__()
        self.seq_size = seq_size
        self.lstm_size = lstm_size
        self.embedding = nn.Embedding(n_vocab, embedding_size)
        self.lstm = nn.LSTM(embedding_size,
                            lstm_size,
                            batch_first=True)
        self.dense = nn.Linear(lstm_size, n_vocab)

    def forward(self, x, prev_state):
        """Run ids `x` through the network starting from `prev_state`.

        Args:
            x: integer ids fed to the embedding.
            prev_state: (hidden, cell) tuple as produced by zero_state() or a
                previous forward() call.

        Returns:
            (logits, state): per-position vocabulary scores and the
            (hidden, cell) tuple after the last position.
        """
        embed = self.embedding(x)
        output, state = self.lstm(embed, prev_state)
        logits = self.dense(output)

        return logits, state

    def zero_state(self, batch_size):
        """Return all-zero (hidden, cell) states, each of shape (1, batch_size, lstm_size).

        Both tensors are created with the same device and dtype as the
        module's parameters, so they can be fed to forward() without a manual
        transfer (an extra `.to(device)` by the caller is a harmless no-op).
        """
        ref = self.dense.weight
        shape = (1, batch_size, self.lstm_size)
        return (torch.zeros(shape, device=ref.device, dtype=ref.dtype),
                torch.zeros(shape, device=ref.device, dtype=ref.dtype))

In [18]:
def get_loss_and_train_op(net, lr=0.001):
    """Create the training objective and optimizer for `net`.

    Returns:
        (criterion, optimizer): a cross-entropy loss and an Adam optimizer
        over all of `net`'s parameters with learning rate `lr`.
    """
    return nn.CrossEntropyLoss(), torch.optim.Adam(net.parameters(), lr=lr)

In [19]:
# Build an untrained model for the smoke test below; swap the commented line
# in to try the GRU-based variant instead of the LSTM.
# net = RNNModule(len(vocab), seq_size, embedding_size, rnn_size) # rnn
net = LSTMModule(len(vocab), seq_size, embedding_size, lstm_size) # lstm
net = net.to(device)
net

LSTMModule(
  (embedding): Embedding(65, 256)
  (lstm): LSTM(256, 248, batch_first=True)
  (dense): Linear(in_features=248, out_features=65, bias=True)
)

In [20]:

# Smoke test: push one batch through the untrained network and compute its loss.
# state = net.zero_state(batch_size) # rnn
state_h, state_c = net.zero_state(batch_size) # lstm

# Transfer data to GPU
# state = state.to(device) # rnn
state_h = state_h.to(device) # lstm
state_c = state_c.to(device) # lstm


X_inp, Y_inp = next(data_gen)
X_inp = torch.tensor(X_inp).to(device)
Y_inp = torch.tensor(Y_inp).to(device)


# logits, state = net(X_inp, state) # rnn
logits, (state_h, state_c) = net(X_inp, (state_h, state_c)) # lstm

# Note: the logits come out as (batch, seq, vocab) while the targets are (batch, seq)
# (see the shapes printed below). The logits are transposed to (batch, vocab, seq)
# before being handed to the criterion so that the class scores sit on dim 1.
print('Net output shape: ', logits.shape, 'Expected Y shape: ', Y_inp.shape)

criterion, optimizer = get_loss_and_train_op(net)
loss = criterion(logits.transpose(1, 2), Y_inp)
print('Loss = ', loss.item())


Net output shape:  torch.Size([64, 100, 65]) Expected Y shape:  torch.Size([64, 100])
Loss =  4.156425952911377


In [21]:
def train_model(full_text, ids_from_chars, n_vocab, epochs=20, lr=0.001):
    """Train a fresh LSTMModule on `full_text` and return it.

    Reads the notebook-level settings `batch_size`, `seq_size`,
    `embedding_size`, `lstm_size`, `gradients_norm` and `device`.

    Args:
        full_text: training corpus.
        ids_from_chars: callable turning text into a sequence of int ids.
        n_vocab: vocabulary size used to build the model.
        epochs: number of passes over the corpus.
        lr: Adam learning rate.

    Returns:
        The trained network (left in training mode).
    """
    # model instance
    # net = RNNModule(n_vocab, seq_size, embedding_size, rnn_size) # rnn
    # Build from the `n_vocab` argument rather than the global `vocab`.
    net = LSTMModule(n_vocab, seq_size, embedding_size, lstm_size) # lstm
    net = net.to(device)
    criterion, optimizer = get_loss_and_train_op(net, lr)

    num_batches = len(full_text) // (batch_size * seq_size)

    # Tell it we are in training mode (once is enough; nothing below leaves it)
    net.train()

    for e in range(epochs):
        batches = get_batches(full_text, ids_from_chars, batch_size, seq_size)
        # state = net.zero_state(batch_size) # rnn
        state_h, state_c = net.zero_state(batch_size) # lstm

        # Transfer data to GPU
        # state = state.to(device) # rnn
        state_h = state_h.to(device) # lstm
        state_c = state_c.to(device) # lstm

        # NOTE(review): the state is carried from one batch to the next, but
        # get_batches slices consecutive *rows* of a (rows, seq_size) matrix, so
        # row j of a batch is not the continuation of row j of the previous
        # batch (unless batch_size == 1). Confirm whether the state should be
        # reset per batch or the batch layout changed.
        for iteration, (x, y) in enumerate(batches):
            # Reset all gradients
            optimizer.zero_grad()

            # Transfer data to GPU
            x = torch.tensor(x).to(device)
            y = torch.tensor(y).to(device)

            # logits, state = net(x, state) # rnn
            logits, (state_h, state_c) = net(x, (state_h, state_c)) # lstm

            loss = criterion(logits.transpose(1, 2), y)

            # Cut the graph here so the next batch does not backpropagate
            # into this one.
            state_h = state_h.detach() # lstm
            state_c = state_c.detach() # lstm
            # state = state.detach() # rnn

            loss_value = loss.item()

            # Perform back-propagation. The incoming state was detached on the
            # previous iteration, so each graph is traversed exactly once and
            # does not need to be retained.
            loss.backward()

            _ = torch.nn.utils.clip_grad_norm_(net.parameters(), gradients_norm)

            # Update the network's parameters
            optimizer.step()

            if iteration % 100 == 0:
                print(f'Epoch: {e}/{epochs} Iteration: {iteration}/{num_batches} Loss: {loss_value}')

    return net

In [22]:
# Train on the full corpus; swap the commented line in when train_model is
# switched to the GRU-based variant.
# rnn_net = train_model(text, ids_from_chars, len(vocab))
lstm_net = train_model(text, ids_from_chars, len(vocab))

Epoch: 0/20 Iteration: 0/174 Loss: 4.176697254180908
Epoch: 0/20 Iteration: 100/174 Loss: 2.1926980018615723
Epoch: 1/20 Iteration: 0/174 Loss: 2.1061949729919434
Epoch: 1/20 Iteration: 100/174 Loss: 1.8128465414047241
Epoch: 2/20 Iteration: 0/174 Loss: 1.8983770608901978
Epoch: 2/20 Iteration: 100/174 Loss: 1.640510082244873
Epoch: 3/20 Iteration: 0/174 Loss: 1.768168330192566
Epoch: 3/20 Iteration: 100/174 Loss: 1.5331469774246216
Epoch: 4/20 Iteration: 0/174 Loss: 1.6858881711959839
Epoch: 4/20 Iteration: 100/174 Loss: 1.4579989910125732
Epoch: 5/20 Iteration: 0/174 Loss: 1.6302502155303955
Epoch: 5/20 Iteration: 100/174 Loss: 1.404670000076294
Epoch: 6/20 Iteration: 0/174 Loss: 1.5886951684951782
Epoch: 6/20 Iteration: 100/174 Loss: 1.3629822731018066
Epoch: 7/20 Iteration: 0/174 Loss: 1.5567364692687988
Epoch: 7/20 Iteration: 100/174 Loss: 1.3273605108261108
Epoch: 8/20 Iteration: 0/174 Loss: 1.5302850008010864
Epoch: 8/20 Iteration: 100/174 Loss: 1.300691843032837
Epoch: 9/20 Ite

In [23]:
def one_step(net, ix, state, temperature = 1):
  """Sample the next character for each sequence in `ix`.

  Args:
    net: model called as `net(ix, state)` and returning `(output, state)`.
    ix: tensor of input ids; only the output at the last position is used.
    state: recurrent state passed to `net`.
    temperature: divisor applied to the scores before the softmax.

  Returns:
    (next_chars, state): a list with one sampled character per row of `ix`,
    and the state returned by `net`.
  """
  # Pure inference: without no_grad the returned state keeps the autograd
  # graph alive, and that graph grows with every step when the state is fed
  # back in by the caller.
  with torch.no_grad():
    output, state = net(ix, state)

    last_predicted_char_output = output[:,-1]
    scaled_chars_probs = last_predicted_char_output / temperature

    chars_probs = F.softmax(scaled_chars_probs, dim=-1)
    predicted_ids = torch.multinomial(chars_probs, num_samples=1).squeeze(-1)

  # `char_id` rather than `id`, to avoid shadowing the builtin
  next_chars = [chars_from_ids([char_id]) for char_id in predicted_ids.tolist()]

  return next_chars, state

In [27]:
def generate_text(net, start_text, temperature=1, num_chars=1000):
    """Generate text by sampling from `net` one character at a time.

    Args:
        net: trained model exposing `zero_state` (LSTM variant: returns a
            (hidden, cell) tuple).
        start_text: non-empty prompt used to prime the recurrent state.
        temperature: forwarded to one_step.
        num_chars: number of characters to generate after the prompt.

    Returns:
        `start_text` followed by the `num_chars` generated characters.

    Raises:
        ValueError: if `start_text` is empty.
    """
    if not start_text:
        # There is no "last position" to sample from for an empty prompt.
        raise ValueError('start_text must contain at least one character')

    net.eval()

    ix = torch.tensor([ids_from_chars(start_text)]).to(device)
    # state = net.zero_state(len(ix)) # rnn
    # state = state.to(device) # rnn
    state_h, state_c = net.zero_state(len(ix)) # lstm
    state = (state_h.to(device), state_c.to(device)) # lstm

    pieces = [start_text]

    # The state is fed back in on every step; disabling autograd keeps it from
    # accumulating a graph that spans the whole generation.
    with torch.no_grad():
        for _ in range(num_chars):
            next_chars, state = one_step(net, ix, state, temperature=temperature)
            # After the prompt, only the freshly sampled character is fed in.
            ix = torch.tensor([ids_from_chars(next_chars)]).to(device)
            pieces.append(next_chars[0])

    return ''.join(pieces)

In [28]:
# Sample text from the trained LSTM, primed with a short prompt.
print(generate_text(lstm_net, "What beholds a fair"))

What beholds a fair;
And brain to they by Juliet,
Hark'd the prize and the pray save:
 master's Kate in our twother's honour
Than twice as treys and drankly is lay ends begin.

BIONDELLO:
Thus brother chantently one this,
Did content a sclived Ned.'
Duke and now and hundred friend:
I have actsemituty,
And mad up.

BIONDA:
Bovereive, if say me made a createnting;
While from sure?
I provock the other noble and reaso
To quapting off it, at Biancais' the sister,
If not.
After his, earny thourd on
the good many colled: joy my husband,
And pare ta'en shrewd: as please me.
The entreat but
For fire, know you within the treawd Lord for ever light.
Most lord! what!' the return that his gazed:
I loves it.

PROSPERO:
Then, thought that hoouse trueldnemanuman!

POMPEY:
I master, but fasher. Pray my tauty!

VALUMINIUS:
Ay, if his partaintly. King with all,
And dost.

MIRANDA:
Some vaitor such daughtrongum.
Nay, lost they are men shall know?

ISABELLA:
And so it she is for an immstand-heard dinarly d