In [None]:
import torch
import torch.optim as optim
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
from tqdm.notebook import tqdm
import random
import sys
from scipy.special import softmax
import re

In [None]:
# Read the corpus line by line, then stitch the lines back into one string.
with open("shakespeare.txt", "r") as f:
    data = f.readlines()
# str.join builds the result in a single pass; repeated `text = text + line`
# may copy the growing string on every iteration.
text = ''.join(data)
# Preview the first 200 characters as the cell output.
text[:200]

In [None]:
# Sort the vocabulary so every character gets the same id on every run.
# Iterating a bare set of strings follows hash order, which Python randomizes
# per process, so ids (and any saved weights tied to them) would otherwise
# change after a kernel restart.
characters = sorted(set(text))
# char_map: character -> 1-based input id. Id 0 is never assigned to a
# character; it is the fill value of the zero-initialised input arrays.
char_map = {ch: idx + 1 for idx, ch in enumerate(characters)}
# inv_char_map: 0-based class index -> character. Training targets are
# char_map[c] - 1, so a predicted class index maps straight back through this.
inv_char_map = dict(enumerate(characters))

In [None]:
# Number of distinct characters in the corpus (size of the vocabulary).
len(char_map)

In [None]:
# Length (in characters) of every input window.
max_len = 40
# One window per start offset that still leaves a following character to predict.
window_starts = range(len(text) - max_len)
# x_data[k]: the max_len characters starting at offset k.
x_data = [text[start:start + max_len] for start in window_starts]
# y_data[k]: the single character that immediately follows window k.
y_data = [text[start + max_len] for start in window_starts]

In [None]:
def sample(preds, temperature=1.0):
    """Sample one index from a probability array after temperature rescaling.

    The probabilities are moved to log space, divided by `temperature`,
    re-normalised with a softmax, and a single index is drawn from the result.

    Args:
        preds: 1-D array-like of non-negative probabilities.
        temperature: divisor applied to the log-probabilities. Values below 1
            sharpen the distribution, values above 1 flatten it.

    Returns:
        The sampled index (a NumPy integer).
    """
    preds = np.asarray(preds).astype('float64')
    # log(0) = -inf is the intended result for zero-probability entries
    # (they end up with probability 0 again), so silence the divide warning.
    with np.errstate(divide='ignore'):
        scaled = np.log(preds) / temperature
    # Shift so the largest exponent is exp(0) = 1. Without the shift, a low
    # temperature can underflow every entry to 0 and produce 0/0 = NaN.
    scaled = scaled - np.max(scaled)
    exp_preds = np.exp(scaled)
    preds = exp_preds / np.sum(exp_preds)
    # One multinomial trial yields a one-hot row; argmax recovers its index.
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [None]:
class LanguagePredictor(nn.Module):
    """Next-character classifier: embedding -> bidirectional LSTM -> linear.

    The model takes a batch of fixed-length id sequences of shape
    (batch, seq_len) and returns one row of unnormalised class scores per
    sequence, shape (batch, output_size).

    Args:
        vocab_size: number of rows in the embedding table.
        embedding_dim: size of each character embedding.
        output_size: number of output classes.
        hidden_dim: LSTM hidden size per direction (default 512).
        seq_len: length of every input sequence; when None, the notebook-level
            `max_len` is used.
    """

    def __init__(self, vocab_size, embedding_dim, output_size, hidden_dim=512, seq_len=None):
        super(LanguagePredictor, self).__init__()
        # Fall back to the notebook-level window length when none is given.
        self.seq_len = max_len if seq_len is None else seq_len
        self.hidden_dim = hidden_dim
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        # batch_first=True: inputs arrive as (batch, seq, feature). With the
        # default (seq, batch, feature) layout the LSTM would step across the
        # samples of the batch instead of along each sequence.
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, bidirectional=True, batch_first=True)
        # Every time step of both directions feeds the classifier.
        self.linear1 = nn.Linear(hidden_dim * self.seq_len * 2, output_size)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim

    def forward(self, inputs):
        """Return class scores of shape (batch, output_size) for integer ids `inputs`."""
        embeds = self.embeddings(inputs)
        lstm_out, _ = self.lstm(embeds)
        # Flatten per sample; keeping the batch dimension explicit means a
        # wrong sequence length fails in the linear layer instead of being
        # silently regrouped into a different number of rows.
        out = self.linear1(lstm_out.reshape(lstm_out.size(0), -1))
        return out
# The embedding table gets len(characters)+1 rows because char_map ids start at 1
# (id 0 is the fill value of the zero-initialised input arrays); the output layer
# has one class per character. Embedding size is 15. The model is placed on the GPU.
model = LanguagePredictor(len(characters)+1, 15, len(characters)).to('cuda')
# The model returns raw linear outputs (no softmax), which is what CrossEntropyLoss expects.
loss_function = nn.CrossEntropyLoss()


In [None]:
# Number of (window, next character) pairs available for training and validation.
len(x_data)

In [None]:
import copy

batch_size = 512
# The first n_train windows train the model; the remainder is held out for validation.
n_train = 90000
optimizer = optim.Adam(model.parameters(), lr=0.001)
indices = np.arange(len(x_data))[:n_train]
val_indices = np.arange(len(x_data))[n_train:]
# Send batches to whichever device the model already lives on.
device = next(model.parameters()).device


def _encode_batch(sample_ids):
    """Encode the windows at `sample_ids` as (inputs, targets) tensors on `device`."""
    # Inputs use the 1-based char_map ids; targets are shifted to 0-based class indices.
    inputs = np.array([[char_map[ch] for ch in x_data[j]] for j in sample_ids], dtype=np.int64)
    targets = np.array([char_map[y_data[j]] - 1 for j in sample_ids], dtype=np.int64)
    return torch.from_numpy(inputs).to(device), torch.from_numpy(targets).to(device)


def _run_epoch(sample_ids, train):
    """Run one pass over `sample_ids`; update weights when `train` is True.

    Batches are cut directly from `sample_ids`, so no partially filled batch
    can leak from the training phase into validation or the other way round.
    Returns the mean loss per sample (0.0 when `sample_ids` is empty).
    """
    model.train(train)
    total_loss, n_seen = 0.0, 0
    # Gradients are only tracked while training; validation builds no autograd graph.
    with torch.set_grad_enabled(train):
        for start in tqdm(range(0, len(sample_ids), batch_size), leave=False):
            inputs, targets = _encode_batch(sample_ids[start:start + batch_size])
            loss = loss_function(model(inputs), targets)
            if train:
                model.zero_grad()
                loss.backward()
                optimizer.step()
            # Weight by batch size so the final, smaller batch counts proportionally.
            total_loss += loss.item() * len(targets)
            n_seen += len(targets)
    return total_loss / max(n_seen, 1)


best_loss = np.inf
epochs_since_best = 0
best = None
for epoch in tqdm(range(300)):
    np.random.shuffle(indices)
    epoch_loss = _run_epoch(indices, train=True)
    epoch_val_loss = _run_epoch(val_indices, train=False)
    # Both values are mean per-sample losses, so they are directly comparable.
    print(epoch_loss, epoch_val_loss)
    if epoch_val_loss < best_loss:
        # New best validation loss: snapshot the weights and reset the patience counter.
        best = copy.deepcopy(model.state_dict())
        best_loss = epoch_val_loss
        epochs_since_best = 0
    elif epochs_since_best > 9 and epoch > 30:
        # Early stopping: patience exhausted, and never before epoch 31.
        break
    else:
        epochs_since_best += 1

# Restore the best snapshot; `best` stays None if no epoch ever improved on inf (e.g. NaN losses).
if best is not None:
    model.load_state_dict(best)

In [None]:
model.eval()
# Run inference on whichever device the model already lives on.
device = next(model.parameters()).device
# Not used by the generation loop below; kept in case other cells rely on it.
start_index = random.randint(0, len(text) - max_len - 1)
for diversity in [0.01, 0.05, 0.2]:
    print('$$$$$$$$$$$$$$$$$$$$$$$')
    print(diversity)
    print('$$$$$$$$$$$$$$$$$$$$$$$')
    generated = ''
    # Seed window; it is exactly max_len (40) characters long.
    sentence = "shall i compare thee to a summer's day?\n"
    print(sentence)
    generated += sentence
    for _ in range(2000):
        # Encode the current window as a (1, max_len) batch of 1-based character ids.
        x_arr = np.zeros((1, max_len), dtype=np.int64)
        for pos, ch in enumerate(sentence):
            x_arr[0, pos] = char_map[ch]
        # No gradients are needed for sampling.
        with torch.no_grad():
            logits = model(torch.from_numpy(x_arr).to(device))
        # Cast to float64 before the softmax: in float32 the 1e-50 floor rounds
        # away to nothing and zero probabilities would reach the log in sample().
        preds = softmax(logits.cpu().numpy().astype('float64')) + 1e-50
        preds = preds / preds.sum()
        next_index = sample(preds.reshape(-1), diversity)
        next_char = inv_char_map[next_index]

        # Keep the full generated text and slide the window forward by one character.
        generated += next_char
        sentence = sentence[1:] + next_char

        sys.stdout.write(next_char)
        sys.stdout.flush()
    print()