<a href="https://colab.research.google.com/github/tejas-srikanth/Shakespeare-text-generator/blob/master/Shakespeare_text_gen.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np
import matplotlib.pyplot as plt


In [2]:
use_gpu = torch.cuda.is_available()
use_gpu

True

In [3]:
from google.colab import drive
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive


In [6]:
root = '/content/gdrive/My Drive/Colab Notebooks/NLP_with_pytorch/shakespeare.txt'

In [7]:
with open(root, 'r', encoding="utf8") as f:
  all_text = f.read()

In [8]:
all_text[:500]

"\n                     1\n  From fairest creatures we desire increase,\n  That thereby beauty's rose might never die,\n  But as the riper should by time decease,\n  His tender heir might bear his memory:\n  But thou contracted to thine own bright eyes,\n  Feed'st thy light's flame with self-substantial fuel,\n  Making a famine where abundance lies,\n  Thy self thy foe, to thy sweet self too cruel:\n  Thou that art now the world's fresh ornament,\n  And only herald to the gaudy spring,\n  Within thine own bu"

# Encode values

In [9]:
all_characters = set(all_text)
decoder = dict(enumerate(all_characters))

In [11]:
encoder = dict((d,idx) for idx,d in decoder.items())

In [12]:
encoded_text = np.array([encoder[char] for char in all_text])

# One Hot Encoder

In [13]:
def one_hot_encoder(encoded_text, num_unique_chars):

   one_hot = np.zeros((encoded_text.size, num_unique_chars))

   one_hot.astype(np.float32)

   one_hot[np.arange(one_hot.shape[0]), encoded_text.flatten()] = 1.0

   one_hot = one_hot.reshape(*encoded_text.shape, num_unique_chars)

   return one_hot

In [14]:
print(one_hot_encoder(np.array([0]), 3))

[[1. 0. 0.]]


# Batch creator

In [15]:
def create_batches(encoded_text, samp_batch_size, seq_len):
  total_chars_batch = samp_batch_size * seq_len
  num_total_batches = int(len(encoded_text)/total_chars_batch)
  enc_txt = encoded_text[:num_total_batches*total_chars_batch]
  enc_txt = enc_txt.reshape(samp_batch_size, -1)

  for n in range(0, enc_txt.shape[1], seq_len):
    x = enc_txt[:,n:n+seq_len]
    y = np.zeros_like(x)

    try:
      y[:,:-1] = x[:,1:]
      y[:, -1] = enc_txt[:,n+seq_len]
    except:
      y[:,:-1] = x[:,1:]
      y[:, -1] = enc_txt[:,0]
    
    yield x,y


In [16]:
arr = np.arange(30)
next(create_batches(arr, 2, 5))

(array([[ 0,  1,  2,  3,  4],
        [15, 16, 17, 18, 19]]), array([[ 1,  2,  3,  4,  5],
        [16, 17, 18, 19, 20]]))

# Create Model

In [17]:
class Model(nn.Module):

  def __init__(self, all_characters, num_hidden=256, num_layers=4, drop_prob=0.5, use_gpu=False ):

    super().__init__()
    self.num_hidden = num_hidden
    self.num_layers = num_layers
    self.drop_prob = 0.5
    self.use_gpu = use_gpu

    self.all_characters = all_characters
    self.decoder = dict(enumerate(all_characters))
    self.encoder = dict((data, idx) for idx,data in self.decoder.items())

    self.lstm = nn.LSTM(len(all_characters), hidden_size=num_hidden, num_layers=num_layers, batch_first=True, dropout=0.5)
    self.dropout = nn.Dropout(drop_prob)
    self.fc_linear = nn.Linear(num_hidden, len(all_characters))
  
  def forward(self, x, hidden):
    lstm_out, hidden = self.lstm(x, hidden)
    drop_out = self.dropout(lstm_out).contiguous().view(-1, self.num_hidden)
    x_out = self.fc_linear(drop_out)

    return x_out, hidden
  
  def hidden(self, batch_size):
    if self.use_gpu:
      hidden = (torch.zeros(self.num_layers, batch_size, self.num_hidden).cuda(),
                torch.zeros(self.num_layers, batch_size, self.num_hidden).cuda())
    else:
      hidden = (torch.zeros(self.num_layers, batch_size, self.num_hidden),
                torch.zeros(self.num_layers, batch_size, self.num_hidden))
    
    return hidden

In [18]:
model = Model(all_characters, num_hidden=512, num_layers=3, drop_prob=0.5, use_gpu=use_gpu)
if use_gpu:
  model = model.cuda()

# Train Validation Split

In [52]:
train_percentage = 0.9
num_train = int(len(encoded_text) * train_percentage)
train_set = encoded_text[:num_train]
val_set = encoded_text[num_train:]

# Loss and Optimizer

In [53]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Variables

In [54]:
epochs = 20
seq_len = 100
batch_size=128
num_unique=max(encoded_text)+1

tracker=0

# Train the model

In [56]:
model.train()

for i in range(epochs):
  hidden_state = model.hidden(batch_size)

  for x, y in create_batches(train_set, batch_size, seq_len):
    tracker += 1

    x = one_hot_encoder(x, num_unique)

    inputs = torch.from_numpy(x).float()
    target = torch.from_numpy(y)

    if use_gpu:
      inputs = inputs.cuda()
      target = target.cuda()
    
    optimizer.zero_grad()
    hidden_state = tuple([state.data for state in hidden_state])
    output, hidden_state = model.forward(inputs, hidden_state)
    loss = criterion(output, target.view(batch_size*seq_len).long())

    loss.backward()

    nn.utils.clip_grad_norm_(model.parameters(), max_norm=5)

    optimizer.step()

    if tracker % 25 == 0:

      model.eval()

      val_losses = []

      hidden_val = model.hidden(batch_size)

      for x_val, y_val in create_batches(val_set, batch_size, seq_len):
        x_val = one_hot_encoder(x_val, num_unique)

        input_val = torch.from_numpy(x_val).float()
        target_val = torch.from_numpy(y_val)

        if use_gpu:
          input_val = input_val.cuda()
          target_val = target_val.cuda()
        
        optimizer.zero_grad()

        hidden_val = tuple([state.data for state in hidden_val])

        out_val, hidden_val = model(input_val, hidden_val)
        val_loss = criterion(out_val, target_val.view(batch_size*seq_len).long())

      val_losses.append(loss.item())
      if tracker % 100 == 0:
        print(f'EPOCH: {i+1}    STEP: {tracker}   LOSS: {loss.item()}')

      model.train()

EPOCH: 1    STEP: 100   LOSS: 3.066499710083008
EPOCH: 1    STEP: 200   LOSS: 2.617043972015381
EPOCH: 1    STEP: 300   LOSS: 2.2426953315734863
EPOCH: 1    STEP: 400   LOSS: 2.0865156650543213
EPOCH: 2    STEP: 500   LOSS: 1.9633419513702393
EPOCH: 2    STEP: 600   LOSS: 1.8567490577697754
EPOCH: 2    STEP: 700   LOSS: 1.8165791034698486
EPOCH: 3    STEP: 800   LOSS: 1.7943572998046875
EPOCH: 3    STEP: 900   LOSS: 1.6966782808303833
EPOCH: 3    STEP: 1000   LOSS: 1.6200647354125977
EPOCH: 3    STEP: 1100   LOSS: 1.6624664068222046
EPOCH: 4    STEP: 1200   LOSS: 1.607407569885254
EPOCH: 4    STEP: 1300   LOSS: 1.603095531463623
EPOCH: 4    STEP: 1400   LOSS: 1.520401120185852
EPOCH: 4    STEP: 1500   LOSS: 1.4756263494491577
EPOCH: 5    STEP: 1600   LOSS: 1.4703242778778076
EPOCH: 5    STEP: 1700   LOSS: 1.4506334066390991
EPOCH: 5    STEP: 1800   LOSS: 1.4619078636169434
EPOCH: 5    STEP: 1900   LOSS: 1.4399521350860596
EPOCH: 6    STEP: 2000   LOSS: 1.449833631515503
EPOCH: 6    STE

In [57]:
torch.save(model.state_dict(), '/content/gdrive/My Drive/Colab Notebooks/NLP_with_pytorch/512Hidden3Layers.pt')

In [58]:
model = Model(all_characters, num_hidden=512, num_layers=3, drop_prob=0.5, use_gpu=use_gpu)
model.load_state_dict(torch.load('/content/gdrive/My Drive/Colab Notebooks/NLP_with_pytorch/512Hidden3Layers.pt'))

<All keys matched successfully>

In [59]:
def predict_next_character(model, char, hidden=None, k=1):
  encoded_text = model.encoder[char]
  encoded_text = one_hot_encoder(np.array([[encoded_text]]), len(model.all_characters))
  input = torch.from_numpy(encoded_text)

  if use_gpu:
    input = input.float().cuda()
  
  hidden = tuple([state.data for state in hidden])
  out, hidden = model(input, hidden)
  probs = F.log_softmax(out, dim=1).data

  probs, indices = probs.topk(k)
  probs = probs.numpy().flatten()
  indices = indices.numpy().squeeze()
  probs = probs/probs.sum()
  chr = np.random.choice(indices, p=probs)
  return model.decoder[chr], hidden

In [60]:
def generate_text(model, size, seed='The', k=1):
    
    if model.use_gpu:
      model.cuda()
    else:
      model.cpu()
    model.eval()
    output = [c for c in seed]

    hidden = model.hidden(1)
    for c in seed:
      out, hidden = predict_next_char(model, c, hidden=hidden, k=k)
    output.append(out)

    for i in range(size):
      out, hidden = predict_next_char(model, output[-1], hidden=hidden, k=k)
      output.append(out)
    return ''.join(output)

In [61]:
shakey = generate_text(model, 500, seed="The ", k=3)

In [62]:
print(shakey)

The way at mine
    this things. If this were-therefore I heard the state,
    If you shall be the chamber, and I have seen his sound
    To see you all a man of my subjocce.  
  CASSIO. I will be so made than he. I will not speak
    The story of this brother and the son,
    And so we will be then, the subjects take till
    What they shall see this son to me to see.
  CAESAR. It shall be so more than this strange anger,
    The senting of the ways of such a parloy
    And stand their beauty too mu


In [None]:
f = open('/content/gdrive/My Drive/Colab Notebooks/NLP_with_pytorch/Data/ai_play.txt', 'w', encoding="utf8")
f.write(shakey)

10005