# Generating Shakespeare poetry with RNN

In [3]:
# !pip3 install torchtext
# !pip3 install pytorch-lightning
# !pip3 install keras-tcn --no-dependencies 

In [4]:
import torch 
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import Dataset, DataLoader

from torchtext.vocab import vocab
from torchtext.transforms import VocabTransform
# We only need TF to dowaload the file - an useful function is available in keras
import tensorflow as tf
from collections import OrderedDict
import pytorch_lightning as pl

import numpy as np

# import ipdb

# TF: https://www.tensorflow.org/text/tutorials/text_generation
# https://www.kdnuggets.com/2020/07/pytorch-lstm-text-generation-tutorial.html
# https://pytorch.org/tutorials/intermediate/char_rnn_generation_tutorial.html
# https://pytorch.org/docs/stable/quantization.html#torch.quantization.quantize_dynamic
# https://anie.me/On-Torchtext/

## Preparing the data

In [5]:
# Downloading the file with Shakespare poetry
path_to_file = tf.keras.utils.get_file(
    'shakespeare.txt', 
    'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

In [6]:
path_to_file

'/home/krzysztof/.keras/datasets/shakespeare.txt'

In [7]:
text = open(path_to_file, 'rb').read()
text = text.decode(encoding='utf-8')

## Unique tokens
We assume the atomic token of the text is a letter.

In [8]:
letters = sorted(set(text))
letters = OrderedDict([(i, 1) for i in letters])

In [9]:
vocabulary = vocab(letters)
vocab_transform = VocabTransform(vocabulary)

In [10]:
# vocabulary.vocab.itos_

## Splitting text by letters

In [11]:
splitted_text = list(text)

In [12]:
encoded_text = vocab_transform(splitted_text)

## Building a model

In [162]:
class TinyShakespeare(pl.LightningModule):
    
    def __init__(self, n_letters, embedding_dim = 15, hidden_size = 32):
        super().__init__()
        self.embed = nn.Embedding(n_letters, embedding_dim=embedding_dim)
        self.rnn = nn.LSTM(input_size = embedding_dim, hidden_size = hidden_size, batch_first=True)
        self.final_layer = nn.Linear(hidden_size, n_letters)
        self.softmax = nn.Softmax(-1)
        self.loss_fun = nn.CrossEntropyLoss()
        
    def forward(self, input):
        emb = self.embed(input)
        rnn_output, _ = self.rnn(emb)
        last_vec = rnn_output[:,-1, :]       
        final_output = self.final_layer(last_vec) 
        # print(final_output)
        return self.softmax(final_output)
    
    def training_step(self, batch, batch_idx):
        x, y = batch
        output = self(x)
        loss = self.loss_fun(output, y)
        print(loss)
        return loss
    
    def configure_optimizers(self):
        optimizer = optim.Adam(self.parameters())
        return optimizer

In [163]:
tiny_shakespeare = TinyShakespeare(65, hidden_size=128)

In [164]:
# Interesting embedding's behaviour - it added a dimesnion at the end
sample_input = torch.tensor([3, 10, 56]).reshape((1, 3))

In [165]:
tiny_shakespeare(sample_input)

tensor([[0.0141, 0.0151, 0.0142, 0.0166, 0.0171, 0.0149, 0.0169, 0.0160, 0.0156,
         0.0150, 0.0151, 0.0161, 0.0155, 0.0157, 0.0159, 0.0163, 0.0142, 0.0159,
         0.0150, 0.0148, 0.0162, 0.0136, 0.0154, 0.0150, 0.0160, 0.0157, 0.0138,
         0.0165, 0.0142, 0.0172, 0.0149, 0.0158, 0.0154, 0.0152, 0.0142, 0.0153,
         0.0168, 0.0155, 0.0156, 0.0176, 0.0135, 0.0144, 0.0165, 0.0161, 0.0160,
         0.0140, 0.0147, 0.0151, 0.0153, 0.0145, 0.0160, 0.0155, 0.0167, 0.0161,
         0.0132, 0.0137, 0.0163, 0.0131, 0.0150, 0.0160, 0.0161, 0.0154, 0.0163,
         0.0153, 0.0156]], grad_fn=<SoftmaxBackward0>)

In [206]:
class PoetryDataset(Dataset):
    
    def __init__(self, data, lookback, n_next, jump = 1):
        super().__init__()
        self.data = data
        self.lookback = lookback
        self.n_next = n_next
        self.jump = jump
        self.length = len(data) // lookback+n_next
        self.cardinality = np.unique(data).shape[0]

    def __len__(self):
        return self.length
    
    def __getitem__(self, idx):
        lback = self.data[idx:(idx+self.lookback)]
        nnext = self.data[(idx+self.lookback):(idx+self.lookback+1)]
        
        inp = torch.tensor(lback, dtype = torch.int).reshape((self.lookback))
        
        target = torch.zeros((65))
        target[nnext] = 1
        
        return inp, target        

In [207]:
pd = PoetryDataset(encoded_text, lookback=40, n_next = 1)
pdl = DataLoader(pd, batch_size = 256)

In [208]:
X, y = pd[0]

In [209]:
#tiny_shakespeare(X.reshape(-1, 10)).shape

## Training

In [210]:
optimizer = optim.Adam(tiny_shakespeare.parameters())
loss_fn = nn.CrossEntropyLoss()

In [211]:
# Manually written loop
EPOCHS = 10

for e in range(EPOCHS):

    tiny_shakespeare.train()
    epcoh_loss = 0

    for b in pdl:
        X, y = b
        optimizer.zero_grad()
        
        out = tiny_shakespeare(X)
        loss = loss_fn(out, y)

        loss.backward()
        optimizer.step()             

        epcoh_loss += loss.item()
        
    print(f"{e+1}: {epcoh_loss}")

1: 413.1027307510376
2: 412.6676528453827
3: 412.3336901664734
4: 412.06728959083557
5: 411.80920457839966
6: 411.604553937912
7: 411.4170432090759
8: 411.220999956131
9: 410.99572372436523


KeyboardInterrupt: 

In [None]:
trainer = pl.Trainer(accelerator='gpu', devices=1, max_epochs=5)

In [171]:
def num_to_text(nums, vocabulary):
    out = [vocabulary.lookup_token(n) for n in nums]
    return "".join(out)

def text_to_num(text):
    return vocab_transform(list(text))

def num_to_tensor(num):
    return torch.tensor(num, dtype = torch.int).reshape(1, -1)

def generate_poetry(start, n_next, lookback=10):
    # Translate firts latter to a vector
    start_num = text_to_num(start)
    
    so_far = start_num
    
    for n in range(n_next):
        
        inp = so_far[n:(n + lookback)]
        inp_tensor = num_to_tensor(inp)
        
        out = tiny_shakespeare(inp_tensor).detach()
        letter_idx = np.argmax(out.numpy())
        inp_tensor = num_to_tensor(letter_idx)
        so_far = so_far + [letter_idx]  
    
    return so_far

In [172]:
trainer.fit(model=tiny_shakespeare, train_dataloaders=pdl)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type             | Params
-------------------------------------------------
0 | embed       | Embedding        | 975   
1 | rnn         | LSTM             | 74.2 K
2 | final_layer | Linear           | 8.4 K 
3 | softmax     | Softmax          | 0     
4 | loss_fun    | CrossEntropyLoss | 0     
-------------------------------------------------
83.6 K    Trainable params
0         Non-trainable params
83.6 K    Total params
0.334     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

tensor(4.1744, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.1742, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.1741, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.1738, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.1738, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.1736, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.1732, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.1731, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.1729, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.1724, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.1716, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.1709, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.1699, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.1688, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.1675, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.1657, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.1628, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.1603, device='cuda:0', grad_fn=<DivBack

tensor(4.0989, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0521, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0365, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0521, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0677, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0326, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0911, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0365, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0599, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0599, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0755, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0794, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0677, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0599, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0326, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0677, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0599, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0521, device='cuda:0', grad_fn=<DivBack

tensor(4.0560, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0560, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0755, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0599, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0911, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0716, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0482, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0638, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0287, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0443, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0638, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0599, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0482, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0482, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0716, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0560, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0521, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0248, device='cuda:0', grad_fn=<DivBack

tensor(4.0482, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0443, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0403, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0521, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0599, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0443, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0325, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0482, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0247, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0560, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0599, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0638, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0482, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0521, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0794, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0950, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0443, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0364, device='cuda:0', grad_fn=<DivBack

tensor(4.0364, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0560, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0403, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0482, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0560, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0638, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0521, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0442, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0521, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0403, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0403, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0364, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0638, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0560, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0638, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0599, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0677, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0677, device='cuda:0', grad_fn=<DivBack

tensor(4.0481, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0521, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0286, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0560, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0560, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0755, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0169, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0442, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0521, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0286, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0364, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0521, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0286, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0599, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0638, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0872, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0638, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0481, device='cuda:0', grad_fn=<DivBack

tensor(4.0560, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0911, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0325, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0364, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0169, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0403, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0403, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0599, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0364, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0560, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0755, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0560, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0403, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0208, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0716, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0755, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0560, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0364, device='cuda:0', grad_fn=<DivBack

tensor(4.0481, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0560, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0481, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0520, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0520, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0013, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0520, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0325, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0247, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0520, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0520, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0364, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0286, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0520, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0520, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0520, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0716, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0560, device='cuda:0', grad_fn=<DivBack

tensor(4.0442, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0716, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0794, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0325, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0364, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0325, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0599, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0599, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0286, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0520, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0520, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0520, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0599, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0560, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0794, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0442, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0481, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0442, device='cuda:0', grad_fn=<DivBack

tensor(4.0520, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0403, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0677, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0442, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0403, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0247, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0286, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0911, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0560, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0364, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0794, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0403, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0560, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0481, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0755, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0638, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0442, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0794, device='cuda:0', grad_fn=<DivBack

tensor(4.0403, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0638, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0481, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0442, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0247, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0481, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0716, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0677, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.1028, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0599, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0638, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0638, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0481, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0325, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0560, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0364, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0520, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0481, device='cuda:0', grad_fn=<DivBack

tensor(4.0794, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0442, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0481, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0442, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0560, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0364, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0442, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0208, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0560, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0442, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0872, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0716, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0520, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0520, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0599, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0364, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0520, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0403, device='cuda:0', grad_fn=<DivBack

tensor(4.0559, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0481, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0599, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0364, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0520, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0364, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0325, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0364, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0481, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0520, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0560, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0560, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0520, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0638, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0638, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0247, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0403, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0599, device='cuda:0', grad_fn=<DivBack

tensor(4.0481, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0442, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0442, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0677, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0481, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0638, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0599, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0638, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0560, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0599, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0950, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0872, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0638, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0403, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0989, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0599, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0403, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0599, device='cuda:0', grad_fn=<DivBack

tensor(4.0833, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0716, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0794, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0716, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.1067, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0599, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0677, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0677, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0638, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0638, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0559, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0364, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0130, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0520, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0247, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0364, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0403, device='cuda:0', grad_fn=<DivBackward1>)
tensor(4.0325, device='cuda:0', grad_fn=<DivBack

In [77]:
out = generate_poetry('wit', 10)

In [78]:
out

[61, 47, 58, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

In [79]:
num_to_text(out, vocabulary)

'wit          '

In [54]:
vocabulary.lookup_token(63)

'y'

In [53]:
vocabulary.get_stoi()

{'y': 63,
 'w': 61,
 'u': 59,
 'O': 27,
 'N': 26,
 'M': 25,
 'a': 39,
 'e': 43,
 'P': 28,
 'L': 24,
 'R': 30,
 'K': 23,
 'E': 17,
 'i': 47,
 'I': 21,
 'H': 20,
 'G': 19,
 'B': 14,
 'F': 18,
 'm': 51,
 'g': 45,
 'l': 50,
 '$': 3,
 "'": 5,
 '-': 7,
 '&': 4,
 'o': 53,
 'x': 62,
 '.': 8,
 'c': 41,
 ':': 10,
 'D': 16,
 ' ': 1,
 ',': 6,
 'J': 22,
 't': 58,
 '3': 9,
 'r': 56,
 'A': 13,
 'S': 31,
 ';': 11,
 'C': 15,
 '?': 12,
 'Q': 29,
 'V': 34,
 'v': 60,
 'W': 35,
 'Y': 37,
 'z': 64,
 'T': 32,
 '!': 2,
 'U': 33,
 'X': 36,
 'b': 40,
 'd': 42,
 'f': 44,
 '\n': 0,
 'n': 52,
 'k': 49,
 'p': 54,
 'Z': 38,
 'q': 55,
 'j': 48,
 'h': 46,
 's': 57}