In [952]:
import torch
import torch.nn as nn
import numpy as np

# set device for pytorch
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cpu


In [953]:
LENGTH = 6

# Get Data (Shakespeare)

In [954]:
# store text in one long string called 'data'
f = open("./shakespeare.txt")
data = f.read()
data = data.replace('\n', ' ')
data = data.replace(' ', '\n')
data = data.replace('\n\n', '\n')
print(data[:36])
print(len(set(data)))

THE
SONNETS
by
William
Shakespeare
F
60


In [955]:
# create dictionary to one-hot encode each characters to
ctoi = {c:i for i, c in enumerate(set(data))}
itoc = {i:c for i, c in enumerate(set(data))}
print(ctoi)
print(itoc)

{'.': 0, 'n': 1, 'U': 2, 't': 3, 's': 4, 'K': 5, 'q': 6, 'u': 7, 'g': 8, 'F': 9, 'O': 10, 'N': 11, 'E': 12, 'v': 13, 'e': 14, 'w': 15, 'B': 16, '-': 17, 'W': 18, 'z': 19, ';': 20, '!': 21, ',': 22, 'j': 23, 'r': 24, 'D': 25, 'G': 26, 'l': 27, ')': 28, 'P': 29, 'c': 30, '\n': 31, 'd': 32, 'M': 33, 'p': 34, 'h': 35, ':': 36, 'T': 37, 'C': 38, 'Y': 39, 'H': 40, 'S': 41, 'y': 42, 'b': 43, 'R': 44, 'a': 45, 'm': 46, 'V': 47, 'i': 48, 'o': 49, "'": 50, 'f': 51, 'L': 52, '?': 53, '(': 54, 'A': 55, 'I': 56, 'k': 57, 'J': 58, 'x': 59}
{0: '.', 1: 'n', 2: 'U', 3: 't', 4: 's', 5: 'K', 6: 'q', 7: 'u', 8: 'g', 9: 'F', 10: 'O', 11: 'N', 12: 'E', 13: 'v', 14: 'e', 15: 'w', 16: 'B', 17: '-', 18: 'W', 19: 'z', 20: ';', 21: '!', 22: ',', 23: 'j', 24: 'r', 25: 'D', 26: 'G', 27: 'l', 28: ')', 29: 'P', 30: 'c', 31: '\n', 32: 'd', 33: 'M', 34: 'p', 35: 'h', 36: ':', 37: 'T', 38: 'C', 39: 'Y', 40: 'H', 41: 'S', 42: 'y', 43: 'b', 44: 'R', 45: 'a', 46: 'm', 47: 'V', 48: 'i', 49: 'o', 50: "'", 51: 'f', 52: 'L',

In [956]:
# create datapoints of data (3 chars), target (3 chars)
x_train = []
y_train = []
for i in range(len(data[:-(LENGTH+1)])):
    x_train.append([ctoi[x] for x in list(data[i:i+LENGTH])])
    y_train.append(ctoi[data[i+LENGTH]])
    
print(x_train[0:5])
print(y_train[0:5])

# to tensor
x_train = torch.Tensor(x_train)
y_train = torch.Tensor(y_train)
print(x_train.shape)
print(y_train.shape)

# to one_hot
x_train = nn.functional.one_hot(x_train.long()).float()
y_train = nn.functional.one_hot(y_train.long()).float()
print(x_train.shape)
print(y_train.shape)

[[37, 40, 12, 31, 41, 10], [40, 12, 31, 41, 10, 11], [12, 31, 41, 10, 11, 11], [31, 41, 10, 11, 11, 12], [41, 10, 11, 11, 12, 37]]
[11, 11, 12, 37, 41]
torch.Size([93911, 6])
torch.Size([93911])
torch.Size([93911, 6, 60])
torch.Size([93911, 60])


In [957]:
x_encoded = nn.functional.one_hot(torch.Tensor([ctoi[x] for x in data]).long()).float()

# Build RNN

In [958]:

class RNN(nn.Module):
    def __init__(self, input_len, input_dim, hidden_dim):
        super(RNN, self).__init__()
        self.hidden_dim = hidden_dim
        self.lin_ht = nn.Linear((input_len*input_dim)+self.hidden_dim, self.hidden_dim)
        self.lin_out = nn.Linear(self.hidden_dim, input_dim)
    
    def forward(self, x, hidden):
        if hidden == None:
            hidden = torch.randn((self.hidden_dim)).to(device)
        
        x = x.flatten()
        xh = torch.cat((x, hidden))
        
        ht = (self.lin_ht(xh)).tanh()
        ot = nn.functional.softmax(self.lin_out(ht), dim=0)
        
        return ht, ot
        

In [959]:
model = RNN(LENGTH, 60, 800)

# Set System Standards

In [960]:
# set loss function as cross entropy loss, optimizer and SGD
criterion = nn.CrossEntropyLoss(label_smoothing=0.2)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

In [961]:
# move everything to CUDA (if possible)
model = model.to(device)
x_train = x_train.to(device)
y_train = y_train.to(device)

In [962]:
from tqdm import trange

def train(model, criterion, optimizer, data, target):

    hidden = None
    correct = 0
    
    # set to train mode
    model.train()
    for i in trange(len(data)):
        
        # label is the current char data[i] and input is the previous char data[i-1]
        optimizer.zero_grad()
        hidden, pred = model(data[i], None if hidden==None else hidden.detach())
        loss = criterion(pred, target[i])
        
        correct += np.argmax(pred.tolist())==np.argmax(target[i].tolist())

        # grad. descent
        loss.backward()
        optimizer.step()
        
        if (i+1)%30000==0:
            print(f"accuracy={correct/30000.0}")
            correct = 0
        

In [963]:
epochs = 1
for t in range(epochs):
    print(f"Epoch {t} -----------------------------------")
    train(model, criterion, optimizer, x_train, y_train)

Epoch 0 -----------------------------------


 32%|███▏      | 30169/93911 [00:19<00:38, 1642.49it/s]

accuracy=0.06336666666666667


 64%|██████▍   | 60159/93911 [00:37<00:21, 1599.72it/s]

accuracy=0.17876666666666666


 96%|█████████▌| 90239/93911 [00:56<00:02, 1651.47it/s]

accuracy=0.19133333333333333


100%|██████████| 93911/93911 [00:58<00:00, 1596.87it/s]


# Generate Text from Trained Model

In [964]:
# switch to eval mode here to prevent gradients from being calculated
model.eval()

# extra characters to make
generate_size = 20

# keep running
start = 250
text = x_encoded[start:start+LENGTH+1]
text = text.reshape(len(text), 60)

hidden = None
for i in range(generate_size):
    hidden, next_chr = model(text[i:i+LENGTH], hidden)
    
    nt = torch.zeros((1, next_chr.shape[0]))
    nt[0, (next_chr.detach().numpy())] = 1
    
    text = torch.cat((text, nt), dim=0)

text_real = [itoc[np.argmax(text[i].tolist())] for i in range(len(text))]

In [965]:
print(''.join(text_real))

ed'st
t....................
