In [57]:
import torch
import torch.nn as nn
import numpy as np

# Get Data (Shakespeare)

In [94]:
# store text in one long string called 'data'
f = open("./shakespeare.txt")
data = f.read()

In [95]:
# create dictionary to one-hot encode each characters to
char2int = dict([(c, i) for i, c in enumerate(set(data))])
int2char = dict([(char2int[k], k) for k in char2int])

In [96]:
print(char2int)
print(int2char)

{'W': 0, 'f': 1, 'D': 2, 'v': 3, 'g': 4, 'M': 5, 'V': 6, "'": 7, 'L': 8, 'A': 9, 'z': 10, 'n': 11, '(': 12, 'k': 13, '-': 14, ';': 15, 'o': 16, 'J': 17, 'h': 18, 'q': 19, 'N': 20, 'x': 21, 'd': 22, 'b': 23, 'm': 24, 'E': 25, ':': 26, 's': 27, ' ': 28, 'S': 29, 'a': 30, 'C': 31, 'R': 32, 'T': 33, '!': 34, 'B': 35, 'I': 36, 'Y': 37, 'K': 38, 'j': 39, 'O': 40, '.': 41, 'u': 42, 'H': 43, ')': 44, 'y': 45, 'i': 46, 'p': 47, 't': 48, 'l': 49, '?': 50, 'w': 51, 'c': 52, ',': 53, '\n': 54, 'P': 55, 'G': 56, 'e': 57, 'r': 58, 'U': 59, 'F': 60}
{0: 'W', 1: 'f', 2: 'D', 3: 'v', 4: 'g', 5: 'M', 6: 'V', 7: "'", 8: 'L', 9: 'A', 10: 'z', 11: 'n', 12: '(', 13: 'k', 14: '-', 15: ';', 16: 'o', 17: 'J', 18: 'h', 19: 'q', 20: 'N', 21: 'x', 22: 'd', 23: 'b', 24: 'm', 25: 'E', 26: ':', 27: 's', 28: ' ', 29: 'S', 30: 'a', 31: 'C', 32: 'R', 33: 'T', 34: '!', 35: 'B', 36: 'I', 37: 'Y', 38: 'K', 39: 'j', 40: 'O', 41: '.', 42: 'u', 43: 'H', 44: ')', 45: 'y', 46: 'i', 47: 'p', 48: 't', 49: 'l', 50: '?', 51: 'w', 

In [97]:
# one-hot encode and move to Tensors
x_encoded = nn.functional.one_hot((torch.Tensor([char2int[x] for x in data]).long()))

In [98]:
# example of reverting tensor back to single char for use later
int2char[np.argmax(x_encoded[1].tolist())]

'H'

In [99]:
print(x_encoded.shape)

torch.Size([94275, 61])


# Build RNN

In [106]:

class RNN(nn.Module):
    def __init__(self, hidden_dim):
        super(RNN, self).__init__()

        self.hidden_dim = hidden_dim
        
        self.in_linear = nn.Linear(61, 100, bias=False)
        self.hid_linear = nn.Linear(self.hidden_dim, 100)
        self.out_linear = nn.Linear(100, 61)

    def forward(self, x, hidden):
        if hidden == None:
            hidden = torch.zeros(self.hidden_dim).to(device)
        ht = nn.functional.tanh(self.in_linear(x) + self.hid_linear(hidden))
        ot = self.out_linear(ht)
        return ht, ot
        

In [107]:
model = RNN(200)

# Set System Standards

In [108]:
# set device for pytorch
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

# set loss function as cross entropy loss, optimizer and SGD
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

cuda:0


In [109]:
# move everything to CUDA
model = model.to(device)
x_encoded = x_encoded.float().to(device)

In [110]:
from tqdm import trange

def train(model, criterion, optimizer, data):

    hidden = None
    # set to train mode
    model.train()
    for i in trange(len(data)):
        # skip first index, since at this point we have no context to train with
        if i == 0:
            continue

        # label is the current char data[i] and input is the previous char data[i-1]
        optimizer.zero_grad()
        hidden, pred = model(data[i-1], None)
        loss = criterion(pred, data[i])

        # grad. descent
        loss.backward()
        optimizer.step()
        

In [111]:
epochs = 5
for t in range(epochs):
    print(f"Epoch {t} -----------------------------------")
    train(model, criterion, optimizer, x_encoded)

Epoch 0 -----------------------------------


100%|█████████████████████████████████████████████| 94275/94275 [01:51<00:00, 846.22it/s]


Epoch 1 -----------------------------------


100%|█████████████████████████████████████████████| 94275/94275 [01:51<00:00, 847.06it/s]


Epoch 2 -----------------------------------


100%|█████████████████████████████████████████████| 94275/94275 [01:51<00:00, 847.90it/s]


Epoch 3 -----------------------------------


100%|█████████████████████████████████████████████| 94275/94275 [01:51<00:00, 847.68it/s]


Epoch 4 -----------------------------------


100%|█████████████████████████████████████████████| 94275/94275 [01:50<00:00, 853.00it/s]


# Generate Text from Trained Model

In [138]:
# switch to eval mode here to prevent gradients from being calculated
model.eval()

# extra characters to make
generate_size = 100

# keep running
text = x_encoded[0]
text = text.reshape(1, 61)

hidden = None
for i in range(generate_size):
    hidden, next_chr = model(text[i], hidden)
    print(text.shape)
    print(hidden.shape)
    text = torch.cat((text, next_chr.reshape(1, next_chr.shape[0])), dim=0)

text_real = [int2char[np.argmax(text[i].tolist())] for i in range(len(text))]

torch.Size([1, 61])
torch.Size([100])


RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x100 and 200x100)