In [1]:
from urllib.request import urlretrieve
import numpy as np
import torch, torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F

In [2]:
# Fetch the Project Gutenberg plain-text file and store it as ./shakespeare.txt.
urlretrieve(
    "http://www.gutenberg.org/files/100/100-0.txt",
    './shakespeare.txt',
)

('./shakespeare.txt', <http.client.HTTPMessage at 0x7fbbf94f1860>)

In [3]:
# Read the corpus with an explicit UTF-8 decoding (the platform default may
# differ) and release the file handle as soon as the contents are in memory.
with open('./shakespeare.txt', encoding='utf-8') as corpus_file:
    text = corpus_file.read()

In [4]:
# Drop newlines, tabs and the 'æ' ligature from the corpus.
for unwanted in ('\n', '\t', 'æ'):
    text = text.replace(unwanted, '')

In [5]:
# Peek at a 200-character slice of the text.
text[20000:20200]

'im thou art bright,And dost him grace when clouds do blot the heaven:So flatter I the swart-complexioned night,When sparkling stars twire not thou gild’st the even.  But day doth daily draw my sorrows'

In [6]:
# Distinct characters of the corpus, in sorted order.
chars = sorted(set(text))
# NOTE(review): vocab_size is one larger than the number of distinct
# characters, and the printed "total chars" includes that extra slot —
# confirm the extra id is intentional.
vocab_size = 1 + len(chars)
print('total chars:', vocab_size)

total chars: 97


In [7]:
# Show the whole character set as a single string.
''.join(chars)

' !"#$%&\'()*,-./0123456789:;?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]_`abcdefghijklmnopqrstuvwxyz|}Æà—‘’“”\ufeff'

In [8]:
# Lookup tables in both directions: character -> id and id -> character,
# where the id is the character's position in `chars`.
char_indices = {ch: pos for pos, ch in enumerate(chars)}
indices_char = dict(enumerate(chars))

In [9]:
# Encode the whole corpus as a list of integer character ids.
idx = list(map(char_indices.__getitem__, text))

idx[:20]

[95, 44, 77, 74, 69, 64, 62, 79, 0, 35, 80, 79, 64, 73, 61, 64, 77, 66, 92, 78]

In [10]:
# Decode the first 20 ids back into characters.
''.join(indices_char[i] for i in idx[:20])

'\ufeffProject Gutenberg’s'

In [11]:
# Slice the id stream into windows that start every 3 positions: three context
# ids (c1, c2, c3) followed by the id to predict (c4). Because the stride is 3,
# the target of one window is also the first context id of the next window.
starts = range(0, len(idx) - 3, 3)
c1_dat, c2_dat, c3_dat, c4_dat = (
    [idx[start + offset] for start in starts] for offset in range(4)
)

In [12]:
# Turn the three context-id lists into 1-D integer arrays. np.array converts a
# flat list of ints directly; np.stack would first wrap every single int in
# its own 0-d array before joining them.
x1 = np.array(c1_dat)
x2 = np.array(c2_dat)
x3 = np.array(c3_dat)

In [13]:
# Target ids as a 1-D integer array (np.array converts the flat list directly,
# without wrapping each int in its own 0-d array as np.stack would).
y = np.array(c4_dat)

In [14]:
# First four entries of each of the three input arrays.
x1[:4], x2[:4], x3[:4]

(array([95, 74, 62, 35]), array([44, 69, 79, 80]), array([77, 64,  0, 79]))

In [15]:
# Model hyper-parameters.
n_hidden = 256  # width of the hidden layers; read by Model as a global
embedding_length = 42  # embedding width; passed to Model as n_fac

In [16]:
class Model(nn.Module):
    """Predict the next token from the three preceding tokens.

    Every input token is embedded and projected to the hidden width by the
    same `l_in` layer. The three projections are then folded, one after the
    other, into a running hidden state through the same `l_hidden` layer.
    `l_out` maps the final state to log-probabilities over the vocabulary.
    """

    def __init__(self, vocab_size, n_fac, n_hid=None):
        """
        Args:
            vocab_size: number of token ids (embedding rows and output classes).
            n_fac: width of the token embedding.
            n_hid: width of the hidden layers. When omitted, the notebook-level
                `n_hidden` setting is used, as before.
        """
        super().__init__()
        if n_hid is None:
            n_hid = n_hidden  # fall back to the notebook-level global
        self.e = nn.Embedding(vocab_size, n_fac)
        self.l_in = nn.Linear(n_fac, n_hid)
        self.l_hidden = nn.Linear(n_hid, n_hid)
        self.l_out = nn.Linear(n_hid, vocab_size)

    def forward(self, c1, c2, c3):
        """Return log-probabilities of the next token.

        Args:
            c1, c2, c3: integer id tensors with a leading batch axis, one per
                context position (oldest first).

        Returns:
            Log-softmax scores taken over dim 1 of the output layer.
        """
        in1 = F.relu(self.l_in(self.e(c1)))
        in2 = F.relu(self.l_in(self.e(c2)))
        in3 = F.relu(self.l_in(self.e(c3)))

        # Start from an all-zero state created on the same device (and with the
        # same dtype) as the activations, so the model runs on CPU or GPU alike.
        h = torch.zeros_like(in1)
        h = torch.tanh(self.l_hidden(h + in1))
        h = torch.tanh(self.l_hidden(h + in2))
        h = torch.tanh(self.l_hidden(h + in3))

        return F.log_softmax(self.l_out(h), dim=1)

In [17]:
# Build the model and move it to the GPU only when one is actually available,
# instead of failing outright on CPU-only machines.
model = Model(vocab_size, embedding_length)
if torch.cuda.is_available():
    model = model.cuda()

In [18]:
class DataGenerator():
    """Single-pass mini-batch iterator over a 2-D table of integer ids.

    Each row of `dset` is one sample. A batch is returned as a list with one
    1-D LongTensor per column, each holding `bs` values, so it can be unpacked
    as `*inputs, target = next(gen)`.

    Only full batches are produced: trailing rows that do not fill a batch are
    skipped. The read position is never reset, so create a new generator for
    every pass over the data.
    """
    def __init__(self, dset, bs=1):
        """
        Args:
            dset: 2-D integer data (rows = samples) accepted by torch.LongTensor.
            bs: number of rows per batch; must be at least 1.

        Raises:
            ValueError: if `bs` is smaller than 1.
        """
        if bs < 1:
            raise ValueError('bs must be a positive integer')
        self.dset = torch.LongTensor(dset)
        # Keep the data on the GPU when there is one; otherwise stay on the CPU.
        if torch.cuda.is_available():
            self.dset = self.dset.cuda()
        self.len = len(self.dset)
        self.idx = 0
        self.bs = bs
    def __len__(self):
        """Number of rows (samples), not number of batches."""
        return len(self.dset)
    def __iter__(self):
        return self
    def __next__(self):
        """Return the next full batch, or raise StopIteration when none is left."""
        start, stop = self.idx, self.idx + self.bs
        # Check bounds BEFORE consuming rows, so that the last full batch is
        # returned rather than built and thrown away.
        if stop > self.len:
            raise StopIteration
        self.idx = stop
        # Slice on the tensor itself, then flip to column-major so that
        # iterating yields one contiguous tensor per column.
        columns = self.dset[start:stop].t().contiguous()
        return list(columns)

#Just for demo purposes:

In [19]:
# Build a generator with batch size 2 and pull one batch to inspect its layout.
it = DataGenerator(np.stack([x1,x2,x3,y], axis=1), bs=2)
next(it)

[
  95
  74
 [torch.cuda.LongTensor of size 2 (GPU 0)], 
  44
  69
 [torch.cuda.LongTensor of size 2 (GPU 0)], 
  77
  64
 [torch.cuda.LongTensor of size 2 (GPU 0)], 
  74
  62
 [torch.cuda.LongTensor of size 2 (GPU 0)]]

In [20]:
# Run one batch of size 2 through the model and show the raw output.
it = DataGenerator(np.stack([x1,x2,x3,y], axis=1), bs=2)
*Xs, yt = next(it)  # last tensor is the target column, the others are inputs
t = model(*[Variable(x) for x in Xs])
print(t)

Variable containing:

Columns 0 to 9 
-4.6944 -4.5044 -4.4879 -4.6572 -4.7816 -4.7845 -4.6319 -4.7057 -4.3343 -4.5623
-4.5793 -4.5071 -4.4632 -4.8201 -5.0781 -4.6080 -4.8487 -4.7772 -4.5044 -4.6866

Columns 10 to 19 
-4.7588 -4.5446 -4.7652 -4.4265 -4.8748 -4.4213 -4.5453 -4.7087 -4.5867 -4.6358
-4.5429 -4.6383 -4.5555 -4.4805 -4.6145 -4.5346 -4.8363 -4.7398 -4.4736 -4.4661

Columns 20 to 29 
-4.2845 -4.6128 -4.6387 -4.8540 -4.5047 -4.6851 -4.4222 -4.7024 -4.8334 -4.6780
-4.4980 -4.6362 -4.5134 -4.6189 -4.5550 -4.7069 -4.4872 -4.6678 -4.8293 -4.7159

Columns 30 to 39 
-4.6676 -4.4244 -4.6224 -4.4371 -4.8172 -4.6989 -4.4100 -4.5136 -4.5534 -4.5819
-4.4216 -4.7453 -4.5410 -4.6807 -4.5857 -4.8709 -4.5449 -4.6125 -4.7540 -4.5782

Columns 40 to 49 
-4.1439 -4.3563 -4.4975 -4.7177 -4.4925 -4.7223 -4.3293 -4.5946 -4.7192 -4.6504
-4.3292 -4.0720 -4.5959 -4.7095 -4.5451 -4.6388 -4.6203 -4.7527 -4.7691 -4.4073

Columns 50 to 59 
-4.6581 -4.7156 -4.8316 -4.4920 -4.7279 -4.3553 -4.5645 -4.2831 -4.

In [27]:
# Adam over every model parameter, learning rate 1e-3.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

In [28]:
# Train for five passes over the data, printing the loss of the last
# mini-batch of every pass.
# Stack the columns once; the table is identical for every epoch.
train_rows = np.stack([x1, x2, x3, y], axis=1)  # last column is the target
for epoch in range(5):
    # DataGenerator never rewinds, so a new one is needed for each pass.
    stacked_training_data = DataGenerator(train_rows, bs=1024)
    loss = None
    for *X, Y in stacked_training_data:
        optimizer.zero_grad()
        y_pred = model(*[Variable(x) for x in X])
        loss = F.nll_loss(y_pred, Variable(Y, requires_grad=False))
        loss.backward()
        optimizer.step()
    if loss is None:
        # No batch was produced, so there is no loss tensor to report.
        print('epoch', epoch, 'produced no batches')
    else:
        print(loss.data)


 2.4512
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 2.4202
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 2.4117
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 2.3916
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 2.3848
[torch.cuda.FloatTensor of size 1 (GPU 0)]



In [23]:
def get_next(inp):
    """Return the character the model scores highest as the successor of `inp`.

    Args:
        inp: context string with one character per model input position
            (the model's forward takes three). Every character must be a
            key of `char_indices`.

    Returns:
        The entry of `chars` at the arg-max position of the model output.
    """
    idxs = torch.LongTensor([char_indices[c] for c in inp])
    # Follow the model's device instead of assuming a GPU is present.
    if next(model.parameters()).is_cuda:
        idxs = idxs.cuda()
    # Give each character its own batch axis (shape (1,)) so the model output
    # is 2-D, which is what its log_softmax over dim 1 requires.
    p = model(*[Variable(ix) for ix in idxs.view(-1, 1)])
    # Bring the scores to host memory before handing them to NumPy.
    i = int(np.argmax(p.data.cpu().numpy()))
    return chars[i]

In [24]:
# Predicted successor of the context 'y. '.
get_next('y. ')

'G'

In [25]:
def get_next_n(inp, n):
    """Extend `inp` with `n` characters predicted one at a time.

    After each prediction the context window slides by one: its first
    character is dropped and the predicted character is appended, so the
    window keeps the length of `inp`.

    Returns:
        `inp` followed by the `n` generated characters.
    """
    window = inp
    pieces = [inp]
    for _ in range(n):
        predicted = get_next(window)
        pieces.append(predicted)
        window = window[1:] + predicted
    return ''.join(pieces)

In [29]:
# Generate 100 characters starting from the context 'ye '.
get_next_n('ye ', 100)

'ye with the with the with the with the with the with the with the with the with the with the with the w'