In [6]:
import string
import numpy.random as random
import re

# Character vocabulary for the model. We use Python's fixed set of printable
# ASCII characters; a character's position in `all_chars` is its integer id.
# note: we can build our own char base from reading the file
all_chars = string.printable
n_chars = len(all_chars) # total number of characters

# Read the whole training corpus into memory as a single string.
# NOTE(review): no explicit encoding is given, so the platform default is used;
# any character outside `all_chars` will make `all_chars.index` raise later.
with open('input.txt', 'r') as file:
    text = file.read()

# Corpus length in characters; used to bound the random window sampling.
text_len = len(text)
print('text_len =', text_len)

text_len = 1115394


In [10]:
# Number of characters in every sampled training window.
seq_len = 200

def random_seq():
    """Return a random contiguous slice of ``text`` that is ``seq_len`` characters long.

    Reads the module-level ``text``, ``text_len`` and ``seq_len``. The start
    offset is drawn uniformly from every position that leaves room for a
    full-length window.
    """
    # Largest offset at which a complete window still fits inside the corpus.
    last_offset = text_len - seq_len
    # numpy's randint samples from the half-open range [low, high), so the
    # upper bound is last_offset + 1 to make last_offset itself reachable.
    offset = random.randint(0, last_offset + 1)
    return text[offset:offset + seq_len]

# Show one randomly sampled window as a quick sanity check of the corpus.
print(random_seq())


PARIS:
I do defy thy conjurations,
And apprehend thee for a felon here.

ROMEO:
Wilt thou provoke me? then have at thee, boy!

PAGE:
O Lord, they fight! I will go call the watch.

PARIS:
O, I am slai


In [54]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F

class CharSeqRNN(nn.Module):
    """Character-level language model: embedding -> single-layer LSTM -> linear decoder.

    Args:
        vocab_size: number of distinct characters; size of the embedding table
            and of the output logits.
        embed_dim: width of the character embedding fed into the LSTM.
        hidden_dim: width of the LSTM hidden and cell state.
        batch_size: accepted for interface compatibility only; the model does
            not use it (the batch size is passed to ``init_hidden`` instead).
    """

    def __init__(self, vocab_size, embed_dim, hidden_dim, batch_size):
        # Name the class explicitly. ``super(self.__class__, self)`` resolves
        # to the *subclass* when this class is inherited from, so the call
        # would re-enter CharSeqRNN.__init__ instead of nn.Module.__init__.
        super(CharSeqRNN, self).__init__()

        self.vocab_size = vocab_size # number of chars for this case
        self.hidden_dim = hidden_dim
        self.embed_dim = embed_dim # we could keep this same as hidden dim to reduce one variable

        self.encode = nn.Embedding(vocab_size, embed_dim)
        self.rnn = nn.LSTM(embed_dim, hidden_dim, num_layers=1, batch_first=True) # we can try dropout
        self.decode = nn.Linear(hidden_dim, vocab_size)

    def forward(self, inp, hidden):
        """Run the model over a batch of index sequences.

        Args:
            inp: integer character indices laid out as N x T (batch first).
            hidden: ``(h, c)`` LSTM state, e.g. from ``init_hidden``.

        Returns:
            ``(output, hidden)`` where ``output`` holds unnormalised scores
            over the vocabulary for every position and ``hidden`` is the
            updated LSTM state.
        """
        inp = self.encode(inp) #input must be N x T
        output, hidden = self.rnn(inp, hidden)
        output = self.decode(output)
        # Raw scores are returned on purpose; normalisation is left to the caller.
        #output = F.log_softmax(output, dim=2) # we can  do this at output
        return output, hidden

    def init_hidden(self, batch_size):
        """Return a zero ``(h, c)`` state, each of size (1, batch_size, hidden_dim).

        The leading 1 matches the single LSTM layer configured in ``__init__``.
        NOTE(review): the zeros are created by plain ``torch.zeros``; if the
        model is moved to another device the state presumably has to be moved
        as well - confirm before training on GPU.
        """
        return (Variable(torch.zeros(1, batch_size, self.hidden_dim)),
                Variable(torch.zeros(1, batch_size, self.hidden_dim)))

In [78]:
def char_index(chars):
    """Encode ``chars`` as a 1 x len(chars) LongTensor of vocabulary indices.

    Each character is mapped to its position in the module-level
    ``all_chars``; a character that is not in ``all_chars`` raises
    ``ValueError`` (from ``str.index``).
    """
    positions = list(map(all_chars.index, chars))
    # Reshape to a single-row batch so rows can be concatenated along dim 0.
    return Variable(torch.LongTensor(positions).view(1, -1))

# Quick check of the encoding: lowercase letters follow the ten digits in all_chars.
print(char_index("abcDEF"))

Variable containing:
 10  11  12  39  40  41
[torch.LongTensor of size (1,6)]



In [79]:
def training_batch(batch_size):
    """Build one random (input, target) batch for next-character prediction.

    Draws ``batch_size`` windows with ``random_seq``. For each window the
    input is every character but the last and the target is every character
    but the first, so target[t] is the character that follows input[t].

    Returns:
        ``(chars_in, chars_out)``: two index tensors with one row per window.
    """
    windows = [random_seq() for _ in range(batch_size)]
    # Inputs and targets are the same windows shifted by one position.
    inputs = torch.cat([char_index(w[:-1]) for w in windows], dim=0)
    targets = torch.cat([char_index(w[1:]) for w in windows], dim=0)
    return inputs, targets

# Smoke test: build a single-sequence batch to confirm the tensors assemble.
c_in, c_out = training_batch(1)
#print(c_in)

In [88]:
def run(init_str='A', length=200, temp=0.4):
    """Sample ``length`` characters from the module-level ``model``.

    Args:
        init_str: non-empty priming text; it is fed through the model first
            and is included at the start of the returned string.
        length: number of characters to generate after ``init_str``.
        temp: softmax temperature; the scores are divided by it before
            sampling, so lower values give more conservative output.

    Returns:
        ``init_str`` followed by the ``length`` sampled characters.
    """
    state = model.init_hidden(1)
    pieces = [init_str]

    # Warm up the recurrent state on everything except the last prime character.
    warmup = init_str[:-1]
    if warmup:
        _, state = model(char_index(warmup), state)

    # The last prime character is the first input of the sampling loop.
    step_input = char_index(init_str[-1])

    for _ in range(length):
        scores, state = model(step_input, state)

        # Temperature-scaled distribution over the vocabulary for this step.
        probs = F.softmax(scores.view(-1)/temp, dim=0).data
        idx = torch.multinomial(probs, 1)[0]
        next_char = all_chars[idx]
        pieces.append(next_char)
        # Feed the sampled character back in as the next input.
        step_input = char_index(next_char)
    return ''.join(pieces)

In [81]:
import time, math

def time_since(since):
    """Format the wall-clock time elapsed since ``since`` as ``'<m>m <s>s'``.

    Args:
        since: an earlier ``time.time()`` timestamp, in seconds.
    """
    elapsed = time.time() - since
    minutes = math.floor(elapsed / 60)
    # Seconds left over after removing the whole minutes.
    seconds = elapsed - minutes * 60
    return '%dm %ds' % (minutes, seconds)

In [82]:
def train(batch_size):
    """Run one optimisation step on a freshly sampled batch.

    Uses the module-level ``model``, ``criterion``, ``optimizer`` and
    ``n_chars``. A new zero hidden state is created for every call, so no
    state is carried over between batches.

    Args:
        batch_size: number of sequences drawn by ``training_batch``.

    Returns:
        The batch loss as a plain Python number.
    """
    hidden = model.init_hidden(batch_size)
    model.zero_grad()
    c_in, c_out = training_batch(batch_size)

    output, hidden = model(c_in, hidden)
    # Flatten batch and time so every character position is one classification example.
    loss = criterion(output.view(-1, n_chars), c_out.view(-1))

    loss.backward()
    optimizer.step()

    # The loss is a 0-dim tensor on PyTorch >= 0.4 and indexing it with
    # ``.data[0]`` raises IndexError on current releases; ``.item()`` is the
    # supported way to get the scalar. Fall back to the legacy form on old
    # versions that do not provide ``.item()``.
    return loss.item() if hasattr(loss, 'item') else loss.data[0]
    

In [100]:
# Training driver.
# NOTE: an "epoch" here is a single optimisation step on one random batch
# (one call to train), not a full pass over the corpus.
epochs = 2000
print_fq = 20   # print the loss and a generated sample every `print_fq` steps
plot_fq = 10    # record one averaged loss value every `plot_fq` steps


hidden_dim = 128
batch_size = 64
# The second argument (128) is the embedding width (embed_dim).
model = CharSeqRNN(n_chars, 128, hidden_dim, batch_size)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# The model returns unnormalised scores, which is what CrossEntropyLoss expects.
criterion = nn.CrossEntropyLoss()

start = time.time()
losses = []     # one entry per `plot_fq` steps: mean loss over that window
loss_avg = 0    # running sum of losses inside the current window

for epoch in range(1, epochs+1):
    loss1 = train(batch_size)
    loss_avg += loss1
    if epoch % print_fq == 0:
        # Elapsed time, step number, percent complete, and the latest batch loss.
        print('[%s (%d %d%%) %.4f]' % (time_since(start), epoch, epoch / epochs * 100, loss1))
        # Sample 150 characters primed with a newline to eyeball progress.
        print(run('\n', 150, 0.5), '\n')

    if epoch % plot_fq == 0:
        losses.append(loss_avg / plot_fq)
        loss_avg = 0
    
#print('[%s (%d %d%%) %.4f]' % (time_since(start), epoch, epoch / epochs * 100, loss1))


[0m 31s (20 1%) 3.3236]

whn st too ta oh  eis he  nooe tt tatoo roatoi att in anso  dy tthe  d taee tl oe t e ant ti oi t o  to he ema o r   t aoe etaeh esth t toee 

[1m 2s (40 2%) 2.9280]

]8E^ inree wid ta the the the theer se the

Osy nd ot thers d me thanh the for y crore c
de oonr menut the ohe fois coy ass he nd the the cant se antt 

[1m 32s (60 3%) 2.6810]

6ECto wwaous teind ithe al hen the ithe e nered had hean mind
B od ant thaor oy meato serere seres her hant wilr the as the hot yoo ar sato win withe  

[2m 3s (80 4%) 2.5475]

W,
Ahe ant mang sous the he nou the meret orl tour the  ag ous dat ares wines me herenc
I wor the melk, orout wis the than the ot the louncre sit me a 

[2m 33s (100 5%) 2.4180]

ARO:
OIRNA:

BRLEHNIOE:
That the the he me han thore ig herst fil an thare is th out an mith the thind houl pare the the and you dind ait mers bese so 

[3m 4s (120 6%) 2.3634]

USARDUTCACRA:
AD:

ARUED:
And ars lathe here the don got he ben me ar to the the me sherke the t

[30m 46s (940 47%) 1.6702]


PETRUCHIO:
Be the for my lord. When so thiss take in my seet of the sings
Doke his see his slay the can, the to my beard
the may the rest the dought  

[31m 1s (960 48%) 1.6903]


Second that I that for the fear her so should thy lord;
Now I we will made the have shall therererence.

First EDWARD II:
What not like the soulthred 

[31m 31s (980 49%) 1.6599]


Second I would beath the commbinger the server thou good son the seath that so the proser,
I hath the word you are for of me the be shill many;
And w 

[32m 2s (1000 50%) 1.6693]

Master the constrest here a south strong make the shall us my man
And shall not is a meed bark, for the cause of the recess.

BRUTUS:
What sir thy som 

[32m 33s (1020 51%) 1.6499]

What he word forture and be that my good will the bear
And good will not the conseed and amselved
And was see what thee for a spition some and my ligh 

[33m 3s (1040 52%) 1.6973]


TRIOLO:
For the have be proughter in the contised.

CORIOLANUS:
I 

[327m 32s (1860 93%) 1.5572]

CORIOLANUS:
Now, make the poor to my but we will not all the great so mean,
I prove me to my lords, and we be the seems the purse
That say, bette the  

[328m 2s (1880 94%) 1.5727]

See when I have stand in the seave as before thou drays,
That should prove for the live me like and shall be
The reaths the will maids and good speak  

[328m 32s (1900 95%) 1.5720]


GREMO:
Come of the gentle will mean, for stoon the part
And the sacking to the grace price of my father,
And with him to Warwick is the gates and bea 

[329m 3s (1920 96%) 1.5535]

See he do I leave my sunder words.

SICINIUS:
I can the should the king it me that we are the barded for and lord.

LEONTES:
And I will the earth our  

[329m 34s (1940 97%) 1.5266]

This thou have you shall nather the are them
And be sear the cause her bone a seemon the since,
That is the word of my hands, in the field;
And see th 

[330m 4s (1960 98%) 1.5466]

That the lives and save the hand, there of the night.

KIN

In [102]:
# Final qualitative check: generate 500 characters from the trained model,
# primed with a newline, at a low temperature (0.2).
print("After training")
print(run('\n', 500, 0.2))

After training


Second Citizen:
The send the world the man the man thee and have her disconself.

CORIOLANUS:
What is the strange the father with the more to his death.

PETRUCHIO:
What have so so stand the strange the streast the father son,
And so come that the stranger and the send the strings
I will not the good be the man the stands the have
That thou are the seep the stand the world the store,
The stand the word to the word so have the father.

CORIOLANUS:
The prove the sunter the string the stoon the wo
