In [14]:
import torch 
import torch.nn as nn
import numpy as np
from torch.autograd import Variable
import os


In [15]:
class Dictionary(object):
    """Two-way mapping between words and consecutive integer ids.

    Ids are handed out in first-seen order, starting at 0.
    """

    def __init__(self):
        # word -> id and id -> word lookups, kept in sync by add_word.
        self.word2idx = {}
        self.idx2word = {}
        # Id that will be given to the next previously unseen word.
        self.idx = 0

    def add_word(self, word):
        """Register ``word`` under the next free id; known words are left untouched."""
        if word in self.word2idx:
            return
        new_id = self.idx
        self.word2idx[word] = new_id
        self.idx2word[new_id] = word
        self.idx = new_id + 1

    def __len__(self):
        """Return the number of distinct words registered so far."""
        return len(self.word2idx)
    
class Corpus(object):
    """Turns whitespace-tokenized text files into batched tensors of word ids.

    Attributes:
        dictionary: Dictionary that accumulates every word seen by get_data.
        train: path of 'train.txt' inside ``path``.
        test: path of 'test.txt' inside ``path``.
    """

    def __init__(self, path='./data'):
        self.dictionary = Dictionary()
        self.train = os.path.join(path, 'train.txt')
        self.test = os.path.join(path, 'test.txt')

    def get_data(self, path, batch_size=20):
        """Tokenize the file at ``path`` and arrange its word ids in ``batch_size`` rows.

        Each line is split on whitespace and terminated with an '<eos>' token.
        Every word is added to ``self.dictionary`` and replaced by its id.
        Trailing tokens that do not fill a complete column are dropped.

        Args:
            path: text file to read.
            batch_size: number of rows in the returned tensor.

        Returns:
            LongTensor of shape (batch_size, num_tokens // batch_size). The
            second dimension is 0 when the file holds fewer than
            ``batch_size`` tokens.
        """
        # Single pass: a word's id is available as soon as it has been added,
        # so the file does not need to be read a second time.
        token_ids = []
        with open(path, 'r') as f:
            for line in f:
                for word in line.split() + ['<eos>']:
                    self.dictionary.add_word(word)
                    token_ids.append(self.dictionary.word2idx[word])

        ids = torch.LongTensor(token_ids)
        num_batches = ids.size(0) // batch_size
        ids = ids[:num_batches * batch_size]
        # Spell out the second dimension: view(batch_size, -1) cannot infer it
        # when no tokens are left after trimming.
        return ids.view(batch_size, num_batches)
    
# RNN Based Language Model
class RNNLM(nn.Module):
    """Language model made of an embedding, a batch-first LSTM and a linear decoder."""

    def __init__(self, vocab_size, embed_size, hidden_size, num_layers):
        super(RNNLM, self).__init__()
        self.embed = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True)
        self.linear = nn.Linear(hidden_size, vocab_size)
        self.init_weights()

    def init_weights(self):
        """Draw embedding and decoder weights from U(-0.1, 0.1); zero the decoder bias."""
        init_range = 0.1
        # Embedding first, decoder second: keeps the random draws in the same order.
        for weight in (self.embed.weight, self.linear.weight):
            weight.data.uniform_(-init_range, init_range)
        self.linear.bias.data.fill_(0)

    def forward(self, x, h):
        """Run one forward pass.

        Args:
            x: word ids; the LSTM is batch-first, so rows are sequences.
            h: recurrent state handed to the LSTM.

        Returns:
            (logits, state): logits has one row per (sequence, time step) pair
            and vocab_size columns; state is the LSTM state after the pass.
        """
        embedded = self.embed(x)
        hidden_seq, h = self.lstm(embedded, h)

        # Collapse the batch and time axes so the decoder sees a 2-D matrix.
        rows = hidden_seq.size(0) * hidden_seq.size(1)
        flat = hidden_seq.contiguous().view(rows, hidden_seq.size(2))

        return self.linear(flat), h


In [16]:
# Hyper Parameters
# Module-level settings shared by the model-construction, training and sampling cells.
# Size of each word-embedding vector (also the input width of the LSTM).
embed_size = 128
# Width of the LSTM hidden and cell states.
hidden_size = 1024
# Number of stacked LSTM layers.
num_layers = 1
# Number of full passes over the training ids.
num_epochs = 5
num_samples = 1000   # number of words to be sampled
# Number of rows the token stream is split into by Corpus.get_data.
batch_size = 20
# Number of time steps fed to the model per optimisation step.
seq_length = 30
# Learning rate handed to the Adam optimizer.
learning_rate = 0.002

In [17]:
# Load Shakespeare Dataset
# Text file that is tokenized for training.
train_path = './data/shakespeare.txt'
# File that the sampling cell opens for writing.
sample_path = './sample.txt'
corpus = Corpus()
# Word ids of the whole file, trimmed and reshaped into batch_size rows.
ids = corpus.get_data(train_path, batch_size)
# Must be read after get_data: that call is what fills the dictionary.
vocab_size = len(corpus.dictionary)
# Number of seq_length-wide windows per row; shown in the training progress message.
num_batches = ids.size(1) // seq_length


In [18]:
# Build the language model from the hyper-parameters and the corpus vocabulary size.
model = RNNLM(vocab_size, embed_size, hidden_size, num_layers)

# Loss and Optimizer
# Cross-entropy between the model's per-step vocabulary logits and the target word ids.
criterion = nn.CrossEntropyLoss()
# Adam over every parameter of the model, using the configured learning rate.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [19]:
# Truncated Backpropagation 
def detach(states):
    """Return the given recurrent states cut off from their autograd history.

    Used between training windows so that backpropagation stops at the start
    of the current window instead of reaching back through earlier ones.

    Args:
        states: iterable of tensors, e.g. the (hidden, cell) pair of an LSTM.

    Returns:
        list of tensors holding the same values but with no gradient history.
    """
    # Tensor.detach() replaces the deprecated Variable(state.data) re-wrapping.
    return [state.detach() for state in states]


In [20]:
# Training
# Truncated backpropagation through time: the corpus is consumed in windows of
# seq_length steps, and the LSTM state is carried from one window to the next
# with its gradient history cut off by detach().
for epoch in range(num_epochs):
    # Initial hidden and memory states (reset to zero at the start of each epoch)
    states = (torch.zeros(num_layers, batch_size, hidden_size),
              torch.zeros(num_layers, batch_size, hidden_size))

    # Stop one window early so the shifted targets always have seq_length columns.
    for i in range(0, ids.size(1) - seq_length, seq_length):
        # Get batch inputs and targets; targets are the inputs shifted by one token.
        inputs = ids[:, i:i+seq_length]
        targets = ids[:, (i+1):(i+1)+seq_length].contiguous()

        # Forward + Backward + Optimize
        model.zero_grad()
        states = detach(states)
        outputs, states = model(inputs, states)
        loss = criterion(outputs, targets.view(-1))
        loss.backward()
        # clip_grad_norm_ is the in-place successor of the deprecated clip_grad_norm.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()

        step = (i+1) // seq_length
        if step % 100 == 0:
            # The loss is a 0-dim tensor; .item() extracts the Python float
            # (indexing it as loss.data[0] raises on current PyTorch).
            loss_value = loss.item()
            print ('Epoch [%d/%d], Step[%d/%d], Loss: %.3f, Perplexity: %5.2f' %
                   (epoch+1, num_epochs, step, num_batches, loss_value, np.exp(loss_value)))


Epoch [1/5], Step[0/1714], Loss: 11.125, Perplexity: 67864.73
Epoch [1/5], Step[100/1714], Loss: 6.953, Perplexity: 1045.93
Epoch [1/5], Step[200/1714], Loss: 6.975, Perplexity: 1070.05
Epoch [1/5], Step[300/1714], Loss: 6.429, Perplexity: 619.43
Epoch [1/5], Step[400/1714], Loss: 6.570, Perplexity: 713.67
Epoch [1/5], Step[500/1714], Loss: 6.059, Perplexity: 427.86
Epoch [1/5], Step[600/1714], Loss: 6.157, Perplexity: 471.82
Epoch [1/5], Step[700/1714], Loss: 6.173, Perplexity: 479.39
Epoch [1/5], Step[800/1714], Loss: 6.671, Perplexity: 789.55
Epoch [1/5], Step[900/1714], Loss: 6.059, Perplexity: 427.95
Epoch [1/5], Step[1000/1714], Loss: 6.040, Perplexity: 419.99
Epoch [1/5], Step[1100/1714], Loss: 6.056, Perplexity: 426.59
Epoch [1/5], Step[1200/1714], Loss: 5.678, Perplexity: 292.38
Epoch [1/5], Step[1300/1714], Loss: 5.352, Perplexity: 210.96
Epoch [1/5], Step[1400/1714], Loss: 6.047, Perplexity: 423.02
Epoch [1/5], Step[1500/1714], Loss: 5.751, Perplexity: 314.36
Epoch [1/5], St

In [21]:
# Sampling
# Generates num_samples words from the trained model, writes them to
# sample_path and prints them. torch.no_grad() replaces the removed
# volatile=True flag, so no autograd graph is built while sampling.
with torch.no_grad(), open(sample_path, 'w') as f:
    # Set initial hidden and memory states (batch of one sequence)
    state = (torch.zeros(num_layers, 1, hidden_size),
             torch.zeros(num_layers, 1, hidden_size))

    # Select one word id uniformly at random as the seed input
    prob = torch.ones(vocab_size)
    # NOTE: the name `input` is kept from the original cell; it shadows the builtin.
    input = torch.multinomial(prob, num_samples=1).unsqueeze(1)

    pieces = []
    for i in range(num_samples):
        # Forward propagate rnn
        output, state = model(input, state)

        # Sample a word id; exp() of the logits gives unnormalized weights,
        # which multinomial accepts.
        prob = output.squeeze(0).exp()
        # int() gives a plain Python key: a tensor would not match the int keys of idx2word.
        word_id = int(torch.multinomial(prob, 1)[0])

        # Feed sampled word id to next time step
        input.fill_(word_id)

        # File write
        word = corpus.dictionary.idx2word[word_id]
        word = '\n' if word == '<eos>' else word + ' '
        f.write(word)
        pieces.append(word)

    s = "".join(pieces)
    print (s)


from the place. 
But Our eternal wife, Leave all your arms 
To dew of wife when I have bank'd and prune 
The cock, his house with all modest men. 
GLOUCESTER. Some things, dear gentle wife, unless a goodly man 
Defy me on a great and crown'd shame? 
Sir M. Doubt not, my lord. 
TIMON. Hear that thou diest. 
MESSENGER. He dares not. 
CHARMIAN. I know you then. That you confess, 
In this division of your great times 
I give him back your thanks, and weeds away. 
First, welcome to King John and meet me here. 
CITIZEN. Ha! Will not you lose your Grace's blow! 
KING HENRY. The reasons of the bad are by your house, 
Because our mettle is a law so lightly 
In this same book of Fortune? knife are come 
And tell how question ceremony. So was our death; 
Here in the lobby. 
MALCOLM. What shall this be? 
MACDUFF. Upon all fortunes, you come hither 
How honour in our time your purpos'd service 
In light and feeding, I am Egypt's hand, being rank, 
Nor many misery, have accus'd a right. 
Doug. Here 