In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np

class RNN(nn.Module):
    """GRU sequence model followed by a linear read-out layer.

    There is no embedding layer: the GRU consumes feature vectors of width
    ``input_size`` directly, and ``decoder`` maps every GRU output (width
    ``hidden_size``) to ``output_size`` scores.

    Args:
        input_size: width of each input feature vector.
        hidden_size: width of the GRU hidden state.
        output_size: number of scores produced per time step.
        num_layers: number of stacked GRU layers.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers

        # batch_first=True: batched inputs are laid out as (batch, seq, input_size).
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.decoder = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden):
        """Run the GRU over ``input`` and project every time step.

        Args:
            input: tensor passed straight to the GRU.
            hidden: initial hidden state, e.g. from ``init_hidden``.

        Returns:
            ``(output, hidden)``: the per-step scores produced by the linear
            layer and the final GRU hidden state.
        """
        output, hidden = self.gru(input, hidden)
        output = self.decoder(output)
        return output, hidden

    def init_hidden(self, batch_size):
        """Return an all-zero initial hidden state.

        Args:
            batch_size: number of sequences in the batch. ``0`` is a sentinel
                meaning "unbatched input" and yields a 2-D state.

        Returns:
            A zero tensor of shape ``(num_layers, hidden_size)`` when
            ``batch_size == 0``, otherwise
            ``(num_layers, batch_size, hidden_size)``.
        """
        if batch_size == 0:
            shape = (self.num_layers, self.hidden_size)
        else:
            shape = (self.num_layers, batch_size, self.hidden_size)

        # Match the model's own parameters so the state stays usable after
        # the module has been moved to another device or cast to another dtype.
        reference = self.decoder.weight
        return torch.zeros(shape, dtype=reference.dtype, device=reference.device)

In [2]:
# Length, in characters, of each training sequence; the training loop passes
# this value to get_batch as its sequence_size argument.
chunk_len = 200

def train(inp, target):
    """Run one optimisation step on a single batch and return its loss.

    Relies on the module-level ``decoder``, ``criterion`` and
    ``decoder_optimizer``.

    Args:
        inp: input batch; its first dimension is used as the batch size when
            creating the initial hidden state.
        target: 3-D target tensor with the same layout as the model output;
            both are permuted with ``(0, 2, 1)`` before reaching the criterion.

    Returns:
        The batch loss as a tensor detached from the autograd graph.
    """
    hidden = decoder.init_hidden(inp.size(0))
    decoder.zero_grad()

    logits, hidden = decoder(inp, hidden)
    # Swap the last two axes of both tensors before handing them to the criterion.
    loss = criterion(logits.permute(0, 2, 1), target.permute(0, 2, 1))

    loss.backward()
    decoder_optimizer.step()

    # Detach before returning: callers sum the losses across iterations, and a
    # graph-attached tensor would keep autograd history alive for every batch.
    return loss.detach()

In [3]:
def get_vocabulary_and_mappings(chars):
    """Build the sorted vocabulary of ``chars`` plus lookup tables in both directions.

    Prints the vocabulary size as a side effect.

    Args:
        chars: iterable of hashable, sortable symbols (characters here).

    Returns:
        ``(vocabulary, index_to_char, char_to_index)`` where ``vocabulary`` is
        the sorted list of distinct symbols and the two dicts map positions in
        that list to symbols and back.
    """
    vocabulary = sorted(set(chars))
    print("Vocabulary size: ", len(vocabulary))

    index_to_char = dict(enumerate(vocabulary))
    char_to_index = {symbol: position for position, symbol in index_to_char.items()}

    return vocabulary, index_to_char, char_to_index

def get_batch(chars, char_to_index, V, sequence_size, batch_size):
    """Yield one-hot ``(x, y)`` training batches for next-character prediction.

    The text is cut into consecutive, non-overlapping sequences of
    ``sequence_size`` characters. For each sequence, ``y`` holds the same
    window shifted one character to the right. Trailing characters that do not
    fill a complete sequence (plus its one-character look-ahead) are dropped.

    Args:
        chars: sequence of characters, each present in ``char_to_index``.
        char_to_index: mapping from character to class index in ``[0, V)``.
        V: vocabulary size, i.e. the width of the one-hot encoding.
        sequence_size: number of characters per sequence.
        batch_size: maximum number of sequences per batch; the final batch may
            be smaller.

    Yields:
        ``(x, y)`` tensors of shape ``(b, sequence_size, V)`` in the default
        floating-point dtype, with ``b <= batch_size``. Every batch is a freshly
        allocated tensor, so batches remain valid after the generator advances.

    Raises:
        KeyError: if a character is missing from ``char_to_index``.
    """
    encoded = torch.tensor([char_to_index[char] for char in chars], dtype=torch.long)

    # One extra character is needed as the target of the last input position.
    # Clamp at zero so an empty corpus yields nothing at all.
    num_sequences = max(len(encoded) - 1, 0) // sequence_size
    if num_sequences == 0:
        return

    usable = num_sequences * sequence_size
    inputs = encoded[:usable].reshape(num_sequences, sequence_size)
    targets = encoded[1:usable + 1].reshape(num_sequences, sequence_size)

    float_dtype = torch.get_default_dtype()
    for start in range(0, num_sequences, batch_size):
        stop = start + batch_size
        x = nn.functional.one_hot(inputs[start:stop], V).to(float_dtype)
        y = nn.functional.one_hot(targets[start:stop], V).to(float_dtype)
        yield x, y

In [4]:
import string
# Read the whole training text; the context manager closes the file handle
# as soon as the contents are in memory.
with open('input.txt', 'r') as corpus_file:
    corpus = corpus_file.read()

# Keep letters, punctuation, newlines and spaces; every other character
# (digits, tabs, ...) is dropped from the training data.
chars = [char for char in corpus if char.isalpha() or char in string.punctuation or char == '\n' or char == ' ']
vocabulary, index_to_char, char_to_index = get_vocabulary_and_mappings(chars)
V = len(vocabulary)  # vocabulary size, used as the one-hot width and model input/output size

Vocabulary size:  60


In [5]:
def evaluate(prime_str='A', predict_len=100, temperature=0.8):
    """Generate text by repeatedly sampling the next character from ``decoder``.

    Relies on the module-level ``decoder``, ``char_to_index``,
    ``index_to_char`` and ``V``.

    Args:
        prime_str: non-empty seed text. All of its characters are fed to the
            model to build up the hidden state before sampling starts.
        predict_len: number of characters to generate after the seed.
        temperature: the scores are divided by this value before sampling, so
            lower values sharpen the distribution and higher values flatten it.

    Returns:
        ``prime_str`` followed by ``predict_len`` sampled characters.

    Raises:
        ValueError: if ``prime_str`` is empty.
        KeyError: if ``prime_str`` contains a character outside the vocabulary.
    """
    if not prime_str:
        raise ValueError("prime_str must contain at least one character")

    prime_indices = torch.tensor([char_to_index[char] for char in prime_str])
    predicted = prime_str

    # Generation never back-propagates; without no_grad the recurrent hidden
    # state would drag an ever-growing autograd graph through every step.
    with torch.no_grad():
        hidden = decoder.init_hidden(0)  # 0 requests the unbatched (2-D) hidden state

        # First step: the whole seed as one unbatched sequence of shape
        # (len(prime_str), V). For a one-character seed this is (1, V).
        x = nn.functional.one_hot(prime_indices, V).to(dtype=hidden.dtype, device=hidden.device)

        for _ in range(predict_len):
            output, hidden = decoder(x, hidden)

            # Only the last time step predicts the next character. Softmax gives
            # the same sampling distribution as exp() of the scaled scores but
            # cannot overflow for large scores.
            output_dist = torch.softmax(output[-1] / temperature, dim=-1)
            top_i = torch.multinomial(output_dist, 1)[0]

            # Add predicted character to string and use as next input
            predicted += index_to_char[top_i.item()]
            x = nn.functional.one_hot(top_i, V).reshape(1, V).to(dtype=hidden.dtype)

    return predicted

In [6]:
import time, math

def time_since(since):
    """Format the wall-clock time elapsed since ``since`` as ``'<m>m <s>s'``.

    Args:
        since: a timestamp previously obtained from ``time.time()``.

    Returns:
        A string with whole minutes and the remaining seconds (truncated).
    """
    elapsed = time.time() - since
    minutes = math.floor(elapsed / 60)
    seconds = elapsed - minutes * 60
    return '%dm %ds' % (minutes, seconds)

In [None]:
# --- Hyper-parameters -------------------------------------------------------
n_epochs = 2000      # full passes over the corpus
print_every = 10     # generate a text sample every this many epochs
plot_every = 200     # record an averaged loss every this many iterations
hidden_size = 512    # GRU hidden-state width
n_layers = 1         # stacked GRU layers
lr = 0.005           # Adam learning rate

batch_size = 16      # sequences per batch

# --- Model, optimiser, loss -------------------------------------------------
decoder = RNN(V, hidden_size, V, n_layers)
decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()

start = time.time()
all_losses = []               # averaged losses as plain floats
loss_avg = 0                  # running sum of batch losses since the last record
loss_count = 0                # number of batch losses contained in loss_avg
loss = float('nan')           # most recent batch loss; NaN until a batch has run

for epoch in range(1, n_epochs + 1):
    itr = 1
    for x, y_true in get_batch(chars, char_to_index, V, chunk_len, batch_size):
        # .item() turns the loss into a Python float so that summing it does
        # not keep autograd history alive across iterations.
        loss = train(x, y_true).item()
        loss_avg += loss
        loss_count += 1

        if itr % plot_every == 0:
            # Divide by the number of losses actually accumulated: itr restarts
            # every epoch while the running sum may still hold leftovers from
            # the end of the previous epoch.
            avg_loss = loss_avg / loss_count
            all_losses.append(avg_loss)
            loss_avg = 0
            loss_count = 0
            print(f"Iteration: {itr} Loss: {avg_loss:.6f}")

        itr += 1

    if epoch % print_every == 0:
        print("\n--- Generating sample ---\n")
        # The last field is the loss of the most recent batch.
        print('[%s (%d %d%%) %.4f]' % (time_since(start), epoch, epoch / n_epochs * 100, loss))
        # Seed the sample with a uniformly random vocabulary character.
        char = index_to_char[np.random.randint(V)]
        print(evaluate(char, 250))
        print()

In [99]:
# Generate 1000 characters seeded with 'S' at temperature 0.5, which is lower
# than evaluate's default of 0.8.
print(evaluate('S', 1000, 0.5))

Sometime on the people.

SICINIUS:
The discorth as the world.

CORIOLANUS:
What we call'd the rest of him off that
The gods and powers at the world,
How did see the rather proud to the people along with me.

SICINIUS:
Speak to your honour'd and the banishmen.

SICINIUS:
And thousand to be your voices, by the world.

CORIOLANUS:
He would down would ye the people.

SICINIUS:
You shall begg'd. What o' the world
In a grown for the consent of the body.

SICINIUS:
He last it is it am as a man down.

SICINIUS:
Call place, what I then on the tongues.

SICINIUS:
Hence may be do not be to strange trage.

BRUTUS:
We'll the people, while that I pray them.

SICINIUS:
What o' the contribless of the people,
He was a day to stone of my chair.

SICINIUS:
First to be content to the people.

CORIOLANUS:
Why, no, no, and to the denited.

SICINIUS:
This way and the tongue on the dare to the people,
And to the tongues and way, or belly that he done.

BRUTUS:
The people, being the gods on
To be you to the gr