In [1]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np


from torch.utils.data import IterableDataset, Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
from torch.autograd import Variable
import os
batch_size = 100
hidden_size = 65
num_layers = 1
learning_rate = 0.001

In [2]:
class CharRNN(nn.Module):
    def __init__(self, input_size, output_size, hidden_size, num_layers):
        super(CharRNN, self).__init__()
        self.embedding = nn.Embedding(input_size, input_size)
        self.rnn = nn.GRU(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers)
        self.decoder = nn.Linear(hidden_size, output_size)
        self.input_size = input_size

    def forward(self, input_seq, hidden_state):
        encoded = self.embedding(input_seq)
        output, hidden_state = self.rnn(encoded, hidden_state)
        output = self.decoder(output)
        
        return output


For more simplistic dataloader:
```py
class ShakespeareData(Dataset):
    def __init__(self, fn):
        with open(fn) as f:
            _file = f.read()
        self.vocab = {ch:i for i, ch in enumerate(sorted(list(set(_file))))}
        self.length = len(_file)
        self.vocab_len = len(self.vocab)
        self.labels = torch.tensor([self.vocab[c] for c in _file]).unsqueeze(-1)
    def __getitem__(self, idx):
        return self.labels[idx]
    def __len__(self):
        return self.length 
```

In [3]:
# from itertools import cycle

class ShakespeareData(IterableDataset):
    def __init__(self, fn, batch_size):
        with open(fn) as f:
            _file = f.read()
        self.vocab = {ch:i for i, ch in enumerate(sorted(list(set(_file))))}
        self.length = len(_file)
        self.vocab_len = len(self.vocab)


        items = torch.unsqueeze(torch.tensor([self.vocab[c] for c in _file]), dim=1)
        onehot = torch.zeros(self.length, self.vocab_len)

        self.data = onehot.scatter_(1, items, 1) 
        self.batch_size = batch_size

    def process(self, data):
        ptr = np.random.randint(self.batch_size)
        inp_seq = data[ptr : self.batch_size + ptr]
        trg_seq = data[ptr + 1: self.batch_size + ptr + 1]

        ptr += self.batch_size
        yield torch.squeeze(inp_seq), torch.squeeze(trg_seq)

        if (ptr + self.batch_size + 1 > self.length):
            raise StopIteration
    
    # def get_stream(self, data):
    #     return cycle(self.process(data))

    def __iter__(self):
        return self.process(self.data)

    def __len__(self):
        return self.length 

In [4]:
def i_to_char(i):
    _i = (i == 1).nonzero(as_tuple=True)[0]
    lst = list(vocab.values())
    return list(vocab.keys())[lst.index(_i.item())]


train_data= ShakespeareData("input.txt",batch_size)
trainloader = DataLoader(train_data, batch_size=batch_size)
vocab = train_data.vocab
vocab_size = train_data.vocab_len

seq, trg = next(iter(trainloader))
print(seq.size())
for i in seq[0]:
    print(i_to_char(i), end='')

print('\n== target ==')
for i in trg[0]:
    print(i_to_char(i), end='')

torch.Size([1, 100, 65])
 speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
== target ==
speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:


In [13]:
from tqdm import tqdm
import ModelTrainer
def train(net,optimizer, epochs, print_every):
    loss_tot = []
    pp_tot = []
    running_loss = 0.0
    
    for epoch in tqdm(range(1, epochs + 1)):
        for i, data in enumerate(trainloader):
            _input, _target = data
            loss, p = ModelTrainer.trainRNN(net, _input,_target ,batch_size ,optimizer)
            running_loss += loss.item()
            if(i % print_every == 0):
                loss_tot.append(loss.item())
                pp_tot.append(p)
                print(f"average loss={running_loss/print_every}")
                running_loss = 0.0
    return loss_tot, pp_tot


In [14]:
net = CharRNN(vocab_size, vocab_size, hidden_size, num_layers)

optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
train(net,optimizer, 5, 100)

  0%|          | 0/5 [00:00<?, ?it/s]


ValueError: Expected target size (1, 65), got torch.Size([1, 100, 65])