In [1]:
import torch
import Models.pytorch_joy_and_anger.joy_and_anger_utils as model_utils

In [2]:
# Load the dataset through the project helper; constructing it prints the item
# count and the label mapping (see the cell output below).
train_ds = model_utils.HappyClassifierDataset()

loaded 7520 items
{'joy': 0.0, 'anger': 1.0}


In [3]:
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator

In [4]:
# torchtext's built-in 'basic_english' tokenizer; both the vocabulary and
# text_pipeline are built on top of it.
tokenizer = get_tokenizer('basic_english')

In [5]:
# Build the vocabulary from the tokenized training texts. "<unk>" is registered
# as a special token so out-of-vocabulary words have an entry to fall back on.
vocab = build_vocab_from_iterator(
    (tokenizer(text) for text, _ in train_ds.train_data),
    specials=["<unk>"],
)

In [6]:
# Lookups of tokens that are not in the vocabulary return the "<unk>" index.
vocab.set_default_index(vocab["<unk>"])

In [7]:
# Sanity check: look up a few hand-written tokens; the displayed list holds their indices.
vocab(['great', 'day', "we're", 'having'])

[353, 96, 0, 171]

In [8]:
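# Index 2 ("we're") came back as 0, the "<unk>" fallback: the raw token "we're" is not
# in the vocabulary. Presumably the tokenizer splits contractions into several tokens,
# so text should go through `tokenizer` before the lookup — TODO confirm.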

In [9]:
def text_pipeline(x):
    """Tokenize a raw string and return the vocabulary indices of its tokens."""
    return vocab(tokenizer(x))

In [16]:
from torch.utils.data import DataLoader
# Device that collate_batch moves every batch tensor to; fixed to the CPU here.
device = torch.device("cpu")

def collate_batch(batch):
    """Collate (text, label) pairs into the flat layout used with offsets-based lookup.

    Args:
        batch: iterable of (text, label) pairs.

    Returns:
        A 3-tuple of tensors, each moved to ``device``:
        - int64 token ids of all samples concatenated into one 1-D tensor,
        - float32 labels, one per sample,
        - the start position of each sample inside the concatenated tensor.
    """
    token_tensors, labels = [], []
    for text, label in batch:
        token_tensors.append(torch.tensor(text_pipeline(text), dtype=torch.int64))
        labels.append(label)

    # Sample i starts where samples 0..i-1 end, so the start positions are the
    # running sum of the lengths, shifted right by one with a leading 0 (the
    # last length is not needed).
    shifted_lengths = [0] + [tokens.size(0) for tokens in token_tensors[:-1]]

    flat_tokens = torch.cat(token_tensors)
    # NOTE(review): labels are float32 — confirm this matches the loss that is used.
    label_tensor = torch.tensor(labels, dtype=torch.float32)
    start_offsets = torch.tensor(shifted_lengths).cumsum(dim=0)
    return flat_tokens.to(device), label_tensor.to(device), start_offsets.to(device)

In [17]:
from torch import nn

#TODO: Build a Backward RNN forward pass
class HappyClassifierModel(nn.Module):
    """Bag-of-embeddings text classifier.

    The token embeddings of each sample are pooled by ``nn.EmbeddingBag``
    (using its default reduction) and the pooled vector is mapped to class
    scores by a single linear layer.

    Args:
        vocab_size: number of rows in the embedding table.
        embed_dim: size of each embedding vector.
        num_class: number of scores produced per sample.
    """

    def __init__(self, vocab_size, embed_dim, num_class):
        super(HappyClassifierModel, self).__init__()
        # sparse=True: the embedding weight receives sparse gradients.
        self.embedding = nn.EmbeddingBag(vocab_size, embed_dim, sparse=True)
        self.fc = nn.Linear(embed_dim, num_class)
        self.init_weights()

    def init_weights(self):
        """Draw embedding and linear weights uniformly from [-0.2, 0.2]; zero the bias."""
        initrange = 0.2
        self.embedding.weight.data.uniform_(-initrange, initrange)
        self.fc.weight.data.uniform_(-initrange, initrange)
        self.fc.bias.data.zero_()

    def forward(self, text, offsets):
        """Return one row of ``num_class`` scores per sample.

        Args:
            text: 1-D tensor of token ids for the whole batch, concatenated.
            offsets: 1-D tensor with the start index of each sample in ``text``.
        """
        pooled = self.embedding(text, offsets)
        # The linear head maps embed_dim -> num_class and is applied exactly once.
        # Feeding its output back into it fails with a shape mismatch whenever
        # embed_dim != num_class.
        return self.fc(pooled)

In [18]:
# Hyperparameters for the classifier.
emsize = 64              # embedding dimension
num_class = 2            # one score per label (joy, anger)
vocab_size = len(vocab)  # one embedding row per vocabulary entry
model = HappyClassifierModel(vocab_size=vocab_size, embed_dim=emsize, num_class=num_class)

In [19]:
from torchviz import make_dot

In [20]:
# Batches of 8 samples in dataset order (no shuffling), collated by collate_batch.
dataloader = DataLoader(
    train_ds,
    batch_size=8,
    shuffle=False,
    collate_fn=collate_batch,
)

In [21]:
# Peek at the first collated batch: (concatenated token ids, labels, per-sample start offsets).
next(iter(dataloader))

(tensor([  14, 6945,    6, 1033,    4,  378,    1,    2,  263,  553,    1,   23,
            9,  598,    1,   17,   48,   20, 4155,   18,  230,    1,    2,    7,
         4155,   99, 8483,  107,    3,  126,    6, 1028, 4230,    1,   38,    2,
            7,  604,   16,    6,  292,  448,    3,    7,    1,   39,  788,    4,
           17,   60,  725,    8, 2035, 2734,    1,   70,   12,   83,    5, 3622,
           44,    8,  174,    4,    2,  330,    1,   17, 7261, 1780,   20,    5,
          817,  269,   22,   27,    6,  830, 8744, 1620,  203,    4,  175,   44,
            4,  246,   11,    5, 1849,    8,   80,    3,   20,  101, 1428,    8,
           68, 3257,  221,  487,    6, 4244, 3485,   21,  129,    6,   65,  305,
            1,   38,   26,    2,  470, 1126,   16,   29,  247,  513,    1,    2,
          303,    3, 1755,  196,  311,  151,  166,   35,  426,  166]),
 tensor([1., 1., 0., 0., 1., 0., 0., 1.]),
 tensor([  0,  10,  14,  33,  54,  66, 108, 118]))

In [28]:
# Render a torchviz graph to "not_rnn_torchviz.png".
# NOTE(review): make_dot is handed the raw input batch, not a model output, so the
# rendered graph presumably contains no model operations. Passing the result of
# model(text, offsets) looks like the intent — confirm.
aa_batch = next(iter(dataloader))
make_dot(
    aa_batch,
    params=dict(model.named_parameters()),
).render("not_rnn_torchviz", format="png")

'not_rnn_torchviz.png'