In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
import torch

from pathlib import Path
from torch import nn
from torch.nn import functional as F
from torch import optim
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

from yelp.dataset import ProjectDataset

In [3]:
def set_all_seed(seed, cuda):
    np.random.seed(seed)
    torch.manual_seed(seed)
    if cuda:
        torch.cuda.manual_seed(seed)

In [4]:
path = Path('./data/yelp')
review_csv = path/'reviews_with_splits_lite.csv'
scratch = path/'scratch'
vectorizer_path = scratch/'vectorizer.json'

In [5]:
# dataset = ProjectDataset.load_data_and_create_vectorizer(review_csv)
# dataset.save_vectorizer(vectorizer_path)

In [6]:
dataset = ProjectDataset.load_data_and_vectorizer(review_csv, vectorizer_path)
vectorizer = dataset.get_vectorizer()

dataset.set_split('train')
train_dl = DataLoader(dataset, batch_size=64)
# dataset.set_split('val')
# val_dl = DataLoader(dataset, batch_size=64)

In [None]:
import pdb

In [7]:
class ReviewClassifier(nn.Module):
    def __init__(self, num_features):
        super(ReviewClassifier, self).__init__()
        self.fc1 = nn.Linear(in_features=num_features, out_features=1)
        
    def forward(self, x_in, apply_sigmoid=False):
#         pdb.set_trace()
        y_out = self.fc1(x_in).squeeze(1)
        if apply_sigmoid:
            y_out = torch.sigmoid(y_out)
#         y_out.unsqueeze_(0)
        return y_out

In [8]:
classifier = ReviewClassifier(num_features=len(vectorizer.review_vocab))
optimizer = optim.Adam(classifier.parameters(), lr=0.001)
loss_func = nn.BCEWithLogitsLoss()

In [None]:
itr = iter(train_dl)

In [None]:
x, y = next(itr)
y_pred = classifier(x)
# y_pred.unsqueeze_(0)
loss = loss_func(y_pred, y)
print(loss)

In [9]:
from ignite.engine import Events, create_supervised_trainer
from ignite.metrics import Accuracy, Loss
from ignite.contrib.handlers import ProgressBar

In [10]:
trainer = create_supervised_trainer(classifier, optimizer, loss_func)

In [11]:
@trainer.on(Events.ITERATION_COMPLETED)
def log_training_loss(trainer):
    print("Epoch[{}] Loss: {:.2f}".format(trainer.state.epoch, trainer.state.output))

In [12]:
trainer.run(train_dl, max_epochs=2)

Epoch[1] Loss: 0.69
Epoch[1] Loss: 0.66
Epoch[1] Loss: 0.62
Epoch[1] Loss: 0.59
Epoch[1] Loss: 0.57
Epoch[1] Loss: 0.53
Epoch[1] Loss: 0.50
Epoch[1] Loss: 0.49
Epoch[1] Loss: 0.45
Epoch[1] Loss: 0.39
Epoch[1] Loss: 0.40
Epoch[1] Loss: 0.42
Epoch[1] Loss: 0.38
Epoch[1] Loss: 0.37
Epoch[1] Loss: 0.36
Epoch[1] Loss: 0.31
Epoch[1] Loss: 0.29
Epoch[1] Loss: 0.31
Epoch[1] Loss: 0.28
Epoch[1] Loss: 0.28
Epoch[1] Loss: 0.27
Epoch[1] Loss: 0.30
Epoch[1] Loss: 0.24
Epoch[1] Loss: 0.26
Epoch[1] Loss: 0.25
Epoch[1] Loss: 0.27
Epoch[1] Loss: 0.21
Epoch[1] Loss: 0.21
Epoch[1] Loss: 0.21
Epoch[1] Loss: 0.23
Epoch[1] Loss: 0.24
Epoch[1] Loss: 0.16
Epoch[1] Loss: 0.17
Epoch[1] Loss: 0.16
Epoch[1] Loss: 0.19
Epoch[1] Loss: 0.16
Epoch[1] Loss: 0.18
Epoch[1] Loss: 0.16
Epoch[1] Loss: 0.16
Epoch[1] Loss: 0.18
Epoch[1] Loss: 0.15
Epoch[1] Loss: 0.17
Epoch[1] Loss: 0.14
Epoch[1] Loss: 0.11
Epoch[1] Loss: 0.16
Epoch[1] Loss: 0.17
Epoch[1] Loss: 0.15
Epoch[1] Loss: 0.14
Epoch[1] Loss: 0.14
Epoch[1] Loss: 0.14


Epoch[1] Loss: 0.10
Epoch[1] Loss: 0.09
Epoch[1] Loss: 0.10
Epoch[1] Loss: 0.13
Epoch[1] Loss: 0.12
Epoch[1] Loss: 0.09
Epoch[1] Loss: 0.11
Epoch[1] Loss: 0.10
Epoch[1] Loss: 0.12
Epoch[1] Loss: 0.14
Epoch[1] Loss: 0.12
Epoch[1] Loss: 0.10
Epoch[1] Loss: 0.16
Epoch[1] Loss: 0.11
Epoch[1] Loss: 0.12
Epoch[1] Loss: 0.08
Epoch[1] Loss: 0.10
Epoch[1] Loss: 0.15
Epoch[1] Loss: 0.12
Epoch[1] Loss: 0.11
Epoch[1] Loss: 0.09
Epoch[1] Loss: 0.10
Epoch[1] Loss: 0.08
Epoch[1] Loss: 0.12
Epoch[1] Loss: 0.12
Epoch[1] Loss: 0.11
Epoch[1] Loss: 0.10
Epoch[1] Loss: 0.10
Epoch[1] Loss: 0.07
Epoch[1] Loss: 0.11
Epoch[1] Loss: 0.09
Epoch[1] Loss: 0.12
Epoch[1] Loss: 0.10
Epoch[1] Loss: 0.09
Epoch[1] Loss: 0.09
Epoch[1] Loss: 0.10
Epoch[1] Loss: 0.07
Epoch[1] Loss: 0.10
Epoch[1] Loss: 0.10
Epoch[1] Loss: 0.10
Epoch[1] Loss: 0.09
Epoch[1] Loss: 0.11
Epoch[1] Loss: 0.11
Epoch[1] Loss: 0.09
Epoch[1] Loss: 0.11
Epoch[1] Loss: 0.09
Epoch[1] Loss: 0.09
Epoch[1] Loss: 0.10
Epoch[1] Loss: 0.12
Epoch[1] Loss: 0.09


Epoch[2] Loss: 0.08
Epoch[2] Loss: 0.08
Epoch[2] Loss: 0.06
Epoch[2] Loss: 0.06
Epoch[2] Loss: 0.05
Epoch[2] Loss: 0.07
Epoch[2] Loss: 0.04
Epoch[2] Loss: 0.04
Epoch[2] Loss: 0.05
Epoch[2] Loss: 0.07
Epoch[2] Loss: 0.05
Epoch[2] Loss: 0.05
Epoch[2] Loss: 0.03
Epoch[2] Loss: 0.06
Epoch[2] Loss: 0.05
Epoch[2] Loss: 0.05
Epoch[2] Loss: 0.04
Epoch[2] Loss: 0.04
Epoch[2] Loss: 0.06
Epoch[2] Loss: 0.06
Epoch[2] Loss: 0.05
Epoch[2] Loss: 0.04
Epoch[2] Loss: 0.05
Epoch[2] Loss: 0.07
Epoch[2] Loss: 0.05
Epoch[2] Loss: 0.05
Epoch[2] Loss: 0.06
Epoch[2] Loss: 0.07
Epoch[2] Loss: 0.05
Epoch[2] Loss: 0.06
Epoch[2] Loss: 0.05
Epoch[2] Loss: 0.08
Epoch[2] Loss: 0.04
Epoch[2] Loss: 0.08
Epoch[2] Loss: 0.04
Epoch[2] Loss: 0.04
Epoch[2] Loss: 0.04
Epoch[2] Loss: 0.06
Epoch[2] Loss: 0.05
Epoch[2] Loss: 0.07
Epoch[2] Loss: 0.05
Epoch[2] Loss: 0.06
Epoch[2] Loss: 0.04
Epoch[2] Loss: 0.05
Epoch[2] Loss: 0.07
Epoch[2] Loss: 0.06
Epoch[2] Loss: 0.06
Epoch[2] Loss: 0.06
Epoch[2] Loss: 0.05
Epoch[2] Loss: 0.04


<ignite.engine.engine.State at 0x7f77f1322c18>

In [None]:
def f(dataset, batch_size):
    dataloader = DataLoader(dataset, batch_size=batch_size)
    for data_dict in dataloader:
        out_data_dict = {}
        for name, tensor in data_dict.items():
            out_data_dict[name] = data_dict[name]
        yield out_data_dict

In [None]:
d = f(dataset, 16)

In [None]:
itr = iter(d)

In [None]:
out = next(itr)

In [None]:
out.keys()

In [None]:
itr = iter(dataloader)

In [None]:
out2 = next(itr)
x, y = out['x_data'], out['y_target']
x,y

In [None]:
out

In [None]:
out2['x_data']

In [None]:
out['x_data'] == out2['x_data']

In [None]:
torch.all(out['x_data'] == out2['x_data'])

In [None]:
out2['y_target']

In [None]:
out['y_target']