In [1]:
# Enable IPython's autoreload extension. Mode 2 reloads imported modules before
# each cell executes, so edits to project code are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
import torch
import pdb

from pathlib import Path
from torch import nn
from torch.nn import functional as F
from torch import optim
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

from yelp.dataset import ProjectDataset

In [3]:
from ignite.engine import Events, create_supervised_trainer, create_supervised_evaluator
from ignite.metrics import Accuracy, Loss
from ignite.contrib.handlers import ProgressBar

In [4]:
def set_all_seed(seed, cuda):
    """Seed every random number generator this notebook can draw from.

    Args:
        seed: integer seed applied to Python's ``random``, NumPy and PyTorch.
        cuda: when truthy, the CUDA generators are seeded as well.
    """
    # Local import: ``random`` is not among the notebook's top-level imports.
    import random

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if cuda:
        # Seed all GPUs rather than only the current device, because the model
        # may be placed on a non-default device.
        torch.cuda.manual_seed_all(seed)

In [5]:
# Root folder of the Yelp data; every other location is derived from it.
path = Path('./data/yelp')

# Working directory for generated artefacts, and the vectorizer file inside it.
scratch = path / 'scratch'
vectorizer_path = scratch / 'vectorizer.json'

# Review CSV file read by the dataset-loading cell.
review_csv = path / 'reviews_with_splits_lite.csv'

In [6]:
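# One-time bootstrap, left disabled: build the vectorizer from the review CSV
# and save it to `vectorizer_path`, which the next cell loads.
# NOTE(review): presumably this has to be run once on a fresh checkout before
# the saved vectorizer exists -- confirm.
# dataset = ProjectDataset.load_data_and_create_vectorizer(review_csv)
# dataset.save_vectorizer(vectorizer_path)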

In [7]:
# Load the review data together with the vectorizer stored at `vectorizer_path`.
dataset = ProjectDataset.load_data_and_vectorizer(review_csv, vectorizer_path)
vectorizer = dataset.get_vectorizer()

# Training batches are reshuffled every epoch so the optimizer does not see
# the rows in the same stored order on every pass.
dataset.set_split('train')
train_dl = DataLoader(dataset, batch_size=64, shuffle=True)
# NOTE(review): `set_split` looks like it switches the active split on this
# shared `dataset` object; if so, enabling the lines below would also change
# what `train_dl` yields -- confirm before re-enabling.
# dataset.set_split('val')
# val_dl = DataLoader(dataset, batch_size=64)

In [8]:
class ReviewClassifier(nn.Module):
    """Single linear layer that maps a feature vector to one score."""

    def __init__(self, num_features):
        """Create the classifier.

        Args:
            num_features: length of each input feature vector.
        """
        super().__init__()
        self.fc1 = nn.Linear(num_features, 1)

    def forward(self, x_in, apply_sigmoid=False):
        """Score a batch of inputs.

        Args:
            x_in: input tensor; expected to be (batch, num_features) so that
                dimension 1 of the linear output has size 1 and is squeezed away.
            apply_sigmoid: when true, pass the scores through a sigmoid
                before returning them; otherwise return the raw scores.

        Returns:
            The linear layer's output with dimension 1 squeezed out.
        """
        logits = self.fc1(x_in).squeeze(dim=1)
        return torch.sigmoid(logits) if apply_sigmoid else logits

In [25]:
# Seed all RNGs before the model is built so the weight initialisation (and
# any later random draws) is repeatable across kernel restarts.
SEED = 42
set_all_seed(SEED, cuda=torch.cuda.is_available())

# The input width equals the size of the review vocabulary.
classifier = ReviewClassifier(num_features=len(vectorizer.review_vocab))
optimizer = optim.Adam(classifier.parameters(), lr=0.001)
# BCEWithLogitsLoss applies the sigmoid itself, so the classifier is left to
# emit raw scores (its `apply_sigmoid` flag defaults to False).
loss_func = nn.BCEWithLogitsLoss()

In [26]:
# Prefer the GPU index the notebook was originally run on, but fall back to the
# default GPU or the CPU so the cell also works on machines with fewer devices.
GPU_INDEX = 3
if torch.cuda.is_available():
    device = f'cuda:{GPU_INDEX}' if torch.cuda.device_count() > GPU_INDEX else 'cuda'
else:
    device = 'cpu'

# Move the model explicitly: recent ignite releases only transfer the batches
# to `device` and leave the model where it is.
classifier.to(device)
trainer = create_supervised_trainer(classifier, optimizer, loss_func, device=device)
# evaluator = create_supervised_evaluator(classifier, metrics={'accuracy': Accuracy(),'bce': Loss(loss_func)}, device=device)

In [27]:
# Progress bar for the training engine; `persist=True` is what leaves each
# finished epoch's bar on screen (see the captured output below).
pbar = ProgressBar(persist=True)
pbar.attach(engine=trainer)

In [28]:
@trainer.on(Events.EPOCH_COMPLETED)
def log_training_results(engine):
    """Log the epoch number and the engine's latest output after each epoch.

    The value logged is `engine.state.output` as it stands when the epoch
    finishes. NOTE(review): with ignite's default supervised trainer this is
    presumably the loss of the last batch, not an epoch average -- confirm.

    Args:
        engine: the engine that fired the EPOCH_COMPLETED event.
    """
    state = engine.state
    output = state.output
    message = f'Training Results - Epoch: {engine.state.epoch} Loss: {output:0.2f}'
    pbar.log_message(message)

In [29]:
# Train for two passes over the training loader; the returned engine state is
# the cell's displayed value.
trainer.run(data=train_dl, max_epochs=2)

Epoch [1/2]: [613/613] 100%|██████████ [00:07<00:00]
Epoch [2/2]: [14/613]   2%|▏          [00:00<00:06]

Training Results - Epoch: 1 Loss: 0.06


Epoch [2/2]: [613/613] 100%|██████████ [00:07<00:00]

Training Results - Epoch: 2 Loss: 0.05





<ignite.engine.engine.State at 0x7fbf97618748>