In [1]:
# Load IPython's autoreload extension and switch it to mode 2, so that edits to
# imported modules (such as the `yelp` modules imported below) are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
import torch
import pdb

from pathlib import Path
from torch import nn
from torch.nn import functional as F
from torch import optim
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

In [3]:
from ignite.metrics import Accuracy, Loss
from ignite.contrib.handlers import ProgressBar

In [4]:
from yelp.dataset import ProjectDataset
from yelp.trainer import YelpTrainer
from yelp.model import Classifier
from yelp.args import args

In [5]:
# Display the imported configuration namespace so the settings used for this run
# are recorded in the cell output.
args

Namespace(batch_size=128, checkpointer_name='classifier', checkpointer_prefix='yelp', device='cuda:3', early_stopping_criteria=5, frequency_cutoff=25, learning_rate=0.001, num_epochs=100, sample_file='reviews_with_splits_lite.csv', vectorizer_fname='vectorizer.json', workdir_name='scratch')

In [6]:
# Root directory of the Yelp data, relative to the notebook's working directory.
path = Path('./data/yelp')
# Reviews CSV; the file name is taken from the imported `args`.
review_csv = path/args.sample_file
# Working directory; reused below as `args.save_dir` and as the checkpoint directory.
scratch = path/args.workdir_name
# Vectorizer file that is passed to `ProjectDataset.load_data_and_vectorizer` below.
vectorizer_path = scratch/args.vectorizer_fname

# Load all reviews into a single frame; later cells select rows by its 'split' column.
df = pd.read_csv(review_csv)

In [7]:
# Attach the working directory to the shared config as `save_dir`
# (presumably where YelpTrainer writes its checkpoints - TODO confirm in yelp/trainer.py).
args.save_dir = scratch
# Override the configured epoch count (100 in the namespace printed above) for a short run.
args.num_epochs=2

In [8]:
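# One-time setup, kept commented out: this appears to build the vectorizer from the
# training split and save it to `vectorizer_path`, the file the next cell loads.
# Uncomment and run it once if that file does not exist yet.
# train_ds = ProjectDataset.load_data_and_create_vectorizer(df.loc[df['split'] == 'train'])
# train_ds.save_vectorizer(vectorizer_path)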

In [9]:
# Training split: reshuffled every epoch, and the trailing partial batch is dropped
# so every optimisation step sees a full batch. The batch size comes from the shared
# config instead of a duplicated literal.
train_df = df.loc[df['split'] == 'train']
train_ds = ProjectDataset.load_data_and_vectorizer(train_df, vectorizer_path)
vectorizer = train_ds.get_vectorizer()
train_dl = DataLoader(train_ds, batch_size=args.batch_size, shuffle=True, drop_last=True)

# Validation split: evaluated in a fixed order and in full. Dropping the last batch
# of a shuffled loader would discard a different random subset of validation rows on
# every pass, making the reported validation metrics vary for reasons unrelated to
# the model.
val_df = df.loc[df['split'] == 'val']
val_ds = ProjectDataset.load_data_and_vectorizer(val_df, vectorizer_path)
val_dl = DataLoader(val_ds, batch_size=args.batch_size, shuffle=False, drop_last=False)

In [10]:
# Binary cross-entropy evaluated directly on logits.
loss_func = nn.BCEWithLogitsLoss()

# The classifier takes one input feature per entry in the vectorizer's review vocabulary.
classifier = Classifier(num_features=len(vectorizer.review_vocab))

optimizer = optim.Adam(params=classifier.parameters(), lr=args.learning_rate)

# Halve the learning rate once the monitored value has failed to decrease for more
# than `patience` consecutive scheduler steps.
# NOTE(review): in the visible cells the scheduler is only stepped by the unexecuted
# `scheduler_step` handler further down; it is not passed to YelpTrainer.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=1)

In [11]:
def bce_logits_wrapper(output):
    """Convert a ``(logits, targets)`` pair into ``(hard predictions, targets)``.

    Used as the output transform of the accuracy metric: the logits are passed
    through a sigmoid and thresholded at 0.5 (strictly greater than), yielding
    integer 0/1 predictions.

    Args:
        output: two-item sequence ``(y_pred, y)`` where ``y_pred`` holds raw logits.

    Returns:
        Tuple ``(predictions, y)``; ``predictions`` is a ``torch.long`` tensor and
        ``y`` is returned untouched.
    """
    logits, targets = output
    probabilities = torch.sigmoid(logits)
    hard_labels = probabilities.gt(0.5).to(torch.long)
    return hard_labels, targets

In [12]:
# Metrics handed to the trainer: accuracy on thresholded logits, and the mean of
# the same loss function that is used for optimisation.
metrics = dict(
    accuracy=Accuracy(output_transform=bce_logits_wrapper),
    loss=Loss(loss_func),
)

# Progress bar that stays visible after each epoch finishes.
pbar = ProgressBar(persist=True)

In [13]:
# Bundle the model, optimizer, loss, both data loaders, config, progress bar and
# metrics into the project's trainer.
# NOTE(review): the `scheduler` created earlier is not passed here - confirm whether
# YelpTrainer handles learning-rate scheduling itself.
yelp_trainer = YelpTrainer(classifier, optimizer, loss_func, train_dl, val_dl, args, pbar, metrics)

In [14]:
# Run training; the output below reports loss and accuracy on the training and
# validation data after each epoch.
yelp_trainer.run()

Epoch [1/2]: [306/306] 100%|██████████, loss=4.03e-01 [00:07<00:00]
Epoch [2/2]: [7/306]   2%|▏         , loss=3.72e-01 [00:00<00:05]

Epoch: 1
Training - Loss: 0.370, Accuracy: 0.892
Validation - Loss: 0.382, Accuracy: 0.883


Epoch [2/2]: [306/306] 100%|██████████, loss=3.11e-01 [00:07<00:00]


Epoch: 2
Training - Loss: 0.294, Accuracy: 0.912
Validation - Loss: 0.310, Accuracy: 0.901


In [None]:
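# Earlier hand-wired ignite setup, kept commented out. The cells below still reference
# `trainer` and `evaluator`, so they cannot run unless this cell is re-enabled and
# `create_supervised_trainer` / `create_supervised_evaluator` are imported.
# trainer = create_supervised_trainer(classifier, optimizer, loss_func, device='cuda:3')
# evaluator = create_supervised_evaluator(classifier, metrics=\
#                                         {'accuracy':Accuracy(bce_logits_wrapper),\
#                                          'bce': Loss(loss_func)}, device='cuda:3')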

In [None]:
# NOTE(review): `RunningAverage`, `ModelCheckpoint`, `Events` and `trainer` are not
# defined or imported in the visible cells; they must be in scope before this runs.
# Track a running average of the trainer's raw output and expose it as 'loss'.
RunningAverage(output_transform=lambda x: x).attach(trainer, 'loss')
# Checkpoint handler writing into `scratch` with the 'yelp' file prefix; it is
# registered below to run after every training epoch for the classifier.
checkpointer = ModelCheckpoint(scratch, 'yelp', save_interval=1, n_saved=2, create_dir=True,\
                               save_as_state_dict=True)
trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpointer, {'classifier': classifier})

In [None]:
def score_function(engine):
  """Return the early-stopping score for an evaluation run.

  The score is the negated 'bce' metric, so a lower loss gives a higher score.

  Args:
    engine: engine whose ``state.metrics`` mapping contains a 'bce' entry.

  Returns:
    The 'bce' metric value multiplied by -1.
  """
  return -engine.state.metrics['bce']

# Early stopping driven by `score_function`: registered on the evaluator's COMPLETED
# event and given `trainer` as the engine to stop.
# NOTE(review): `EarlyStopping`, `Events`, `trainer` and `evaluator` are not defined
# or imported in the visible cells.
# NOTE(review): patience is hard-coded to 10 while the printed config shows
# `early_stopping_criteria=5` - confirm which value is intended.
handler = EarlyStopping(patience=10, score_function=score_function, trainer=trainer)
evaluator.add_event_handler(Events.COMPLETED, handler)

In [None]:
# NOTE(review): this rebinds `pbar`, replacing the instance that was passed to
# YelpTrainer earlier in the notebook.
# Attach a persistent progress bar to `trainer`, displaying its 'loss' metric.
pbar = ProgressBar(persist=True)
pbar.attach(trainer, ['loss'])

In [None]:
@evaluator.on(Events.COMPLETED)
def scheduler_step(engine):
  """Step the plateau scheduler with the validation loss after an evaluation run.

  Registered on the evaluator's COMPLETED event. The same evaluator is also run on
  `train_dl` by `log_training_results`, so this handler fires for training-set
  passes as well. Those passes are skipped here so that the learning rate reacts to
  the validation loss only, once per epoch.

  Args:
    engine: the evaluator engine whose run just completed; its
      `state.metrics['bce']` is the loss value fed to the scheduler.
  """
  # `state.dataloader` holds the object that was handed to `evaluator.run(...)`.
  if engine.state.dataloader is not val_dl:
    return
  scheduler.step(engine.state.metrics['bce'])
  
@trainer.on(Events.EPOCH_COMPLETED)
def log_training_results(engine):
  """Evaluate on the training loader and log accuracy and loss for the finished epoch.

  Registered on the trainer's EPOCH_COMPLETED event; `engine` is the trainer and is
  only used for its epoch number.

  NOTE(review): this runs the shared `evaluator`, which has COMPLETED handlers
  attached elsewhere in the notebook (early stopping, scheduler). Those handlers
  also fire for this training-set pass unless they filter on the dataloader.
  """
  evaluator.run(train_dl)
  metrics = evaluator.state.metrics
  # The backslash continuations sit inside the string literal, so the leading spaces
  # of the following lines become part of the logged message.
  pbar.log_message(f"Training Results - Epoch: {engine.state.epoch}\
                    Avg accuracy: {metrics['accuracy']:0.2f}\
                    Avg loss: {metrics['bce']:0.2f}")
                   
@trainer.on(Events.EPOCH_COMPLETED)
def log_validation_results(engine):
  """Evaluate on the validation loader and log accuracy and loss for the finished epoch.

  Registered on the trainer's EPOCH_COMPLETED event; `engine` is the trainer and is
  only used for its epoch number. Running the evaluator here also fires the
  handlers attached to the evaluator's COMPLETED event.
  """
  evaluator.run(val_dl)
  metrics = evaluator.state.metrics
  # The backslash continuations sit inside the string literal, so the leading spaces
  # of the following lines become part of the logged message.
  pbar.log_message(f"Validation Results - Epoch: {engine.state.epoch}\
                    Avg accuracy: {metrics['accuracy']:0.2f}\
                    Avg loss: {metrics['bce']:0.2f}")

In [None]:
# Train for the number of epochs set on the shared config (overridden to 2 earlier
# in the notebook) rather than repeating the literal here.
trainer.run(train_dl, max_epochs=args.num_epochs)