In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
import torch
import pdb

from pathlib import Path
from torch import nn
from torch.nn import functional as F
from torch import optim
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

In [3]:
from ignite.engine import Events, create_supervised_trainer, create_supervised_evaluator
from ignite.metrics import Accuracy, Loss, RunningAverage
from ignite.handlers import ModelCheckpoint, EarlyStopping
from ignite.contrib.handlers import ProgressBar

In [4]:
from yelp.dataset import ProjectDataset

In [5]:
def set_all_seed(seed, cuda):
  np.random.seed(seed)
  torch.manual_seed(seed)
  if cuda:
    torch.cuda.manual_seed(seed)

In [6]:
path = Path('./data/yelp')
review_csv = path/'reviews_with_splits_lite.csv'
scratch = path/'scratch'
vectorizer_path = scratch/'vectorizer.json'

df = pd.read_csv(review_csv)

In [7]:
# train_ds = ProjectDataset.load_data_and_create_vectorizer(df.loc[df['split'] == 'train'])
# train_ds.save_vectorizer(vectorizer_path)

In [8]:
train_df = df.loc[df['split'] == 'train']
train_ds = ProjectDataset.load_data_and_vectorizer(train_df, vectorizer_path)
vectorizer = train_ds.get_vectorizer()
train_dl = DataLoader(train_ds, batch_size=128, shuffle=True, drop_last=True)

val_df = df.loc[df['split'] == 'val']
val_ds = ProjectDataset.load_data_and_vectorizer(val_df, vectorizer_path)
val_dl = DataLoader(val_ds, batch_size=128, shuffle=True, drop_last=True)

In [17]:
class ReviewClassifier(nn.Module):
  def __init__(self, num_features):
    super(ReviewClassifier, self).__init__()
    self.fc1 = nn.Linear(in_features=num_features, out_features=1)
    
  def forward(self, x_in, apply_sigmoid=False):
    y_out = self.fc1(x_in).squeeze(1)
    if apply_sigmoid:
      y_out = torch.sigmoid(y_out)
    return y_out

In [18]:
classifier = ReviewClassifier(num_features=len((vectorizer).review_vocab))
optimizer = optim.Adam(classifier.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, mode='min', factor=0.5, patience=1)
loss_func = nn.BCEWithLogitsLoss()

In [19]:
def bce_logits_wrapper(output):
    y_pred, y = output
    y_pred = (torch.sigmoid(y_pred) > 0.5).long()
    return y_pred, y

In [26]:
trainer = create_supervised_trainer(classifier, optimizer, loss_func, device='cuda:3')
evaluator = create_supervised_evaluator(classifier, metrics=\
                                        {'accuracy':Accuracy(bce_logits_wrapper),\
                                         'bce': Loss(loss_func)}, device='cuda:3')

In [27]:
RunningAverage(output_transform=lambda x: x).attach(trainer, 'loss')

def score_function(engine):
  val_loss = engine.state.metrics['bce']
  return -val_loss

handler = EarlyStopping(patience=10, score_function=score_function, trainer=trainer)
evaluator.add_event_handler(Events.COMPLETED, handler)

In [28]:
pbar = ProgressBar(persist=True)
pbar.attach(trainer, ['loss'])

In [29]:
@evaluator.on(Events.COMPLETED)
def scheduler_step(engine):
  print(engine.metrics)
  
@trainer.on(Events.EPOCH_COMPLETED)
def log_training_results(engine):
  evaluator.run(train_dl)
  metrics = evaluator.state.metrics
  pbar.log_message(f"Training Results - Epoch: {engine.state.epoch}\
                    Avg accuracy: {metrics['accuracy']:0.2f}\
                    Avg loss: {metrics['bce']:0.2f}")
                   
@trainer.on(Events.EPOCH_COMPLETED)
def log_validation_results(engine):
  evaluator.run(val_dl)
  metrics = evaluator.state.metrics
  pbar.log_message(f"Validation Results - Epoch: {engine.state.epoch}\
                    Avg accuracy: {metrics['accuracy']:0.2f}\
                    Avg loss: {metrics['bce']:0.2f}")

In [30]:
trainer.run(train_dl, max_epochs=2)

Epoch [1/2]: [306/306] 100%|██████████, loss=2.65e-01 [00:06<00:00]


AttributeError: 'Engine' object has no attribute 'metrics'