<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Imports-&amp;-Inits" data-toc-modified-id="Imports-&amp;-Inits-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Imports &amp; Inits</a></span></li><li><span><a href="#Data-&amp;-Model" data-toc-modified-id="Data-&amp;-Model-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Data &amp; Model</a></span></li><li><span><a href="#Training" data-toc-modified-id="Training-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Training</a></span></li><li><span><a href="#Testing" data-toc-modified-id="Testing-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Testing</a></span><ul class="toc-item"><li><span><a href="#Ignite-Testing" data-toc-modified-id="Ignite-Testing-4.1"><span class="toc-item-num">4.1&nbsp;&nbsp;</span>Ignite Testing</a></span></li><li><span><a href="#NLPBook-Testing" data-toc-modified-id="NLPBook-Testing-4.2"><span class="toc-item-num">4.2&nbsp;&nbsp;</span>NLPBook Testing</a></span></li></ul></li></ul></div>

# CBOW Training with Frankenstein Text

## Imports & Inits

In [None]:
%load_ext autoreload
%autoreload 2

In [1]:
import pandas as pd
import torch

from torch import nn
from torch import optim
from torch.utils.data import DataLoader

In [2]:
from ignite.engine import Events, create_supervised_evaluator
from ignite.metrics import Accuracy, Loss
from ignite.contrib.handlers import ProgressBar

In [4]:
from consts import consts
from cbow.dataset import CBOWDataset, DataContainer
from cbow.model import CBOWClassifier, ModelContainer
from cbow.trainer import IgniteTrainer
consts

Namespace(batch_size=1024, checkpointer_name='classifier', checkpointer_prefix='cbow', device='cuda:3', early_stopping_criteria=5, embedding_size=100, learning_rate=0.0001, metric_file=PosixPath('../data/books/work_dir/metrics.csv'), model_dir=PosixPath('../data/books/work_dir/models'), num_epochs=100, path=PosixPath('../data/books'), proc_dataset_csv=PosixPath('../data/books/frankenstein_with_splits.csv'), save_every=2, save_total=5, vectorizer_file=PosixPath('../data/books/work_dir/vectorizer.json'), work_dir=PosixPath('../data/books/work_dir'))

## Data & Model

In [5]:
# --- Data -------------------------------------------------------------------
# Read the processed dataset CSV and wrap it in the project's DataContainer.
# NOTE(review): is_load=True presumably restores a previously saved vectorizer
# from consts.vectorizer_file instead of rebuilding it — confirm in cbow.dataset.
df = pd.read_csv(consts.proc_dataset_csv)
dc = DataContainer(
  df,
  consts.vectorizer_file,
  consts.batch_size,
  is_load=True,
)

# --- Model, loss, optimiser, LR schedule --------------------------------------
classifier = CBOWClassifier(dc.vocabulary_size, consts.embedding_size)
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(classifier.parameters(), lr=consts.learning_rate)
# Halve the learning rate (factor=0.5) once the monitored quantity has stopped
# decreasing (mode='min') for longer than `patience` scheduler steps.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
  optimizer,
  mode='min',
  factor=0.5,
  patience=1,
)
mc = ModelContainer(classifier, optimizer, loss_func, scheduler)

# --- Reporting ----------------------------------------------------------------
# Progress bar plus the metrics shared by training and by the Ignite evaluator.
pbar = ProgressBar(persist=True)
metrics = dict(accuracy=Accuracy(), loss=Loss(loss_func))

## Training

In [None]:
# Hand the model container, data container, configuration, progress bar and
# metrics to the project's trainer and start the training run.
# NOTE(review): epoch count, early stopping and checkpointing are presumably
# driven by `consts` (num_epochs, early_stopping_criteria, save_every, ...) —
# confirm in cbow.trainer.
ig = IgniteTrainer(mc, dc, consts, pbar, metrics)
ig.run()

## Testing

### Ignite Testing

In [6]:
# Restore trained weights for evaluation.
# map_location='cpu' remaps every stored tensor to the CPU while unpickling, so
# a checkpoint written from a GPU (consts.device is 'cuda:3') still loads on a
# machine where that device does not exist. load_state_dict then copies the
# values into the classifier's existing parameters, so the device the model
# currently lives on is unaffected.
state_dict = torch.load(consts.work_dir/'cbow_classifier1.pth', map_location='cpu')
classifier.load_state_dict(state_dict)

In [8]:
# Ignite evaluation engine: runs `classifier` over a data loader and computes
# every entry of the `metrics` dict ('accuracy' and 'loss').
evaluator = create_supervised_evaluator(classifier, metrics=metrics)


@evaluator.on(Events.COMPLETED)
def log_testing_results(engine):
  """Print the test loss and accuracy once the evaluation run has completed.

  Args:
    engine: the Ignite engine that fired the event; its ``state.metrics``
      mapping is read for the 'loss' and 'accuracy' entries.
  """
  # Named `results` so the notebook-level `metrics` dict is not shadowed.
  results = engine.state.metrics
  print(f"Test loss: {results['loss']:0.3f}")
  print(f"Test accuracy: {results['accuracy']:0.3f}")

In [9]:
# Evaluate on the test data loader; the COMPLETED handler registered on
# `evaluator` prints the loss and accuracy when the run finishes.
evaluator.run(dc.test_dl)

Test loss: 6.381
Test accuracy: 0.145


<ignite.engine.engine.State at 0x7f2993a5a588>

### NLPBook Testing

In [22]:
def compute_accuracy(y_pred, y_target):
  """Return the fraction of predictions whose arg-max class equals the target.

  Args:
    y_pred: tensor of per-class scores; the maximum is taken along dim 1
      (assumed to be laid out as (batch, n_classes) — TODO confirm).
    y_target: tensor of target class indices, compared element-wise with the
      predicted indices.

  Returns:
    Number of matching predictions divided by the number of predictions,
    as a Python float.
  """
  # max(dim=1) yields (values, indices); only the indices are needed.
  predicted_classes = y_pred.max(dim=1)[1]
  hits = predicted_classes.eq(y_target).sum().item()
  return hits / predicted_classes.shape[0]

In [23]:
# Manual evaluation loop kept alongside the Ignite evaluator as a cross-check.
running_loss = 0.
running_acc = 0.

# Switch the module to evaluation mode (affects layers such as dropout, if the
# model has any).
classifier.eval()

# Testing needs no gradients: disabling autograd avoids building a graph for
# every forward pass, which saves memory and time without changing the values.
with torch.no_grad():
  for i, batch in enumerate(dc.test_dl):
    x, y = batch
    y_pred = classifier(x_in=x)

    loss_t = loss_func(y_pred, y).item()
    # Incremental mean over batches. Every batch carries equal weight, so a
    # smaller final batch counts as much as a full one; this can differ
    # slightly from a per-sample average.
    running_loss += (loss_t - running_loss) / (i + 1)

    acc_t = compute_accuracy(y_pred, y)
    running_acc += (acc_t - running_acc) / (i + 1)

In [24]:
# Report the batch-averaged metrics accumulated by the manual evaluation loop.
print(
  f"Test loss: {running_loss:0.3f}",
  f"Test acc: {running_acc:0.3f}",
  sep="\n",
)

Test loss: 6.384
Test acc: 0.145
