 If you use Google Colab, PyTorch and SciPy are already installed, so you probably just want to install PyTorch Lightning.
  - [PyTorch](https://pytorch.org/get-started/locally/) (tested with 1.10.1 and with 1.10.0)
  - [PyTorch Lightning](https://pypi.org/project/pytorch-lightning/) (test with 1.5.8)
  - [SciPy](https://scipy.org/install/) (tested with 1.7.3 and with 1.4.1)


In [2]:
!pip install pytorch-lightning==1.5.8

In [3]:
!pip install gdown
!gdown --id 1thWkUj7uGOApr_dXRvMr9TsEHpo_H_2q -O sst2.zip
!mkdir -p data
!unzip sst2.zip -d data
!rm sst2.zip

In [4]:
def print_important_weights(weights, words):
    """
    Print importtatn pairs of weights and words.
    # Parameters
    weights : `Iterable`, required.
        Weights from a learned model.
    words : `Iterable`, required.
        Word types of the vocabulary.  
        It must be true that `len(weights) == len(words)`.
    # Returns
        `None`
    """

    def print_pairs(pairs):
        for weight, word in pairs:
            print("{: .4f} | {}".format(weight, word))

    assert len(weights) == len(words)
    pairs = list(zip(weights, words))
    pairs = sorted(pairs, key=lambda x: x[0], reverse=True)
    print("Most positive words:")
    print_pairs(pairs[:10])
    print("\nMost negative words:")
    print_pairs(reversed(pairs[-10:]))

    pairs = list(zip(abs(weights), words))
    pairs = sorted(pairs, key=lambda x: x[0], reverse=False)
    print("\nMost neutral words:")
    print_pairs(pairs[:10])


 ## Torch Logistic Regression Model

In [5]:
import argparse
from argparse import ArgumentParser
from datetime import datetime
import json
import logging
from pathlib import Path
import shutil
from typing import Dict, List, Tuple, Type

import numpy as np
import pytorch_lightning as pl
from pytorch_lightning import loggers as pl_loggers
from scipy import sparse
import torch
from torch import nn
from torch.utils.data.dataloader import DataLoader
from torch.utils.data.dataset import Dataset
from torchmetrics import Accuracy

class LogisticRegressionModel(nn.Module):
    """
    Logistic regression binary classification model
    """

    def __init__(self, num_features):
        """
        # Parameters
        num_features : `int`, required.
            Number of the features.
        # Returns
            `None`
        """
        super().__init__()
        self.linear = nn.Linear(num_features, 1)
        
    def forward(self, features):
        """
        Returns the logits of the model given features. 
        Note that model predictions should be either 0 or 1 based on a threshold.
        # Parameters
        features : `torch.FloatTensor`, required.
            The tensor of features with the shape (batch_size, num_of_features)
        # Returns
        probs : `torch.FloatTensor`, required.
            The tensor of probabilities with the shape (batch_size, 1) or (batch_size,)
        """
        probs = torch.sigmoid(self.linear(features))
        
        return probs


 ## Binary classifier as a Pytorch lightning module

In [6]:
class BinaryClassificationLModule(pl.LightningModule):

    def __init__(self, **kwargs):
        super().__init__()

        self.save_hyperparameters()
        data_dir = Path(self.hparams.data_dir)
        
        self.hparams.vocab = json.load(
            open(data_dir.joinpath(self.hparams.vocab_filename)))
        self.hparams.vocab_size = len(self.hparams.vocab)

        self.model = self.get_model()
        self.step_count = 0
        self.accuracy = Accuracy()

    def forward(self, *args, **kwargs):
        return self.model(*args, **kwargs)

    def training_step(self, batch, batch_idx):
        input = self.batch2input(batch)
        labels = self.batch2labels(batch)
        probs = self(**input)
        probs = probs.squeeze()
        
        criterion = torch.nn.BCEWithLogitsLoss()
        loss = criterion(probs, labels)

        self.log('train_loss', loss, prog_bar=True)
        self.log('train_acc', self.accuracy(probs, labels.int()), prog_bar=True)
        output_dict = {'loss': loss}
        return output_dict

    def validation_step(self, batch, batch_idx):
        input = self.batch2input(batch)
        labels = self.batch2labels(batch)
        probs = self(**input)
        probs = probs.squeeze()

        criterion = torch.nn.BCEWithLogitsLoss()
        loss = criterion(probs, labels)

        self.log('val_loss', loss)
        self.log('val_acc', self.accuracy(probs, labels.int()))

    def test_step(self, batch, batch_idx):
        input = self.batch2input(batch)
        labels = self.batch2labels(batch)
        probs = self(**input)
        probs = probs.squeeze()

        criterion = torch.nn.BCEWithLogitsLoss()
        loss = criterion(probs, labels)

        self.log('test_loss', loss)
        self.log('test_acc', self.accuracy(probs, labels.int()))

    def configure_optimizers(self):
        if self.hparams.optimizer == 'sgd':
            optimizer = torch.optim.SGD(self.model.parameters(),
                                        lr=self.hparams.learning_rate)
        elif self.hparams.optimizer == 'adam':
            optimizer = torch.optim.Adam(self.model.parameters(),
                                         lr=self.hparams.learning_rate)
        elif self.hparams.optimizer == 'adagrad':
            optimizer = torch.optim.Adagrad(self.model.parameters(), 
                                          lr=0.01)
        elif self.hparams.optimizer == 'rprop':
            optimizer = torch.optim.Rprop(self.model.parameters(), 
                                          lr=0.01)
        else:
            raise NotImplementedError
        return optimizer

    def train_dataloader(self):
        return self.get_dataloader('train', self.hparams.train_batch_size, shuffle=True)

    def val_dataloader(self):
        return self.get_dataloader('dev', self.hparams.eval_batch_size, shuffle=False)

    def test_dataloader(self):
        return self.get_dataloader('test', self.hparams.eval_batch_size, shuffle=False)

    def get_model(self) -> nn.Module:
        raise NotImplementedError

    def batch2input(self, batch: Tuple[torch.Tensor]) -> Dict[str, torch.Tensor]:
        raise NotImplementedError

    def batch2labels(self, batch: Tuple[torch.Tensor]) -> torch.Tensor:
        raise NotImplementedError

    def get_dataloader(self,
                       split: str,
                       batch_size: int,
                       shuffle: bool = False) -> DataLoader:
        raise NotImplementedError

    @classmethod
    def add_model_specific_args(cls, parser: ArgumentParser) -> ArgumentParser:
        """
        Add arguments to the parser and return the parser.
        See (https://pytorch-lightning.readthedocs.io/en/stable/common/hyperparameters.html)
        for usage of this method.
        """
        parser.add_argument('--vocab_filename',
                            default=None,
                            type=str,
                            required=True,
                            help="File name of the feature.")
        parser.add_argument('--optimizer',
                            default='adam',
                            type=str,
                            help="The optimizer to use, such as sgd or adam.")
        parser.add_argument('--learning_rate',
                            #default=1,
                            default=1e-3,
                            type=float,
                            help="The initial learning rate for training.")
        parser.add_argument('--max_epochs',
                            default=10,
                            type=int,
                            help="The number of epochs to train your model.")
        parser.add_argument('--train_batch_size', default=32, type=int)
        parser.add_argument('--eval_batch_size', default=32, type=int)
        parser.add_argument('--seed',
                            type=int,
                            default=42,
                            help="The random seed for initialization")
        parser.add_argument('--do_train',
                            action="store_true",
                            default=True,
                            help="Whether to run training.")
        parser.add_argument('--do_predict',
                            action="store_true",
                            help="Whether to run predictions on the test set.")
        parser.add_argument('--data_dir',
                            default="sst2",
                            type=str,
                            help="The input data dir. Should contain the training files.")
        parser.add_argument('--output_dir',
                            type=str,
                            help=("The output directory where the model predictions "
                                  "and checkpoints will be written."))
        parser.add_argument('--gpus',
                            default=0,
                            type=int,
                            help="The number of GPUs allocated for this, 0 meaning none")
        parser.add_argument('--num_workers',
                            default=8,
                            type=int,
                            help="Config `DataLoader` of pytorch")
        return parser


def generic_train(args: argparse.Namespace,
                  model_class: Type[pl.LightningModule]) -> Dict:
    """
        Train (and optionally predict) and return dict results.
        # Parameters
        args : `argparse.Namespace`, required.
            Configuration of the training and the model
        model_class : `Type[pl.LightningModule]`, required.
            Class of the model to be trained.
        # Returns
        A `dict` object containing the following keys and types.
            trainer: `pl.Trainer`
            model: `pl.LightningModule`
            val_results_best: `list[dict]`
                If `args.do_predict==True`
            test_results_best: `list[dict]`
                If `args.do_predict==True`
            best_model_path: `Path`
                Path to the checkpoint of the best model.
        """
    pl.seed_everything(args.seed)

    tensorboard_log_dir = Path(args.output_dir).joinpath('tensorboard_logs')
    tensorboard_log_dir.mkdir(parents=True, exist_ok=True)

    tensorboard_logger = pl_loggers.TensorBoardLogger(
        save_dir=tensorboard_log_dir,
        version='version_' + datetime.now().strftime('%Y%m%d-%H%M%S'),
        name='',
        default_hp_metric=True)

    checkpoint_dir = Path(args.output_dir).joinpath(tensorboard_logger.version,
                                                    'checkpoints')
    checkpoint_callback = pl.callbacks.ModelCheckpoint(dirpath=checkpoint_dir,
                                                       filename='{epoch}-{val_acc:.2f}',
                                                       monitor='val_acc',
                                                       mode='max',
                                                       save_top_k=1,
                                                       verbose=True)

    dict_args = vars(args)
    model = model_class(**dict_args)
    trainer = pl.Trainer.from_argparse_args(args,
                                            logger=tensorboard_logger,
                                            callbacks=[checkpoint_callback])

    output_dict = {'trainer': trainer, 'model': model}

    if args.do_train:
        trainer.fit(model=model)
        tensorboard_logger.log_hyperparams(
            params=model.hparams,
            metrics={'hp_metric': checkpoint_callback.best_model_score.item()})
        tensorboard_logger.save()

        best_model_path = checkpoint_dir.joinpath('best_model.ckpt')
        logger.info(f"Copy best model from {checkpoint_callback.best_model_path} "
                    f"to {best_model_path}.")
        shutil.copy(checkpoint_callback.best_model_path, best_model_path)

        output_dict.update({
            'trainer': trainer,
            'model': model,
            'best_model_path': best_model_path
        })

    if args.do_predict:
        best_model_path = checkpoint_dir.joinpath('best_model.ckpt')
        model = model.load_from_checkpoint(best_model_path)
        val_results_best = trainer.validate(model, verbose=True)
        test_results_best = trainer.test(model, verbose=True)
        print("Validation accuracy on the best model: {: .4f}".format(
            val_results_best[0]['val_acc']))
        print("Test       accuracy on the best model: {: .4f}".format(
            test_results_best[0]['test_acc']))
        output_dict.update({
            'val_results_best': val_results_best,
            'test_results_best': test_results_best,
        })

    return output_dict


 ## Binary classifier based on curated features.

In [7]:
class FeatureBasedBinaryClassificationLModule(BinaryClassificationLModule):

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def get_model(self) -> nn.Module:
        return LogisticRegressionModel(num_features=self.hparams.vocab_size)

    def batch2input(self, batch):
        return {'features': batch[0]}

    def batch2labels(self, batch):
        return batch[1]

    def get_dataloader(self,
                       split: str,
                       batch_size: int,
                       shuffle: bool = False) -> DataLoader:
        
        data_dir = Path(self.hparams.data_dir)
        features_filepath = data_dir.joinpath(
            f"{split}_{self.hparams.feature_name}_features.npz")
        labels_filepath = data_dir.joinpath(split + "_labels.npz")
        features = sparse.load_npz(features_filepath).todense()
        labels = np.load(labels_filepath, allow_pickle=True)["arr_0"]
        dataset = torch.utils.data.TensorDataset(
            torch.from_numpy(features).float(),
            torch.from_numpy(labels).float())

        logger.info(f"Loading {split} features and labels "
                    f"from {features_filepath} and {labels_filepath}")
        data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                                  batch_size=batch_size,
                                                  shuffle=shuffle,
                                                  num_workers=self.hparams.num_workers)
        return data_loader

    @classmethod
    def add_model_specific_args(cls, parser: ArgumentParser) -> ArgumentParser:
        parser = super().add_model_specific_args(parser)
        parser.add_argument('--feature_name',
                            default=None,
                            type=str,
                            required=True,
                            help="Name of the feature")
        parser.add_argument('--task',
                            default='featurebinarycls',
                            type=str,
                            help="Name of the task.")
        return parser


 ## Training loop for the feature-based model of binary logistic regression

In [8]:
logging.basicConfig(format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
                    datefmt="%Y-%m-%d %H:%M:%S",
                    level=logging.INFO)
logger = logging.getLogger(__name__)

parser = ArgumentParser()
parser = FeatureBasedBinaryClassificationLModule.add_model_specific_args(parser)

args_str = ("--vocab_filename unigram_vocab.json --feature_name unigram_binary "
            "--output_dir output/ftrlogistic --optimizer adam --do_train --do_predict ")

args = parser.parse_args(args_str.split())

if args.output_dir is None:
    args.output_dir = str(
        Path('output').joinpath(
            f"{args.task}_{datetime.now().strftime('%Y%m%d-%H%M%S')}"))
Path(args.output_dir).mkdir(parents=True, exist_ok=True)

print(f"Parsed arguments: {args}")

training_outout = generic_train(args=args,
                                model_class=FeatureBasedBinaryClassificationLModule)


Global seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


Parsed arguments: Namespace(data_dir='sst2', do_predict=True, do_train=True, eval_batch_size=32, feature_name='unigram_binary', gpus=0, learning_rate=0.001, max_epochs=10, num_workers=8, optimizer='adam', output_dir='output/ftrlogistic', seed=42, task='featurebinarycls', train_batch_size=32, vocab_filename='unigram_vocab.json')



  | Name     | Type                    | Params
-----------------------------------------------------
0 | model    | LogisticRegressionModel | 5.0 K 
1 | accuracy | Accuracy                | 0     
-----------------------------------------------------
5.0 K     Trainable params
0         Non-trainable params
5.0 K     Total params
0.020     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

2022-02-07 01:46:40 - INFO - __main__ - Loading dev features and labels from sst2/dev_unigram_binary_features.npz and sst2/dev_labels.npz
  cpuset_checked))
Global seed set to 42
2022-02-07 01:46:42 - INFO - __main__ - Loading train features and labels from sst2/train_unigram_binary_features.npz and sst2/train_labels.npz
  cpuset_checked))


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Epoch 0, global step 216: val_acc reached 0.49083 (best 0.49083), saving model to "/content/output/ftrlogistic/version_20220207-014634/checkpoints/epoch=0-val_acc=0.49.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 1, global step 433: val_acc was not in top 1


Validating: 0it [00:00, ?it/s]

Epoch 2, global step 650: val_acc reached 0.53211 (best 0.53211), saving model to "/content/output/ftrlogistic/version_20220207-014634/checkpoints/epoch=2-val_acc=0.53.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 3, global step 867: val_acc reached 0.59977 (best 0.59977), saving model to "/content/output/ftrlogistic/version_20220207-014634/checkpoints/epoch=3-val_acc=0.60.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 4, global step 1084: val_acc reached 0.66399 (best 0.66399), saving model to "/content/output/ftrlogistic/version_20220207-014634/checkpoints/epoch=4-val_acc=0.66.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 5, global step 1301: val_acc reached 0.68922 (best 0.68922), saving model to "/content/output/ftrlogistic/version_20220207-014634/checkpoints/epoch=5-val_acc=0.69.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 6, global step 1518: val_acc reached 0.71330 (best 0.71330), saving model to "/content/output/ftrlogistic/version_20220207-014634/checkpoints/epoch=6-val_acc=0.71.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 7, global step 1735: val_acc reached 0.73739 (best 0.73739), saving model to "/content/output/ftrlogistic/version_20220207-014634/checkpoints/epoch=7-val_acc=0.74.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 8, global step 1952: val_acc reached 0.74541 (best 0.74541), saving model to "/content/output/ftrlogistic/version_20220207-014634/checkpoints/epoch=8-val_acc=0.75.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 9, global step 2169: val_acc reached 0.75000 (best 0.75000), saving model to "/content/output/ftrlogistic/version_20220207-014634/checkpoints/epoch=9-val_acc=0.75.ckpt" as top 1
2022-02-07 01:47:17 - INFO - __main__ - Copy best model from /content/output/ftrlogistic/version_20220207-014634/checkpoints/epoch=9-val_acc=0.75.ckpt to output/ftrlogistic/version_20220207-014634/checkpoints/best_model.ckpt.
2022-02-07 01:47:17 - INFO - __main__ - Loading dev features and labels from sst2/dev_unigram_binary_features.npz and sst2/dev_labels.npz
  cpuset_checked))


Validating: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 VALIDATE RESULTS
{'val_acc': 0.75, 'val_loss': 0.6399576663970947}
--------------------------------------------------------------------------------


2022-02-07 01:47:18 - INFO - __main__ - Loading test features and labels from sst2/test_unigram_binary_features.npz and sst2/test_labels.npz
  cpuset_checked))


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_acc': 0.7567270994186401, 'test_loss': 0.6439116597175598}
--------------------------------------------------------------------------------
Validation accuracy on the best model:  0.7500
Test       accuracy on the best model:  0.7567


 ## Feature weight analysis

In [9]:
model = training_outout['model']
best_model_path = training_outout['best_model_path']
data_dir = Path('sst2')

model = model.load_from_checkpoint(best_model_path)
weights = model.model.linear.weight.squeeze().detach().numpy()
vocab = json.load(open(data_dir.joinpath('unigram_vocab.json')))
print_important_weights(weights=weights, words=vocab.keys())


Most positive words:
 0.7245 | best
 0.7157 | powerful
 0.7087 | solid
 0.7068 | fun
 0.6341 | heart
 0.6335 | enjoyable
 0.6182 | entertaining
 0.5945 | moving
 0.5836 | portrait
 0.5788 | wonderful

Most negative words:
-0.8932 | too
-0.8373 | bad
-0.7212 | ?
-0.6349 | dull
-0.6252 | worst
-0.6169 | mess
-0.6143 | no
-0.5759 | suffers
-0.5630 | stupid
-0.5351 | plot

Most neutral words:
 0.0000 | charge
 0.0001 | storyline
 0.0001 | somehow
 0.0001 | zombie
 0.0002 | luster
 0.0002 | philosophical
 0.0003 | rob
 0.0004 | telling
 0.0005 | wallace
 0.0005 | effects


## Error Analysis

In [10]:
def error_analysis(fit_model, test_X):
    '''
    Returns predicted label output for given training and testing data
    '''

    test_X = test_X.todense()
    test_X = torch.from_numpy(test_X).float()
    
    y_pred = fit_model(test_X)
    y_pred = y_pred.detach().numpy()
    y_pred = np.where(y_pred <= 0.5, 0, y_pred) 
    y_pred = np.where(y_pred > 0.5, 1, y_pred) 
    
    return y_pred.T[0]

test_labels = np.load('sst2/test_labels.npz')
test_features = sparse.load_npz('sst2/test_unigram_binary_features.npz')
test_model = error_analysis(model.load_from_checkpoint(best_model_path), test_features)

In [11]:
import pandas as pd
from nltk.tokenize import WordPunctTokenizer

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', -1)

def error_frame(pred_model, labels):
    '''
    Generates a pandas dataframe of mislabeled reviews
    '''
    
    tokenizer = WordPunctTokenizer()
    label_column = pd.DataFrame(labels)
    label_column.rename(columns = {0: "True"}, inplace = True)
    model_column = pd.DataFrame(pred_model)
    model_column.rename(columns = {0: "Predicted"}, inplace = True)
    
    data_test = open(data_dir.joinpath('sst2.test')).readlines()
    tokens = []
    for line in data_test:
        lower_line = line.lower()
        tokens.append(tokenizer.tokenize(lower_line))
    
    full_frame = label_column.join(model_column)
    full_frame["Match"] = (full_frame["True"] == full_frame["Predicted"]).astype(int)
    full_frame["Line"] = tokens
    full_frame.index += 1
    
    print("The number of errors found: ", len(full_frame.loc[full_frame['Match'] == 0]))
    return full_frame.loc[full_frame['Match'] == 0]
    
error_frame(test_model, test_labels['arr_0'])

  import sys


The number of errors found:  443


Unnamed: 0,True,Predicted,Match,Line
8,1,0.0,0,"[1, the, movie, exists, for, its, soccer, action, and, its, fine, acting, .]"
11,1,0.0,0,"[1, jason, x, has, cheesy, effects, and, a, hoary, plot, ,, but, its, macabre, ,, self, -, deprecating, sense, of, humor, makes, up, for, a, lot, .]"
14,1,0.0,0,"[1, though, the, violence, is, far, less, sadistic, than, usual, ,, the, film, is, typical, miike, :, fast, ,, furious, and, full, of, off, -, the, -, cuff, imaginative, flourishes, .]"
17,1,0.0,0,"[1, if, your, senses, have, n, ', t, been, dulled, by, slasher, films, and, gorefests, ,, if, you, ', re, a, connoisseur, of, psychological, horror, ,, this, is, your, ticket, .]"
20,0,1.0,0,"[0, those, who, managed, to, avoid, the, deconstructionist, theorizing, of, french, philosopher, jacques, derrida, in, college, can, now, take, an, 85, -, minute, brush, -, up, course, with, the, documentary, derrida, .]"
21,1,0.0,0,"[1, most, new, movies, have, a, bright, sheen, .]"
23,1,0.0,0,"[1, steve, irwin, ', s, method, is, ernest, hemmingway, at, accelerated, speed, and, volume, .]"
31,1,0.0,0,"[1, finally, ,, a, genre, movie, that, delivers, --, in, a, couple, of, genres, ,, no, less, .]"
36,1,0.0,0,"[1, haneke, challenges, us, to, confront, the, reality, of, sexual, aberration, .]"
37,1,0.0,0,"[1, an, experience, so, engrossing, it, is, like, being, buried, in, a, new, environment, .]"


In [12]:
def print_weight(weights, words):
    """
    Prints all word-weight pairs for given data and vocabulary
    """

    def print_pairs(pairs):
        for weight, word in pairs:
            print("{: .4f} | {}".format(weight, word))

    assert len(weights) == len(words)
    pairs = list(zip(weights, words))
    pairs = sorted(pairs, key=lambda x: x[0], reverse=True)
    print_pairs(pairs)
    

In [13]:
print_weight(weights=weights, words=vocab.keys())

 0.7245 | best
 0.7157 | powerful
 0.7087 | solid
 0.7068 | fun
 0.6341 | heart
 0.6335 | enjoyable
 0.6182 | entertaining
 0.5945 | moving
 0.5836 | portrait
 0.5788 | wonderful
 0.5593 | beautifully
 0.5571 | works
 0.5567 | fascinating
 0.5530 | always
 0.5521 | touching
 0.5501 | funny
 0.5377 | remarkable
 0.5363 | love
 0.5308 | family
 0.5289 | terrific
 0.5284 | rare
 0.5259 | beautiful
 0.5238 | perfectly
 0.5217 | worth
 0.5206 | captures
 0.5193 | refreshing
 0.5167 | documentary
 0.5083 | engrossing
 0.5055 | human
 0.5024 | hilarious
 0.5006 | inventive
 0.4992 | performances
 0.4972 | charming
 0.4939 | warm
 0.4913 | spirit
 0.4901 | thoughtful
 0.4872 | spielberg
 0.4867 | cinema
 0.4836 | manages
 0.4831 | somewhat
 0.4829 | delivers
 0.4802 | performance
 0.4766 | our
 0.4697 | fashioned
 0.4676 | unexpected
 0.4656 | definitely
 0.4643 | deeply
 0.4627 | years
 0.4597 | brings
 0.4547 | also
 0.4545 | provides
 0.4541 | unique
 0.4501 | wonderfully
 0.4482 | world
 0

 # View modeling training curves

In [23]:
%load_ext tensorboard
%tensorboard --logdir output/ftrlogistic


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6007 (pid 35340), started 0:38:15 ago. (Use '!kill 35340' to kill it.)

<IPython.core.display.Javascript object>

 ## Deep Averaging Networks (DAN)

In [15]:
class DeepAveragingNetworksModel(nn.Module):

    def __init__(self,
                 vocab,
                 vocab_size: int,
                 word_embedding_size: int,
                 hidden_size: int,
                 num_intermediate_layers: int,
                 dropout_rate: float,
                 use_glove: bool = False):
        """
        # Parameters
        vocab : `dict[str, int]`, required.
            A map from the word type to the index of the word.
        vocab_size : `int`, required.
            Size of the vocabulary.
        word_embedding_size : `int`, required.
            Size of word embeddings.
        hidden_size : `int`, required.
            Size of hidden layer or number of hidden units per layer.
        num_intermediate_layers : `int`, required.
            Number of intermediate layers, the arg takes 0 or greater integers.
        dropout_rate : `float`, required.
            Dropout rate.
        use_glove : `bool`, optional.
            Whether or not to use Glove embeddings instead of randomly initialized ones.
        """
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, word_embedding_size, padding_idx=0)
        if use_glove:
            self.load_glove(vocab, word_embedding_size)

        self.linear = nn.Linear(hidden_size, hidden_size)
        m = torch.nn.Tanh()
        
        if num_intermediate_layers <= 0:
            self.int_layers = [m]
        else:
            layers = torch.nn.ModuleList([m for i in range(0, num_intermediate_layers)])
            self.int_layers = torch.nn.ModuleList(layers)
            
        self.dropout_layer = torch.nn.Dropout(dropout_rate)
        self.output_layer = torch.nn.Linear(hidden_size, 1)

    def forward(self, input_ids, lengths):
        """
        # Parameters
        input_ids : `torch.Tensor`, required.
            Tensor of shape (batch_size, feature_length).
            Each row is a datapoint represented by input words.
        lengths: `torch.Tensor`, required.
            Tensor of shape (batch_size, 1). Token length of input text.
            Used to compute average word embeddings.
        # Returns
        probs : `torch.Tensor`
            Tensor of shape (batch_size)
        """
        out = self.embedding(input_ids)
        out = torch.mean(out, 1)
        
        for index, layer in enumerate(self.int_layers):
            out = self.linear(out)
            out = layer(out)
            out = self.dropout_layer(out)
        
        return self.output_layer(out)

    def load_glove(self, vocab, word_embedding_size):
        logger.info("Load glove pretrained word embeddings")


 ## Binary classifier based on curated features.

In [16]:
from nltk.tokenize import WordPunctTokenizer

class SST2Dataset(Dataset):
    """
    Using dataset to process input text on-the-fly
    """

    def __init__(self, vocab, data, tokenizer):
        self.data = data
        self.vocab = vocab
        self.max_len = 50
        self.tokenizer = tokenizer

    def __getitem__(self, index):
        note = []
        label, text = int(self.data[index][0]), self.data[index][1]
        tokens = self.tokenizer.tokenize(text.lower())
        token_ids = [self.vocab.get(t, 1) for t in tokens]
        length = min(len(token_ids), self.max_len)
        padded_token_ids = token_ids[:50] + [0] * (self.max_len - length)
        return padded_token_ids, length, label

    def collate_fn(self, batch_data):
        padded_token_ids, lengths, labels = list(zip(*batch_data))
        return (
            torch.LongTensor(padded_token_ids).view(-1, self.max_len),
            torch.FloatTensor(lengths).view(-1, 1),
            torch.FloatTensor(labels).view(-1, 1),
        )

    def __len__(self):
        return len(self.data)


class DeepAveragingBinaryClassificationLModule(BinaryClassificationLModule):

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def get_model(self) -> nn.Module:
        return DeepAveragingNetworksModel(
            vocab=self.hparams.vocab,
            vocab_size=self.hparams.vocab_size,
            word_embedding_size=self.hparams.word_embedding_size,
            hidden_size=self.hparams.hidden_size,
            num_intermediate_layers=self.hparams.num_intermediate_layers,
            dropout_rate=self.hparams.dropout_rate,
            use_glove=self.hparams.use_glove)

    def batch2input(self, batch):
        return {'input_ids': batch[0], 'lengths': batch[1]}

    def batch2labels(self, batch):
        return batch[2].squeeze()

    def get_dataloader(self, split, batch_size, shuffle=False) -> DataLoader:
        data_dir = Path(self.hparams.data_dir)
        datapath = data_dir.joinpath(f"sst2.{split}")
        data = open(datapath).readlines()
        data = [d.strip().split(" ", maxsplit=1) for d in data]  
        dataset = SST2Dataset(vocab=self.hparams.vocab,
                              data=data,
                              tokenizer=WordPunctTokenizer())

        logger.info(f"Loading {split} data and labels from {datapath}")
        data_loader = DataLoader(dataset=dataset,
                                 batch_size=batch_size,
                                 shuffle=shuffle,
                                 num_workers=self.hparams.num_workers,
                                 collate_fn=dataset.collate_fn)

        return data_loader

    def configure_optimizers(self):
        if self.hparams.optimizer == 'sgd':
            optimizer = torch.optim.SGD(self.model.parameters(),
                                        lr=self.hparams.learning_rate)
        elif self.hparams.optimizer == 'adam':
            optimizer = torch.optim.Adam(self.model.parameters(),
                                         lr=self.hparams.learning_rate)
        elif self.hparams.optimizer == 'adagrad':
            optimizer = torch.optim.Adagrad(self.model.parameters(),
                                         lr=self.hparams.learning_rate)
        elif self.hparams.optimizer == 'rprop':
            optimizer = torch.optim.Rprop(self.model.parameters(), 
                                          lr=0.01)
        else:
            raise NotImplementedError
        return optimizer

    @classmethod
    def add_model_specific_args(cls, parser: ArgumentParser) -> ArgumentParser:
        parser = super().add_model_specific_args(parser)

        parser.add_argument('--num_intermediate_layers',
                            type=int,
                            help="number of intermediate layers")
        parser.add_argument('--dropout_rate',
                            default=0.5,
                            type=float,
                            help="Dropout rate")
        parser.add_argument('--word_embedding_size',
                            default=300,
                            type=int,
                            help="Size of word embeddings")
        parser.add_argument('--hidden_size',
                            default=300,
                            type=int,
                            help="Size of hidden layer")
        parser.add_argument('--use_glove',
                            action="store_true",
                            help="Whether to run predictions on the test set.")
        parser.add_argument('--task',
                            default='danbinarycls',
                            type=str,
                            help="Name of the task.")
        return parser



 ## Training loop for the feature-based model of deep averaging networks.

In [17]:
logging.basicConfig(format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
                    datefmt="%Y-%m-%d %H:%M:%S",
                    level=logging.INFO)
logger = logging.getLogger(__name__)

parser = ArgumentParser()
parser = DeepAveragingBinaryClassificationLModule.add_model_specific_args(parser)

args_str = ("--vocab_filename unigram_vocab.json "
            "--optimizer sgd --num_intermediate_layers 1 "
            "--output_dir output/dan  --do_train --do_predict ")
args = parser.parse_args(args_str.split())

if args.output_dir is None:
    args.output_dir = str(
        Path('output').joinpath(
            f"{args.task}_{datetime.now().strftime('%Y%m%d-%H%M%S')}"))
Path(args.output_dir).mkdir(parents=True, exist_ok=True)

print(f"Parsed arguments: {args}")

training_outout = generic_train(args=args,
                                model_class=DeepAveragingBinaryClassificationLModule)


Global seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name     | Type                       | Params
--------------------------------------------------------
0 | model    | DeepAveragingNetworksModel | 1.6 M 
1 | accuracy | Accuracy                   | 0     
--------------------------------------------------------
1.6 M     Trainable params
0         Non-trainable params
1.6 M     Total params
6.301     Total estimated model params size (MB)


Parsed arguments: Namespace(data_dir='sst2', do_predict=True, do_train=True, dropout_rate=0.5, eval_batch_size=32, gpus=0, hidden_size=300, learning_rate=0.001, max_epochs=10, num_intermediate_layers=1, num_workers=8, optimizer='sgd', output_dir='output/dan', seed=42, task='danbinarycls', train_batch_size=32, use_glove=False, vocab_filename='unigram_vocab.json', word_embedding_size=300)


Validation sanity check: 0it [00:00, ?it/s]

2022-02-07 01:47:21 - INFO - __main__ - Loading dev data and labels from sst2/sst2.dev
  cpuset_checked))
Global seed set to 42
2022-02-07 01:47:22 - INFO - __main__ - Loading train data and labels from sst2/sst2.train


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Epoch 0, global step 216: val_acc reached 0.49083 (best 0.49083), saving model to "/content/output/dan/version_20220207-014721/checkpoints/epoch=0-val_acc=0.49.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 1, global step 433: val_acc was not in top 1


Validating: 0it [00:00, ?it/s]

Epoch 2, global step 650: val_acc was not in top 1


Validating: 0it [00:00, ?it/s]

Epoch 3, global step 867: val_acc was not in top 1


Validating: 0it [00:00, ?it/s]

Epoch 4, global step 1084: val_acc was not in top 1


Validating: 0it [00:00, ?it/s]

Epoch 5, global step 1301: val_acc was not in top 1


Validating: 0it [00:00, ?it/s]

Epoch 6, global step 1518: val_acc was not in top 1


Validating: 0it [00:00, ?it/s]

Epoch 7, global step 1735: val_acc was not in top 1


Validating: 0it [00:00, ?it/s]

Epoch 8, global step 1952: val_acc was not in top 1


Validating: 0it [00:00, ?it/s]

Epoch 9, global step 2169: val_acc was not in top 1
2022-02-07 01:48:16 - INFO - __main__ - Copy best model from /content/output/dan/version_20220207-014721/checkpoints/epoch=0-val_acc=0.49.ckpt to output/dan/version_20220207-014721/checkpoints/best_model.ckpt.
2022-02-07 01:48:16 - INFO - __main__ - Loading dev data and labels from sst2/sst2.dev


Validating: 0it [00:00, ?it/s]

2022-02-07 01:48:17 - INFO - __main__ - Loading test data and labels from sst2/sst2.test


--------------------------------------------------------------------------------
DATALOADER:0 VALIDATE RESULTS
{'val_acc': 0.49082568287849426, 'val_loss': 0.6934987902641296}
--------------------------------------------------------------------------------


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_acc': 0.5008237361907959, 'test_loss': 0.6940311789512634}
--------------------------------------------------------------------------------
Validation accuracy on the best model:  0.4908
Test       accuracy on the best model:  0.5008


 Another example with Adam optimizer and 2 hidden layers

In [18]:
parser = ArgumentParser()
parser = DeepAveragingBinaryClassificationLModule.add_model_specific_args(parser)

args_str = ("--vocab_filename unigram_vocab.json "
            "--optimizer adam --num_intermediate_layers 2 "
            "--output_dir output/dan  --do_train --do_predict ")
args = parser.parse_args(args_str.split())

if args.output_dir is None:
    args.output_dir = str(
        Path('output').joinpath(
            f"{args.task}_{datetime.now().strftime('%Y%m%d-%H%M%S')}"))
Path(args.output_dir).mkdir(parents=True, exist_ok=True)

print(f"Parsed arguments: {args}")

training_outout = generic_train(args=args,
                                model_class=DeepAveragingBinaryClassificationLModule)


Global seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name     | Type                       | Params
--------------------------------------------------------
0 | model    | DeepAveragingNetworksModel | 1.6 M 
1 | accuracy | Accuracy                   | 0     
--------------------------------------------------------
1.6 M     Trainable params
0         Non-trainable params
1.6 M     Total params
6.301     Total estimated model params size (MB)


Parsed arguments: Namespace(data_dir='sst2', do_predict=True, do_train=True, dropout_rate=0.5, eval_batch_size=32, gpus=0, hidden_size=300, learning_rate=0.001, max_epochs=10, num_intermediate_layers=2, num_workers=8, optimizer='adam', output_dir='output/dan', seed=42, task='danbinarycls', train_batch_size=32, use_glove=False, vocab_filename='unigram_vocab.json', word_embedding_size=300)


Validation sanity check: 0it [00:00, ?it/s]

2022-02-07 01:48:18 - INFO - __main__ - Loading dev data and labels from sst2/sst2.dev
  cpuset_checked))
Global seed set to 42
2022-02-07 01:48:19 - INFO - __main__ - Loading train data and labels from sst2/sst2.train


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Epoch 0, global step 216: val_acc reached 0.66972 (best 0.66972), saving model to "/content/output/dan/version_20220207-014818/checkpoints/epoch=0-val_acc=0.67.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 1, global step 433: val_acc reached 0.73624 (best 0.73624), saving model to "/content/output/dan/version_20220207-014818/checkpoints/epoch=1-val_acc=0.74.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 2, global step 650: val_acc reached 0.76376 (best 0.76376), saving model to "/content/output/dan/version_20220207-014818/checkpoints/epoch=2-val_acc=0.76.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 3, global step 867: val_acc reached 0.76491 (best 0.76491), saving model to "/content/output/dan/version_20220207-014818/checkpoints/epoch=3-val_acc=0.76.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 4, global step 1084: val_acc reached 0.76950 (best 0.76950), saving model to "/content/output/dan/version_20220207-014818/checkpoints/epoch=4-val_acc=0.77.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 5, global step 1301: val_acc was not in top 1


Validating: 0it [00:00, ?it/s]

Epoch 6, global step 1518: val_acc reached 0.77294 (best 0.77294), saving model to "/content/output/dan/version_20220207-014818/checkpoints/epoch=6-val_acc=0.77.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 7, global step 1735: val_acc was not in top 1


Validating: 0it [00:00, ?it/s]

Epoch 8, global step 1952: val_acc was not in top 1


Validating: 0it [00:00, ?it/s]

Epoch 9, global step 2169: val_acc was not in top 1
2022-02-07 01:49:34 - INFO - __main__ - Copy best model from /content/output/dan/version_20220207-014818/checkpoints/epoch=6-val_acc=0.77.ckpt to output/dan/version_20220207-014818/checkpoints/best_model.ckpt.
2022-02-07 01:49:34 - INFO - __main__ - Loading dev data and labels from sst2/sst2.dev


Validating: 0it [00:00, ?it/s]

2022-02-07 01:49:35 - INFO - __main__ - Loading test data and labels from sst2/sst2.test


--------------------------------------------------------------------------------
DATALOADER:0 VALIDATE RESULTS
{'val_acc': 0.7729358077049255, 'val_loss': 0.6977782845497131}
--------------------------------------------------------------------------------


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_acc': 0.7841845154762268, 'test_loss': 0.6381437182426453}
--------------------------------------------------------------------------------
Validation accuracy on the best model:  0.7729
Test       accuracy on the best model:  0.7842


Example with Adagrad optimizer and 2 hidden layers (lr default = 1e-2)

In [19]:
parser = ArgumentParser()
parser = DeepAveragingBinaryClassificationLModule.add_model_specific_args(parser)

args_str = ("--vocab_filename unigram_vocab.json "
            "--optimizer adagrad --num_intermediate_layers 2 "
            "--output_dir output/dan  --do_train --do_predict ")
args = parser.parse_args(args_str.split())

if args.output_dir is None:
    args.output_dir = str(
        Path('output').joinpath(
            f"{args.task}_{datetime.now().strftime('%Y%m%d-%H%M%S')}"))
Path(args.output_dir).mkdir(parents=True, exist_ok=True)

print(f"Parsed arguments: {args}")

training_outout = generic_train(args=args,
                                model_class=DeepAveragingBinaryClassificationLModule)

Global seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name     | Type                       | Params
--------------------------------------------------------
0 | model    | DeepAveragingNetworksModel | 1.6 M 
1 | accuracy | Accuracy                   | 0     
--------------------------------------------------------
1.6 M     Trainable params
0         Non-trainable params
1.6 M     Total params
6.301     Total estimated model params size (MB)


Parsed arguments: Namespace(data_dir='sst2', do_predict=True, do_train=True, dropout_rate=0.5, eval_batch_size=32, gpus=0, hidden_size=300, learning_rate=0.001, max_epochs=10, num_intermediate_layers=2, num_workers=8, optimizer='adagrad', output_dir='output/dan', seed=42, task='danbinarycls', train_batch_size=32, use_glove=False, vocab_filename='unigram_vocab.json', word_embedding_size=300)


Validation sanity check: 0it [00:00, ?it/s]

2022-02-07 01:49:36 - INFO - __main__ - Loading dev data and labels from sst2/sst2.dev
  cpuset_checked))
Global seed set to 42
2022-02-07 01:49:37 - INFO - __main__ - Loading train data and labels from sst2/sst2.train


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Epoch 0, global step 216: val_acc reached 0.49312 (best 0.49312), saving model to "/content/output/dan/version_20220207-014936/checkpoints/epoch=0-val_acc=0.49.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 1, global step 433: val_acc reached 0.50573 (best 0.50573), saving model to "/content/output/dan/version_20220207-014936/checkpoints/epoch=1-val_acc=0.51.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 2, global step 650: val_acc reached 0.51835 (best 0.51835), saving model to "/content/output/dan/version_20220207-014936/checkpoints/epoch=2-val_acc=0.52.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 3, global step 867: val_acc reached 0.54243 (best 0.54243), saving model to "/content/output/dan/version_20220207-014936/checkpoints/epoch=3-val_acc=0.54.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 4, global step 1084: val_acc reached 0.55161 (best 0.55161), saving model to "/content/output/dan/version_20220207-014936/checkpoints/epoch=4-val_acc=0.55.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 5, global step 1301: val_acc reached 0.56307 (best 0.56307), saving model to "/content/output/dan/version_20220207-014936/checkpoints/epoch=5-val_acc=0.56.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 6, global step 1518: val_acc reached 0.58486 (best 0.58486), saving model to "/content/output/dan/version_20220207-014936/checkpoints/epoch=6-val_acc=0.58.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 7, global step 1735: val_acc reached 0.59060 (best 0.59060), saving model to "/content/output/dan/version_20220207-014936/checkpoints/epoch=7-val_acc=0.59.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 8, global step 1952: val_acc reached 0.60780 (best 0.60780), saving model to "/content/output/dan/version_20220207-014936/checkpoints/epoch=8-val_acc=0.61.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 9, global step 2169: val_acc was not in top 1
2022-02-07 01:50:39 - INFO - __main__ - Copy best model from /content/output/dan/version_20220207-014936/checkpoints/epoch=8-val_acc=0.61.ckpt to output/dan/version_20220207-014936/checkpoints/best_model.ckpt.
2022-02-07 01:50:40 - INFO - __main__ - Loading dev data and labels from sst2/sst2.dev


Validating: 0it [00:00, ?it/s]

2022-02-07 01:50:41 - INFO - __main__ - Loading test data and labels from sst2/sst2.test


--------------------------------------------------------------------------------
DATALOADER:0 VALIDATE RESULTS
{'val_acc': 0.607798159122467, 'val_loss': 0.6229878067970276}
--------------------------------------------------------------------------------


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_acc': 0.6232839226722717, 'test_loss': 0.6190968751907349}
--------------------------------------------------------------------------------
Validation accuracy on the best model:  0.6078
Test       accuracy on the best model:  0.6233


Example with RProp optimizer and 2 hidden layers (lr default = 1e-2)

In [20]:
parser = ArgumentParser()
parser = DeepAveragingBinaryClassificationLModule.add_model_specific_args(parser)

args_str = ("--vocab_filename unigram_vocab.json "
            "--optimizer rprop --num_intermediate_layers 2 "
            "--output_dir output/dan  --do_train --do_predict ")
args = parser.parse_args(args_str.split())

if args.output_dir is None:
    args.output_dir = str(
        Path('output').joinpath(
            f"{args.task}_{datetime.now().strftime('%Y%m%d-%H%M%S')}"))
Path(args.output_dir).mkdir(parents=True, exist_ok=True)

print(f"Parsed arguments: {args}")

training_outout = generic_train(args=args,
                                model_class=DeepAveragingBinaryClassificationLModule)

Global seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name     | Type                       | Params
--------------------------------------------------------
0 | model    | DeepAveragingNetworksModel | 1.6 M 
1 | accuracy | Accuracy                   | 0     
--------------------------------------------------------
1.6 M     Trainable params
0         Non-trainable params
1.6 M     Total params
6.301     Total estimated model params size (MB)


Parsed arguments: Namespace(data_dir='sst2', do_predict=True, do_train=True, dropout_rate=0.5, eval_batch_size=32, gpus=0, hidden_size=300, learning_rate=0.001, max_epochs=10, num_intermediate_layers=2, num_workers=8, optimizer='rprop', output_dir='output/dan', seed=42, task='danbinarycls', train_batch_size=32, use_glove=False, vocab_filename='unigram_vocab.json', word_embedding_size=300)


Validation sanity check: 0it [00:00, ?it/s]

2022-02-07 01:50:42 - INFO - __main__ - Loading dev data and labels from sst2/sst2.dev
  cpuset_checked))
Global seed set to 42
2022-02-07 01:50:43 - INFO - __main__ - Loading train data and labels from sst2/sst2.train


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Epoch 0, global step 216: val_acc reached 0.63761 (best 0.63761), saving model to "/content/output/dan/version_20220207-015042/checkpoints/epoch=0-val_acc=0.64.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 1, global step 433: val_acc reached 0.67661 (best 0.67661), saving model to "/content/output/dan/version_20220207-015042/checkpoints/epoch=1-val_acc=0.68.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 2, global step 650: val_acc reached 0.69954 (best 0.69954), saving model to "/content/output/dan/version_20220207-015042/checkpoints/epoch=2-val_acc=0.70.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 3, global step 867: val_acc reached 0.71904 (best 0.71904), saving model to "/content/output/dan/version_20220207-015042/checkpoints/epoch=3-val_acc=0.72.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 4, global step 1084: val_acc reached 0.73853 (best 0.73853), saving model to "/content/output/dan/version_20220207-015042/checkpoints/epoch=4-val_acc=0.74.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 5, global step 1301: val_acc reached 0.74541 (best 0.74541), saving model to "/content/output/dan/version_20220207-015042/checkpoints/epoch=5-val_acc=0.75.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 6, global step 1518: val_acc reached 0.75000 (best 0.75000), saving model to "/content/output/dan/version_20220207-015042/checkpoints/epoch=6-val_acc=0.75.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 7, global step 1735: val_acc reached 0.75917 (best 0.75917), saving model to "/content/output/dan/version_20220207-015042/checkpoints/epoch=7-val_acc=0.76.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 8, global step 1952: val_acc reached 0.76835 (best 0.76835), saving model to "/content/output/dan/version_20220207-015042/checkpoints/epoch=8-val_acc=0.77.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 9, global step 2169: val_acc reached 0.77294 (best 0.77294), saving model to "/content/output/dan/version_20220207-015042/checkpoints/epoch=9-val_acc=0.77.ckpt" as top 1
2022-02-07 01:52:39 - INFO - __main__ - Copy best model from /content/output/dan/version_20220207-015042/checkpoints/epoch=9-val_acc=0.77.ckpt to output/dan/version_20220207-015042/checkpoints/best_model.ckpt.
2022-02-07 01:52:39 - INFO - __main__ - Loading dev data and labels from sst2/sst2.dev


Validating: 0it [00:00, ?it/s]

2022-02-07 01:52:40 - INFO - __main__ - Loading test data and labels from sst2/sst2.test


--------------------------------------------------------------------------------
DATALOADER:0 VALIDATE RESULTS
{'val_acc': 0.7729358077049255, 'val_loss': 0.48662206530570984}
--------------------------------------------------------------------------------


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_acc': 0.7753981351852417, 'test_loss': 0.4759145677089691}
--------------------------------------------------------------------------------
Validation accuracy on the best model:  0.7729
Test       accuracy on the best model:  0.7754


In [21]:
%reload_ext  tensorboard
%tensorboard --logdir output/dan


Reusing TensorBoard on port 6006 (pid 32322), started 0:48:42 ago. (Use '!kill 32322' to kill it.)

<IPython.core.display.Javascript object>