In [1]:
# taking code from
# https://github.com/jowagner/CA4023-NLP/blob/main/notebooks/sentiment-bert.ipynb

## 1.1 BERT Configuration

In [2]:
model_size          = 'base'  # choose between 'tiny', 'base' and 'large'
max_sequence_length = 256
batch_size          = 10

# compensate for small batch size with batch accumulation if needed
accumulate_grad_batches = 1
while batch_size * accumulate_grad_batches < 32:
    # accumulated batch size too small
    # --> accumulate more batches
    accumulate_grad_batches += 1

print('Batch size:', batch_size)
if accumulate_grad_batches > 1:
    print('Accumulating gradients of %d batches' %accumulate_grad_batches)
    
size2name = {
    'tiny':  'distilbert-base-uncased',
    'base':  'bert-base-uncased',
    'large': 'bert-large-uncased',
}

model_name = size2name[model_size]

from transformers import AutoTokenizer
from tokenizers.pre_tokenizers import Whitespace

tokeniser = AutoTokenizer.from_pretrained(model_name)

Batch size: 10
Accumulating gradients of 4 batches


## 1.2 Dataset Configuration

In [3]:
domains = ['laptop', 'restaurant']

train_dev_split = (90, 10)

data_prefix = 'data/'

filenames = {
    'laptop':     'ABSA16_Laptops_Train_SB1_v2.xml',
    'restaurant': 'ABSA16_Restaurants_Train_SB1_v2.xml',
}

for domain in domains:
    filename = data_prefix + filenames[domain]
    print('Using', filename)

Using data/ABSA16_Laptops_Train_SB1_v2.xml
Using data/ABSA16_Restaurants_Train_SB1_v2.xml


## 1.3 Question Templates

In [4]:
put_question_first = True  # whether to put question into seq A or B

templates = [
    
    # Hoang et al. (2019)
    {   'question': '%(entity_type)s, %(attribute_label)s',
        'label':    '%(polarity)s',
    },
    
    # Sun et al. (2019) format 1
    {   'question': '%(entity_type)s - %(attribute_label)s',
        'label':    '%(polarity)s',
    },
    
    # Sun et al. (2019) format 2
    {    'question': 'What do you think of the %(attribute_label)s of %(entity_type)s?',
         'label':    '%(polarity)s',
    },
    
    # Sun et al. (2019) format 3
    {    'question': 'The polarity of the aspect %(attribute_label)s of %(entity_type)s is %(candidate_polarity)s.',
         'label':    '%(yesno)s',
    },
    
    # Sun et al. (2019) format 4
    {   'question': '%(entity_type)s - %(attribute_label)s - %(candidate_polarity)s',
        'label':    '%(yesno)s',
    },
    
    # Variant 1
    {    'question': 'In terms of %(attribute_label)s, what do you think of %(entity_type)s?',
         'label':    '%(polarity)s',
    },
    
    # Variant 2
    {    'question': 'What polarity has the sentiment towards the %(attribute_label)s of %(entity_type)s in the following rewview?',
         'label':    '%(polarity)s',
    },
    
    # Variant 3
    {    'question': 'Do you agree that the sentiment towards the aspect %(attribute_label)s of %(entity_type)s in the following review is %(candidate_polarity)s?',
         'label':    '%(yesno)s',
    },
    
]

# TODO: add variants with entity type and attribute label not in ALLCAPS and
#       with _ between words (requires additional code)

## 2.1 Get Data Instances from XML File

In [5]:
# mostly implemented from scratch, some inspiration from
# https://opengogs.adaptcentre.ie/rszk/sea/src/master/lib/semeval_absa.py

from xml.etree import ElementTree

observed_entity_types = set()
observed_attribute_labels = set()
observed_polarities = set()
observed_targets = set()

def get_dataset(filename):
    global observed_entity_types
    global observed_attribute_labels
    global observed_polarities
    global observed_targets
    xmltree = ElementTree.parse(filename)
    xmlroot = xmltree.getroot()
    dataset = []
    for sentence in xmlroot.iter('sentence'):
        sent_id = sentence.get('id')
        # get content inside the first <text>...</text> sub-element
        text = sentence.findtext('text').strip()
        #print(sent_id, text)
        for opinion in sentence.iter('Opinion'):
            opin_cat = opinion.get('category')
            entity_type, attribute_label = opin_cat.split('#')
            polarity = opinion.get('polarity')
            target = opinion.get('target')
            try:
                span = (int(opinion.get('from')), int(opinion.get('to')))
            except TypeError:
                # at least one of 'from' or 'to' is missing
                span = (0, 0)
            if target == 'NULL':
                target = None
            # add to dataset
            dataset.append((
                sent_id, text,
                entity_type, attribute_label,
                target, span,
                polarity
            ))
            # update vocabularies
            observed_entity_types.add(entity_type)
            observed_attribute_labels.add(attribute_label)
            observed_polarities.add(polarity)
            if target:
                observed_targets.add(target)
    return dataset

datasets = []
for domain in domains:
    filename = data_prefix + filenames[domain]
    datasets.append((domain, get_dataset(filename)))
    
print('observed entity types:',     sorted(observed_entity_types))
print('\nobserved attribute labels:', sorted(observed_attribute_labels))
print('\nobserved polarities:',       sorted(observed_polarities))
print('\nnumber of unique targets:',  len(observed_targets))

observed entity types: ['AMBIENCE', 'BATTERY', 'COMPANY', 'CPU', 'DISPLAY', 'DRINKS', 'FANS_COOLING', 'FOOD', 'GRAPHICS', 'HARDWARE', 'HARD_DISC', 'KEYBOARD', 'LAPTOP', 'LOCATION', 'MEMORY', 'MOTHERBOARD', 'MOUSE', 'MULTIMEDIA_DEVICES', 'OPTICAL_DRIVES', 'OS', 'PORTS', 'POWER_SUPPLY', 'RESTAURANT', 'SERVICE', 'SHIPPING', 'SOFTWARE', 'SUPPORT', 'WARRANTY']

observed attribute labels: ['CONNECTIVITY', 'DESIGN_FEATURES', 'GENERAL', 'MISCELLANEOUS', 'OPERATION_PERFORMANCE', 'PORTABILITY', 'PRICE', 'PRICES', 'QUALITY', 'STYLE_OPTIONS', 'USABILITY']

observed polarities: ['negative', 'neutral', 'positive']

number of unique targets: 721


## 2.2 PyTorch DataLoader

To use the PyTorch Lighting framwork, we need to distinguish 3 types of objects handling our data:

### Dataset

PyTorch Dataset objects provide access to a data set and behave like a list of dictionaries, one dictionary for each data instance (training or test item). The framework does not prescribe what the dictionaries look like, i.e. you can choose the keys. The length of the list determines the number of training instances in each epoch, unless the DataLoader (below) is extended to filter or augment the data. The standard way to augment data is to keep the number of instances identical to the number of raw instances and to apply a different or random transformation in each call of `__getitem__()`.

### DataLoader

PyTorch DataLoader objects shuffle data provided by a Dataset object and create batches of data.

### LightningDataModule

LightningDataModule objects create 3 DataLoader objects, one each for training, validation and test data.

In [6]:
# basic usage of pytorch and lightning from
# https://pytorch.org/tutorials/beginner/data_loading_tutorial.html
# and
# https://github.com/ricardorei/lightning-text-classification/blob/master/classifier.py

import torch
from torch.utils.data import Dataset, DataLoader, RandomSampler

class ABSA_Dataset_part_1(Dataset):
    
    def __init__(
        self,
        raw_data,
        put_question_first = True,
        question_prefix = None,
        template_index = -1,    # -1 = pick random template
        info = None,            # additional info to keep with each instance
    ):
        self.raw_data            = raw_data
        self.put_question_first  = put_question_first
        self.question_prefix     = question_prefix
        self.template_index      = template_index
        self.info                = info
        
    def __len__(self):
        return len(self.raw_data)
    
    def __getitem__(self, idx):
        ''' get one instance of the dataset as a custom dictionary
        '''
        if torch.is_tensor(idx):
            idx = idx.tolist()
            assert isinstance(idx, int)
        sent_id, text, \
            entity_type, attribute_label, \
            target, span, \
            polarity = self.raw_data[idx]
        question, label = self.pick_question(entity_type, attribute_label, polarity)
        if self.question_prefix:
            question = self.question_prefix + ' ' + question
        # TODO: support adding context (previous sentences) to text
        retval = {}
        if self.put_question_first:
            retval['seq_A'] = question
            retval['seq_B'] = text
        else:
            retval['seq_A'] = text
            retval['seq_B'] = question
        retval['label'] = label
        retval['info']  = self.info
        return retval

In [7]:
import random

class ABSA_Dataset(ABSA_Dataset_part_1):
                   
    def pick_question(self, entity_type, attribute_label, polarity):
        global templates
        global observed_polarities
        if self.template_index < 0:
            template = random.choice(templates)
        else:
            template = templates[self.template_index]
        candidate_polarity = random.choice(list(observed_polarities))
        if candidate_polarity == polarity:
            yesno = 'yes'
        else:
            yesno = 'no'
        question = template['question'] %locals()
        label    = template['label']    %locals()
        return (question, label)        

## 2.3 Training-Dev Split
The SemEval ABSA dataset comes without a dev set. We need a dev set to decide how long to train, to select other parameters and to select a good run.

In [8]:
# concatenate domains

tr_dataset_objects = []

for domain, dataset in datasets:
    print(domain, len(dataset))
    tr_dataset_objects.append(ABSA_Dataset(
        dataset,
        put_question_first = put_question_first,
        question_prefix = domain + ':',
        template_index  = 0,   # a template that keeps the original 3-value polarity
        info = domain
    ))

tr_dataset = torch.utils.data.ConcatDataset(tr_dataset_objects)
n = len(tr_dataset)
print('Total size:', n)

laptop 2909
restaurant 2507
Total size: 5416


In [9]:
# how many instances are there for each label?

group2indices = {}
for index in range(n):
    label = tr_dataset[index]['label']
    domain = tr_dataset[index]['info']
    group = (label, domain)
    if not group in group2indices:
        group2indices[group] = []
    group2indices[group].append(index)

# create stratified sample
    
rel_train_size, rel_dev_size = train_dev_split
rel_total = rel_train_size + rel_dev_size

tr_indices = []
dev_indices = []

for group in group2indices:
    indices = group2indices[group]
    n = len(indices)
    select = (n * rel_train_size) // rel_total
    remaining = n - select
    print('%r: split %d (%.1f%%) to %d (%.1f%%)' %(
        group, select, 100.0*select/float(n),
        remaining, 100.0*remaining/float(n),
    ))
    random.shuffle(indices)
    tr_indices += indices[:select]
    dev_indices += indices[select:]

tr_indices.sort()
dev_indices.sort()

dev_dataset = torch.utils.data.Subset(tr_dataset, dev_indices)
tr_dataset  = torch.utils.data.Subset(tr_dataset, tr_indices)

print()
print('Training data size:', len(tr_dataset))
print('Development data size:', len(dev_dataset))

('positive', 'laptop'): split 1473 (90.0%) to 164 (10.0%)
('negative', 'laptop'): split 975 (89.9%) to 109 (10.1%)
('neutral', 'laptop'): split 169 (89.9%) to 19 (10.1%)
('negative', 'restaurant'): split 674 (90.0%) to 75 (10.0%)
('positive', 'restaurant'): split 1491 (90.0%) to 166 (10.0%)
('neutral', 'restaurant'): split 90 (89.1%) to 11 (10.9%)

Training data size: 4872
Development data size: 544


## 2.4 Lightning Wrapper for Training, Development and Test Data

In [10]:
# https://github.com/ricardorei/lightning-text-classification/blob/master/classifier.py
    
import pytorch_lightning as pl
from torchnlp.encoders import LabelEncoder

class ABSA_DataModule(pl.LightningDataModule):
    
    def __init__(self, classifier, data_split = None, **kwargs):
        super().__init__()
        self.hparams = classifier.hparams
        self.classifier = classifier
        if data_split is None:      # this happens when loading a checkpoint
            data_split = (None, None, None)
        self.data_split = data_split
        self.kwargs = kwargs
        self.label_encoder = LabelEncoder(
            sorted(list(observed_polarities)) + ['yes', 'no'],
            reserved_labels = [],
        )

    def train_dataloader(self) -> DataLoader:
        ''' create a data loader for the training data '''
        dataset = self.data_split[0]
        return DataLoader(
            dataset     = dataset,
            sampler     = RandomSampler(dataset),
            batch_size  = self.hparams.batch_size,
            collate_fn  = self.classifier.prepare_sample,
            num_workers = self.hparams.loader_workers,
        )
    
    def val_dataloader(self) -> DataLoader:
        ''' create a data loader for the validation data '''
        return DataLoader(
            dataset     = self.data_split[1],
            batch_size  = self.hparams.batch_size,
            collate_fn  = self.classifier.prepare_sample,
            num_workers = self.hparams.loader_workers,
        )
    
    def test_dataloader(self) -> DataLoader:
        ''' create a data loader for the test data '''
        return DataLoader(
            dataset     = self.data_split[2],
            batch_size  = self.hparams.batch_size,
            collate_fn  = self.classifier.prepare_sample,
            num_workers = self.hparams.loader_workers,
        )

  "`pytorch_lightning.metrics.*` module has been renamed to `torchmetrics.*` and split off to its own package"


## 3.1 Classifier

In [11]:
from transformers import AutoModel
import torch.nn as nn

class Classifier_part_1(pl.LightningModule):
    
    #def __init__(self, hparams = None, **kwargs) -> None:
    def __init__(self, hparams = None, **kwargs) -> None:
        super().__init__()
        if type(hparams) is dict:
            #print('Converting', type(hparams))
            hparams = pl.utilities.AttributeDict(hparams)
        #print('New classifier with', hparams)
        # https://discuss.pytorch.org/t/pytorch-lightning-module-cant-set-attribute-error/121125
        self.hparams.update(hparams)
        self.batch_size = hparams.batch_size
        self.data = ABSA_DataModule(self, **kwargs)
        if 'tokeniser' in kwargs:
            self.tokenizer = kwargs['tokeniser']  # attribute expected by lightning
        else:
            # this happens when loading a checkpoint
            self.tokenizer = None  # TODO: this may break ability to use the model
        self.__build_model()
        self.__build_loss()
        # prepare training with frozen BERT layers so that the new
        # classifier head can first adjust to BERT before BERT
        # adjusts to the classifier in later epochs        
        if hparams.nr_frozen_epochs > 0:
            self.freeze_encoder()
        else:
            self._frozen = False
        self.nr_frozen_epochs = hparams.nr_frozen_epochs
        self.record_predictions = False
            
    def __build_model(self) -> None:
        ''' Init BERT model, tokeniser and classification head '''
        # Q: Why not use AutoModelForSequenceClassification?
        self.bert = AutoModel.from_pretrained(
            model_name,  # was: self.hparams.encoder_model
            output_hidden_states = True
        )
        # parameters for the classification head: best values
        # depend on the task and dataset; the below values
        # have not been tuned much but work reasonable well
        # for the P&L04 data
        self.classification_head = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(self.bert.config.hidden_size, 1536),
            nn.Tanh(),
            nn.Dropout(0.5),
            nn.Linear(1536, 256),
            nn.Tanh(),
            nn.Dropout(0.1),
            nn.Linear(256, self.data.label_encoder.vocab_size)
        )
        
    def __build_loss(self):
        self._loss = nn.CrossEntropyLoss()

In [12]:
import logging as log

class Classifier_part_2(Classifier_part_1):
    
    def unfreeze_encoder(self) -> None:
        if self._frozen:
            log.info('\n== Encoder model fine-tuning ==')
            for param in self.bert.parameters():
                param.requires_grad = True
            self._frozen = False
            
    def freeze_encoder(self) -> None:
        for param in self.bert.parameters():
            param.requires_grad = False
        self._frozen = True

    def predict(self, sample: dict) -> dict:
        ''' make a prediction for a single data instance '''
        if self.training:
            self.eval()
        with torch.no_grad():
            batch_inputs, _ = self.prepare_sample(
                [sample],
                prepare_target = False
            )
            model_out = self.forward(batch_inputs)
            logits = torch.Tensor.cpu(model_out["logits"]).numpy()
            predicted_labels = [
                self.data.label_encoder.index_to_token[prediction]
                for prediction in numpy.argmax(logits, axis=1)
            ]
            sample["predicted_label"] = predicted_labels[0]
        return sample
    
    # functionality to obtain predictions for a dataset as a
    # side effect of asking PyTorch Lightning to get evaluation
    # results for a dataset
    # (the framework does not seem to provide a function to get
    # all predictions for a dataset)
    
    def start_recording_predictions(self):
        self.record_predictions = True
        self.reset_recorded_predictions()
        
    def stop_recording_predictions(self):
        self.record_predictions = False
        
    def reset_recorded_predictions(self):
        self.seq2label = {}

In [13]:
from torchnlp.utils import lengths_to_mask

class Classifier_part_3(Classifier_part_2):
    
    def forward(self, batch_input):
        tokens  = batch_input['input_ids']
        lengths = batch_input['length']
        mask = batch_input['attention_mask']
        # Run BERT model.
        word_embeddings = self.bert(tokens, mask).last_hidden_state
        sentemb = word_embeddings[:,0]  # at position of [CLS]
        logits = self.classification_head(sentemb)
        # Hack to conveniently use the model and trainer to
        # get predictions for a test set:
        if self.record_predictions:
            logits_np = torch.Tensor.cpu(logits).numpy()
            predicted_labels = [
                self.data.label_encoder.index_to_token[prediction]
                for prediction in numpy.argmax(logits_np, axis=1)
            ]
            for index, input_token_ids in enumerate(tokens):
                key = torch.Tensor.cpu(input_token_ids).numpy().tolist()
                # truncate trailing zeros
                while key and key[-1] == 0:
                    del key[-1]
                self.seq2label[tuple(key)] = predicted_labels[index]
        return {"logits": logits}
    
    def loss(self, predictions: dict, targets: dict) -> torch.tensor:
        """
        Computes Loss value according to a loss function.
        :param predictions: model specific output. Must contain a key 'logits' with
            a tensor [batch_size x 1] with model predictions
        :param labels: Label values [batch_size]
        Returns:
            torch.tensor with loss value.
        """
        return self._loss(predictions["logits"], targets["labels"])

In [14]:


class Classifier_part_4(Classifier_part_3):
    
    def prepare_sample(self, sample: list, prepare_target: bool = True) -> (dict, dict):
        """ prepare a batch of instances to pass them into the model
        
        :param sample: list of dictionaries.
        
        Returns:
            - dictionary with the expected model inputs.
            - dictionary with the expected target labels.
        """
        assert len(sample) <= batch_size
        assert self.tokenizer is not None
        batch_seq_A = []
        batch_seq_B = []
        for item in sample:
            batch_seq_A.append(item['seq_A'])
            batch_seq_B.append(item['seq_B'])
        # run the tokeniser
        encoded_batch = self.tokenizer(
            batch_seq_A,
            batch_seq_B,
            is_split_into_words = False,
            return_length       = True,
            padding             = 'max_length',
            # https://github.com/huggingface/transformers/issues/8691
            return_tensors      = 'pt',
        )
        if not prepare_target:
            return encoded_batch, {}  # no target labels requested
        # Prepare target:
        batch_labels = []
        for item in sample:
            batch_labels.append(item['label'])
        assert len(batch_labels) <= batch_size
        try:
            targets = {
                "labels": self.data.label_encoder.batch_encode(batch_labels)
            }
            return encoded_batch, targets
        except RuntimeError:
            raise Exception("Label encoder found an unknown label.")


In [15]:


from collections import OrderedDict

class Classifier_part_5(Classifier_part_4):
    
    def training_step(self, batch: tuple, batch_nb: int, *args, **kwargs) -> dict:
        ''' perform a training step with the given batch '''
        inputs, targets = batch
        model_out = self.forward(inputs)
        loss_val = self.loss(model_out, targets)
        # in DP mode (default) make sure if result is scalar, there's another dim in the beginning
        # Q: What is this about?
        if self.trainer.use_dp or self.trainer.use_ddp2:
            loss_val = loss_val.unsqueeze(0)
        output = OrderedDict({"loss": loss_val})
        self.log('train_loss', loss_val, on_step=True, on_epoch=True, prog_bar=True)
        # can also return just a scalar instead of a dict (return loss_val)
        return output
   
    def test_or_validation_step(self, test_type, batch: tuple, batch_nb: int, *args, **kwargs) -> dict:
        ''' perform a test or validation step with the given batch '''
        inputs, targets = batch
        model_out = self.forward(inputs)
        loss_val = self.loss(model_out, targets)
        y = targets["labels"]
        # get predictions
        y_hat = model_out["logits"]
        labels_hat = torch.argmax(y_hat, dim=1)
        # get accuracy
        val_acc = torch.sum(y == labels_hat).item() / (len(y) * 1.0)
        val_acc = torch.tensor(val_acc)
        if self.on_gpu:
            val_acc = val_acc.cuda(loss_val.device.index)
        # in DP mode (default) make sure if result is scalar, there's another dim in the beginning
        if self.trainer.use_dp or self.trainer.use_ddp2:
            loss_val = loss_val.unsqueeze(0)
            val_acc = val_acc.unsqueeze(0)
        output = OrderedDict({
            test_type + "_loss": loss_val,
            test_type + "_acc":  val_acc,
            'batch_size': len(batch),
        })
        return output
    
    def validation_step(self, batch: tuple, batch_nb: int, *args, **kwargs) -> dict:
        return self.test_or_validation_step(
            'val', batch, batch_nb, *args, **kwargs
        )
    
    def test_step(self, batch: tuple, batch_nb: int, *args, **kwargs) -> dict:
        return self.test_or_validation_step(
            'test', batch, batch_nb, *args, **kwargs
        )



In [16]:
from torch import optim

class Classifier(Classifier_part_5):
    
    # validation_end() is now validation_epoch_end()
    # https://github.com/PyTorchLightning/pytorch-lightning/blob/efd272a3cac2c412dd4a7aa138feafb2c114326f/CHANGELOG.md
    
    def test_or_validation_epoch_end(self, test_type, outputs: list) -> None:
        ''' calculate average loss and accuracy over all batches,
            reducing the weight of the last batch according to its
            size so that all data instances have equal influence
            on the scores
        '''
        val_loss_mean = 0.0
        val_acc_mean = 0.0
        total_size = 0
        for output in outputs:
            val_loss = output[test_type + "_loss"]
            # reduce manually when using dp
            if self.trainer.use_dp or self.trainer.use_ddp2:
                val_loss = torch.mean(val_loss)
            val_loss_mean += val_loss
            # reduce manually when using dp
            val_acc = output[test_type + "_acc"]
            if self.trainer.use_dp or self.trainer.use_ddp2:
                val_acc = torch.mean(val_acc)
            # We weight the batch accuracy by batch size to not give
            # higher weight to the items of a smaller, final bacth.
            batch_size = output['batch_size']
            val_acc_mean += val_acc * batch_size
            total_size += batch_size
        val_loss_mean /= len(outputs)
        val_acc_mean /= total_size
        self.log(test_type+'_loss', val_loss_mean)
        self.log(test_type+'_acc',  val_acc_mean)

    def validation_epoch_end(self, outputs: list) -> None:
        self.test_or_validation_epoch_end('val', outputs)
                                     
    def test_epoch_end(self, outputs: list) -> None:
        self.test_or_validation_epoch_end('test', outputs)
        
    def configure_optimizers(self):
        """ Sets different Learning rates for different parameter groups. """
        parameters = [
            {"params": self.classification_head.parameters()},
            {
                "params": self.bert.parameters(),
                "lr": self.hparams.encoder_learning_rate,
                #"weight_decay": 0.01,  # TODO: try this as it is in the BERT paper
            },
        ]
        optimizer = optim.Adam(parameters, lr=self.hparams.learning_rate)
        return [optimizer], []

    def on_epoch_end(self):
        """ Pytorch lightning hook """
        if self.current_epoch + 1 >= self.nr_frozen_epochs:
            self.unfreeze_encoder()

## 4.1 Training

TODO: move configuration to section 1

In [17]:

classifier = Classifier(
    hparams = {
        "encoder_learning_rate": 1e-05,  # Encoder specific learning rate
        "learning_rate":         3e-05,  # Classification head learning rate
        "nr_frozen_epochs":      3,      # Number of epochs we want to keep the encoder model frozen
        "loader_workers":        4,      # How many subprocesses to use for data loading.
                                         # (0 means that the data will be loaded in the main process)
        "batch_size":            batch_size,
        "gpus":                  1,
    },
    # parameters for SlicedDataModule:
    data_split = (tr_dataset, dev_dataset),
    # parameters for SlicedDocument():
    tokeniser                   = tokeniser,
    max_sequence_length         = max_sequence_length,
    preproc_batch_size          = 8
)   
print('Ready.')

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Ready.


In [20]:
# https://pytorch-lightning.readthedocs.io/en/latest/common/early_stopping.html

from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint
import os
import time

early_stop_callback = EarlyStopping(
    monitor   = 'val_acc',
    min_delta = 0.00,
    patience  = 5,
    verbose   = False,
    mode      = 'max',
)

save_top_model_callback = ModelCheckpoint(
    save_top_k = 3,
    monitor    = 'val_acc',
    mode       = 'max',
    filename   = '{val_acc:.4f}-{epoch:02d}-{val_loss:.4f}'
)

trainer = pl.Trainer(
    callbacks=[early_stop_callback, save_top_model_callback],
    max_epochs = 6,
    min_epochs = classifier.hparams.nr_frozen_epochs + 2,
    gpus = classifier.hparams.gpus,
    accumulate_grad_batches = accumulate_grad_batches,   # compensate for small batch size
    #limit_train_batches = 10,  # use only a subset of the data during development for higher speed
    check_val_every_n_epoch = 1,
    # https://github.com/PyTorchLightning/pytorch-lightning/issues/6690
    logger = pl.loggers.TensorBoardLogger(os.path.abspath('lightning_logs')),
)
start = time.time()
trainer.fit(classifier, classifier.data)
print('Training time: %.0f minutes' %((time.time()-start)/60.0))

## Appendix A: Example BERT Tokenisation

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name                | Type             | Params
---------------------------------------------------------
0 | bert                | BertModel        | 109 M 
1 | classification_head | Sequential       | 1.6 M 
2 | _loss               | CrossEntropyLoss | 0     
---------------------------------------------------------
1.6 M     Trainable params
109 M     Non-trainable params
111 M     Total params
444.233   Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

  rank_zero_deprecation("Internal: `use_dp` is deprecated in v1.2 and will be removed in v1.4.")
  rank_zero_deprecation("Internal: `use_ddp2` is deprecated in v1.2 and will be removed in v1.4.")


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Training time: 16 minutes


In [21]:
# https://www.youtube.com/watch?v=nCq_vy9qE-k at 44:59

%reload_ext tensorboard
%tensorboard --logdir lightning_logs/

In [22]:
print('The best model is', save_top_model_callback.best_model_path)

print('Best validation set accuracy:', save_top_model_callback.best_model_score)

# The following automatically loads the best weights according to
# https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html

# TODO: need test set
#       + fix https://github.com/jowagner/absa-rationale-eval/issues/3 before proceeding
#print('Test results via trainer.test():')
#results = trainer.test()  # also prints results as a side effect



The best model is /home/jwagner/Documents/research/interpretability/rationale-eval-2021/absa-rationale-eval/notebooks/lightning_logs/default/version_0/checkpoints/val_acc=0.8691-epoch=04-val_loss=0.4086.ckpt
Best validation set accuracy: tensor(0.8691, device='cuda:0')


## 5.1 Save Best Model outside Logs

Rather than manually locating the best model in the lightning logs folder and copying it to another location, use the library to save a copy. This also gives us the option to save a copy without the training state of the Adam optimiser, reducing model size by about 67%, training parameters and filesystem paths that we may not want to share with users of the model.


In [24]:
# https://pytorch-lightning.readthedocs.io/en/latest/common/weights_loading.html

# after just having run test(), the best checkpoint is still loaded but that's
# not a documented feature so to be on the safe side for future versions we
# need to explicitly load the best checkpoint:

best_model = Classifier.load_from_checkpoint(
    checkpoint_path = trainer.checkpoint_callback.best_model_path
    # the hparams including hparams.batch_size appear to have been
    # saved in the checkpoint automatically
)
# best_model.save_checkpoint('best.ckpt') does not exist
# --> need to wrap model into trainer to be able to save a checkpoint

new_trainer = pl.Trainer(
    resume_from_checkpoint = trainer.checkpoint_callback.best_model_path,
    gpus = -1,  # avoid warnings (-1 = automatic selection)
    # https://github.com/PyTorchLightning/pytorch-lightning/issues/6690
    logger = pl.loggers.TensorBoardLogger(os.path.abspath('lightning_logs')),
)
new_trainer.model = best_model  # @model.setter in plugins/training_type/training_type_plugin.py



new_trainer.save_checkpoint(
    "best-model-weights-only.ckpt",
    True,  # save_weights_only
    # (if saved with setting the 2nd arg to True, the checkpoint
    # will contain absoulte paths and training parameters)
)

# to just save the bert model in pytorch format and without the classification head, we could follow
# https://github.com/PyTorchLightning/pytorch-lightning/issues/3096#issuecomment-686877242
best_model.bert.save_pretrained('best-bert-encoder.pt')

# Since the lightning module inherits from pytorch, we can save the full network in
# pytorch format:
torch.save(best_model.state_dict(), 'best-model.pt')

print('Ready')

TypeError: 'NoneType' object is not iterable

## Appendix A: Example BERT Tokenisation

In [25]:
example_batch = []
for domain, dataset in datasets:
    if domain == 'laptop':
        for i in (0, 4, 8, 18):  # select a few interesting instances
            example_batch.append(dataset[i][1])   

tokenised_text = tokeniser(
    example_batch,
    is_split_into_words = False,
)

for i, token_ids in enumerate(tokenised_text['input_ids']):
    if i: print()
    print(i, '\tinput:        ', example_batch[i])
    print(   "\t['input_ids']:", token_ids)
    print(   '\ttokens:       ', tokeniser.convert_ids_to_tokens(token_ids))

0 	input:         This computer is absolutely AMAZING!!!
	['input_ids']: [101, 2023, 3274, 2003, 7078, 6429, 999, 999, 999, 102]
	tokens:        ['[CLS]', 'this', 'computer', 'is', 'absolutely', 'amazing', '!', '!', '!', '[SEP]']

1 	input:         and plenty of storage with 250 gb(though I will upgrade this and the ram..)
	['input_ids']: [101, 1998, 7564, 1997, 5527, 2007, 5539, 16351, 1006, 2295, 1045, 2097, 12200, 2023, 1998, 1996, 8223, 1012, 1012, 1007, 102]
	tokens:        ['[CLS]', 'and', 'plenty', 'of', 'storage', 'with', '250', 'gb', '(', 'though', 'i', 'will', 'upgrade', 'this', 'and', 'the', 'ram', '.', '.', ')', '[SEP]']

2 	input:         GET THIS COMPUTER FOR PORTABILITY AND FAST PROCESSING!!!
	['input_ids']: [101, 2131, 2023, 3274, 2005, 3417, 8010, 1998, 3435, 6364, 999, 999, 999, 102]
	tokens:        ['[CLS]', 'get', 'this', 'computer', 'for', 'port', '##ability', 'and', 'fast', 'processing', '!', '!', '!', '[SEP]']

3 	input:         without a big ol' clunky machine i

## Appendix B: Sequence Length Distribution

In [26]:
from collections import defaultdict
    
bin_width = 10

for domain, dataset in datasets:
    print(domain)
    distribution = defaultdict(lambda: 0)
    dataset.append(7*[None])  # hack to simplify loop below
    batch = []
    labels = []
    max_length_bin = 0
    for _, text, _, _, _, _, label in dataset:
        if text is not None:
            batch.append(text)
            labels.append(label)
        if len(batch) == batch_size \
        or (text is None and len(batch) > 0):
            tokenised_batch = tokeniser(
                batch,
                is_split_into_words = False,
            )
            for index, token_ids in enumerate(tokenised_batch['input_ids']):
                label = labels[index]
                length = len(token_ids)
                length_bin = length // bin_width
                distribution[(label,   length_bin)] += 1
                distribution[('total', length_bin)] += 1
                if length_bin > max_length_bin:
                    max_length_bin = length_bin
            batch = []
            labels = []
    del dataset[-1]  # remove "end of data" marker of hack above   
    header = []
    header.append('LengthBin')
    for polarity in sorted(observed_polarities):
        header.append('%12s' %polarity)
    header.append('%12s' %'Total')
    header.append('%12s' %'Positivity')
    print('\t'.join(header))
    for length_bin in range(0, max_length_bin+1):
        row = []
        row.append('%4d-%4d' %(
            bin_width*length_bin,
            bin_width*(1+length_bin)-1
        ))
        total = 0
        for label in sorted(observed_polarities):
            count = distribution[(label, length_bin)]
            row.append('%12d' %count)
            total += count
        row.append('%12d' %total)
        if total:
            row.append('%10.0f%%' %(100.0*distribution[('positive', length_bin)]/float(total)))
        else:
            row.append('%11s' %'n/a')
        print('\t'.join(row))

laptop
LengthBin	    negative	     neutral	    positive	       Total	  Positivity
   0-   9	         123	          20	         279	         422	        66%
  10-  19	         484	          75	         744	        1303	        57%
  20-  29	         303	          59	         428	         790	        54%
  30-  39	         102	          17	         125	         244	        51%
  40-  49	          47	          15	          38	         100	        38%
  50-  59	          11	           2	          20	          33	        61%
  60-  69	           8	           0	           0	           8	         0%
  70-  79	           3	           0	           3	           6	        50%
  80-  89	           3	           0	           0	           3	         0%
restaurant
LengthBin	    negative	     neutral	    positive	       Total	  Positivity
   0-   9	          74	          13	         276	         363	        76%
  10-  19	         300	          52	         724	        1076	        67%
  20-  29	        

## Appendix C: Example Dataset Object

In [27]:
is_first = True
for domain, dataset in datasets:
    if not is_first: print()
    print(domain)
    dataset_obj = ABSA_Dataset(
        dataset,
        put_question_first = put_question_first,
        question_prefix = domain + ':',
        template_index = -1,   # -1 = random pick
    )
    for i in range(2):
        print(i, dataset_obj[i])
        print(i, dataset_obj[i])  # repeat call doesn't give the same result with template_index = -1
    is_first = False

laptop
0 {'seq_A': 'laptop: LAPTOP - GENERAL', 'seq_B': 'This computer is absolutely AMAZING!!!', 'label': 'positive', 'info': None}
0 {'seq_A': 'laptop: LAPTOP - GENERAL - neutral', 'seq_B': 'This computer is absolutely AMAZING!!!', 'label': 'no', 'info': None}
1 {'seq_A': 'laptop: What polarity has the sentiment towards the OPERATION_PERFORMANCE of BATTERY in the following rewview?', 'seq_B': '10 plus hours of battery...', 'label': 'positive', 'info': None}
1 {'seq_A': 'laptop: BATTERY - OPERATION_PERFORMANCE', 'seq_B': '10 plus hours of battery...', 'label': 'positive', 'info': None}

restaurant
0 {'seq_A': 'restaurant: In terms of GENERAL, what do you think of RESTAURANT?', 'seq_B': 'Judging from previous posts this used to be a good place, but not any longer.', 'label': 'negative', 'info': None}
0 {'seq_A': 'restaurant: RESTAURANT - GENERAL', 'seq_B': 'Judging from previous posts this used to be a good place, but not any longer.', 'label': 'negative', 'info': None}
1 {'seq_A': 're

## Appendix D: Location of Instances Picked for Devset

In [28]:
row = []
n = len(tr_indices) + len(dev_indices)
for i in range(n):
    if i in dev_indices:
        row.append('d')
    else:
        row.append('_')
    if len(row) == 75 or i+1 == n:
        print(''.join(row))
        row = []
    if i > 750:
        break

___d_____d__________d____d____d______________________dd__________dd__dd____
_________d____________d____d______d________________dd__d_______d___d_______
d___d_____d___dd________________dd____d__d___________________________d_____
________________________d________________d___d_________________________d___
______d___d______d__________d____________d___d_______d_______d__d__________
_________________d___d___d__________________________________d__d_____d_____
________________d_____d_____d___d____d____d____________________d___________
_____________________dd__d______________d______d_______________d_________d_
____d_______d______d_____________d_d_____d__d______d____________d________d_
__________d____d_d_________________________d___dd__d______________________d
