# NLP - Homework 2 - task b

## Setup

In [None]:
# install lightning
!pip install pytorch_lightning &> /dev/null
!pip install transformers



In [None]:
# here go all the imports

import torch
from torch import nn
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import Dataset, DataLoader
import pytorch_lightning as pl

from pprint import pprint
import random
import numpy as np
from typing import *
from string import punctuation

# huggingface's transformers library
import transformers
from transformers import RobertaTokenizer


import nltk

# Tokenizer models required by nltk.word_tokenize.
nltk.download('punkt')

# Report the library versions and the accelerator in use. The device name is
# only queried when CUDA is available, so the cell also runs on a CPU kernel.
torch.__version__, (torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'cpu (no CUDA device)'), transformers.__version__

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


('1.8.1+cu101', 'Tesla T4', '4.6.1')

## Allow reproducibility

In [None]:
# Names of the model / loss-weighting variants this notebook can run.
VARIANTS = {
    'Average Last Four Hidden',
    'Sum Last Four Hidden',
    'INS',
    'ISNS',
    'ENS',
    'ATPC with sqrt',
    'ATPC',
}
# Variant selected for the current run (read by the weighting function and the model).
VARIANT_TO_TEST = 'ATPC with sqrt'

# One seed shared by every random number generator used in the notebook.
SEED = 1234

torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)
torch.backends.cudnn.deterministic = True  # ask cuDNN to select deterministic algorithms

## Importing data

In [None]:
# Let's create a new folder and download the dataset.
! mkdir data
! wget -O data/laptops_train.json https://raw.githubusercontent.com/SapienzaNLP/nlp2021-hw2/master/data/laptops_train.json
! wget -O data/laptops_dev.json https://raw.githubusercontent.com/SapienzaNLP/nlp2021-hw2/master/data/laptops_dev.json
! wget -O data/restaurants_train.json https://raw.githubusercontent.com/SapienzaNLP/nlp2021-hw2/master/data/restaurants_train.json
! wget -O data/restaurants_dev.json https://raw.githubusercontent.com/SapienzaNLP/nlp2021-hw2/master/data/restaurants_dev.json

mkdir: cannot create directory ‘data’: File exists
--2021-06-15 20:22:06--  https://raw.githubusercontent.com/SapienzaNLP/nlp2021-hw2/master/data/laptops_train.json
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 668974 (653K) [text/plain]
Saving to: ‘data/laptops_train.json’


2021-06-15 20:22:06 (22.1 MB/s) - ‘data/laptops_train.json’ saved [668974/668974]

--2021-06-15 20:22:07--  https://raw.githubusercontent.com/SapienzaNLP/nlp2021-hw2/master/data/laptops_dev.json
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 148196 (145K) 

In [None]:
import pandas as pd
import os
import json

root_folder: str = r"./data" # folder the dataset files were downloaded to

laptops_train_filename = "laptops_train.json"
laptops_valid_filename = "laptops_dev.json"
restaurants_train_filename = "restaurants_train.json"
restaurants_valid_filename = "restaurants_dev.json"

# Read the four JSON files (a list of records each) in a fixed order.
(laptops_train_dataframe,
 laptops_valid_dataframe,
 restaurants_train_dataframe,
 restaurants_valid_dataframe) = (
    pd.read_json(os.sep.join([root_folder, filename]), orient='records')
    for filename in (laptops_train_filename,
                     laptops_valid_filename,
                     restaurants_train_filename,
                     restaurants_valid_filename)
)

# Only the restaurants files carry a 'categories' column; it is dropped so
# that both domains share the same columns before concatenation.
restaurants_train_dataframe = restaurants_train_dataframe.drop(columns='categories')
restaurants_valid_dataframe = restaurants_valid_dataframe.drop(columns='categories')

# Merge the two domains into a single training and a single validation frame.
train_dataframe = pd.concat([laptops_train_dataframe, restaurants_train_dataframe], ignore_index=True)
valid_dataframe = pd.concat([laptops_valid_dataframe, restaurants_valid_dataframe], ignore_index=True)

# Drop the references to the per-domain frames, which are no longer needed.
laptops_train_dataframe = laptops_valid_dataframe = None
restaurants_train_dataframe = restaurants_valid_dataframe = None

VALID_SIZE = len(valid_dataframe)
TEST_SIZE = VALID_SIZE

train_dataframe

Unnamed: 0,targets,text
0,"[[[22, 31], hard disk, neutral]]",I always use a backup hard disk to store impor...
1,"[[[34, 38], size, positive]]","I also love the small, convenient size of my l..."
2,"[[[76, 84], mousepad, neutral]]",I thought the white Mac computers looked dirty...
3,"[[[40, 48], responds, positive]]","It is always reliable, never bugged and respon..."
4,"[[[55, 63], keyboard, positive], [[73, 78], sp...",The real stand out on this computer is the fee...
...,...,...
4995,[],I actually gave Patroon another chance before ...
4996,"[[[77, 84], service, positive], [[123, 128], p...",Although they do the typical what kind of wate...
4997,[],"That said, I thought Scalini Fedeli was one of..."
4998,"[[[27, 34], waiters, positive], [[125, 131], d...",Have always found that the waiters will go out...


In [None]:
valid_dataframe

Unnamed: 0,targets,text
0,[],It was over rated!
1,"[[[55, 75], adding the bluetooth, negative]]",But Sony said we could send it back and be cha...
2,"[[[4, 21], Windows 7 Starter, positive]]","The Windows 7 Starter is, in my opinion, a gre..."
3,"[[[4, 14], powerpoint, positive]]",The powerpoint opened seamlessly in the apple ...
4,"[[[74, 80], screen, positive]]","I chose the iBookG4, a laptop that is an attra..."
...,...,...
1081,[],"(or sister, in my case!)."
1082,"[[[0, 5], Staff, positive]]",Staff is very accomodating.
1083,"[[[26, 40], yellowfun tuna, positive], [[51, 6...",I particularly love their yellowfun tuna and t...
1084,[],Enjoy!


In [None]:
def spans(txt: str) -> List[Tuple[int, int]]:
    """
    Return the character spans of the non-punctuation tokens of the input text.

    Args:
        txt (str): the text to tokenize.
    Return:
        A list of (start, end) character offsets into `txt`, one per token
        produced by nltk.word_tokenize that is not empty after
        preprocess_term has removed its punctuation.
    """
    spans_list = list()
    offset = 0
    for token in nltk.word_tokenize(txt):
        if preprocess_term(token) == '':
            continue  # the token is made of punctuation only
        position = txt.find(token, offset)
        if position == -1:
            # The tokenizer may emit a token that is not a literal substring of
            # the text (e.g. rewritten quotes). Skip it instead of recording a
            # bogus (-1, ...) span and resetting the search offset.
            continue
        spans_list.append((position, position + len(token)))
        offset = position + len(token)
    return spans_list

def preprocess_term(term: str) -> str:
    """
    Preprocess a single tokenized term by removing its punctuation.

    Args:
        term (str): the term to clean.
    Return:
        The term without the characters of string.punctuation and without the
        curly double quotes “ and ”.
    """
    return ''.join(
        char for char in term
        if not (char in punctuation or char in '“”')
    )

def preprocess_text(text: str, start_indexes: List[int], end_indexes: List[int], polarities: List[str]) -> Tuple[List[Dict[str, str]], List[str]]:
  """
  Tokenize a sentence and label each token with respect to its aspect terms.

  Args:
      text (str): the raw sentence.
      start_indexes (List[int]): start character offset of each aspect term, in ascending order.
      end_indexes (List[int]): end character offset of each aspect term, aligned with start_indexes.
      polarities (List[str]): polarity of each aspect term, aligned with start_indexes.
  Return:
      A tuple (preprocessed_text, true_aspect_terms). preprocessed_text is a list of
      dicts, one per token: tokens outside the aspect terms carry 'ne_label' == 'O';
      the first token of an aspect term carries 'B-AT' and the following ones 'I-AT',
      both with the 'polarity' of the term. true_aspect_terms lists the raw aspect
      term strings in order.
  """
  # Boundaries of the text that lies *outside* the aspect terms. A copy is used
  # so that the caller's end_indexes list is not modified.
  boundaries = [0] + list(end_indexes)
  preprocessed_text = []
  true_aspect_terms = list()

  def outside_tokens(segment_start, segment_end):
    # Tokens of text[segment_start:segment_end], all labelled 'O'. The spans are
    # relative to the segment, hence the shift by segment_start.
    return [{'token': text[segment_start + span_start : segment_start + span_end], 'ne_label': 'O'}
            for span_start, span_end in spans(text[segment_start : segment_end])]

  for i, start_index in enumerate(start_indexes):
    end_index = boundaries[i + 1]
    # text between the previous aspect term (or the sentence start) and this one
    preprocessed_text += outside_tokens(boundaries[i], start_index)

    aspect_term = text[start_index:end_index]
    true_aspect_terms.append(aspect_term)

    aspect_term_list = []
    for term in aspect_term.split(' '):
      preprocessed_term = preprocess_term(term)
      if preprocessed_term != '':
        aspect_term_list.append({'token': preprocessed_term,
                                 'ne_label': 'B-AT' if len(aspect_term_list) == 0 else 'I-AT',
                                 'polarity': polarities[i]})
    preprocessed_text += aspect_term_list

  # text after the last aspect term (the whole sentence when there is none)
  preprocessed_text += outside_tokens(boundaries[-1], None)

  return preprocessed_text, true_aspect_terms

In [None]:
# Name of the pretrained checkpoint used for both the tokenizer and the model.
BERT_MODEL = 'roberta-large'
# The text is already split into words by the preprocessing functions, so the
# tokenizer is only used to split single words into word pieces.
# NOTE(review): `do_lower_case` / `do_basic_tokenize` look like BERT-tokenizer
# options; presumably RobertaTokenizer does not act on them — confirm.
tokenizer = RobertaTokenizer.from_pretrained(BERT_MODEL, do_lower_case=False, do_basic_tokenize=False) # I have already done the tokenization with the preprocessing function ---> do_basic_tokenize=False

class ABSA_B_Dataset(Dataset):
    """
    In-memory dataset for aspect-term polarity classification.

    Each sample is built from a sentence and its annotated targets. Samples are
    first stored in raw (preprocessed text) form; index_dataset must then be
    called to encode them before they can be retrieved with indexing.
    """

    # static variables
    # Polarity vocabulary; 'PAD' is the extra label used to pad label tensors.
    polarities = ['positive', 'negative', 'neutral', 'conflict', 'PAD']
    polarity2idx = {t: i for i, t in enumerate(polarities)}
    padding_polarity_index = polarity2idx['PAD']


    def __init__(self, 
                 targets: pd.Series,
                 text: pd.Series,
                 preprocess_text: Callable[[str, List[int], List[int], List[str]], Tuple[List[Dict[str, str]], List[str]]]=preprocess_text):
        """
        We assume that the dataset can fit in memory.
        Args:
            targets (pd.Series): Pandas Dataframe column containing the targets.
                Each target is read as target[0] == [start, end] character
                offsets and target[2] == polarity.
            text (pd.Series): Pandas Dataframe column containing the text.
            preprocess_text (Callable): function that, given the text, the start
                offsets, the end offsets and the polarities of the aspect terms,
                returns the labelled tokens and the raw aspect terms.
        """

        self.data = []
        for row_targets, row_text in zip(targets, text):
          indexes_and_polarities = list()
          for target in row_targets:
            indexes = target[0]
            indexes_and_polarities.append((indexes[0], indexes[1], target[2]))
          # sort the aspect terms by their position in the sentence
          indexes_and_polarities.sort()
          start_indexes, end_indexes, polarities = [elem[0] for elem in indexes_and_polarities], [elem[1] for elem in indexes_and_polarities], [elem[2] for elem in indexes_and_polarities]
          preprocessed_text, true_aspect_terms = preprocess_text(row_text, start_indexes, end_indexes, polarities)
          assert len(true_aspect_terms) == len(polarities)

          self.data.append({"preprocessed_text": preprocessed_text,
                            "true_outputs": {'targets': list(zip(true_aspect_terms, polarities))}})
        # filled by index_dataset
        self.encoded_data = None
    
    def index_dataset(self):
        """
        Encode every raw sample and store the result in self.encoded_data.

        Each encoded sample is a dict with the word-piece ids ("inputs"), the
        polarity indexes of the aspect terms ("outputs"), the positions of the
        first and last word piece of each aspect term ("start_indexes",
        "end_indexes") and the gold (aspect term, polarity) pairs ("true_outputs").
        """
        self.encoded_data = list()
        for i in range(len(self.data)):
            # for each sentence
            data_i = self.data[i]
            elem = data_i["preprocessed_text"]
            encoded_elem, encoded_labels, start_indexes, end_indexes = self.encode_text_and_labels(elem)
            true_outputs = data_i["true_outputs"]
            # one encoded label per gold aspect term
            assert len(encoded_labels) == len(true_outputs['targets'])

            self.encoded_data.append({"inputs": encoded_elem, 
                                      "outputs": encoded_labels,
                                      "start_indexes": start_indexes,
                                      "end_indexes": end_indexes,
                                      "true_outputs": true_outputs})

    def __len__(self):
        """Return the number of samples (sentences) in the dataset."""
        return len(self.data)

    def __getitem__(self, idx):
        """
        Return the encoded sample at position idx.

        Raises:
            RuntimeError: if index_dataset has not been invoked yet.
        """
        if self.encoded_data is None:
            raise RuntimeError("""Trying to retrieve elements but index_dataset
            has not been invoked yet! Be sure to invoce index_dataset on this object
            before trying to retrieve elements. In case you want to retrieve raw
            elements, use the method get_raw_element(idx)""")
        return self.encoded_data[idx]
    
    def get_raw_element(self, idx):
        """Return the raw (not encoded) sample at position idx."""
        return self.data[idx]

    @staticmethod
    def encode_text_and_labels(sentence:list):
        """
        Args:
            sentence (list): list of Dicts, each carrying the information about
            one token ('token', 'ne_label' and, for aspect terms, 'polarity').
        Return:
            The method returns four LongTensors: the ids of the word pieces of
            the sentence (wrapped in the tokenizer's bos/eos tokens), the
            polarity indexes of the aspect terms, and the positions of the first
            and of the last word piece of each aspect term in the sentence.
        """

        words_pieces_list, polarity_indexes, start_indexes, end_indexes = [tokenizer.bos_token], list(), list(), list()

        for i, d in enumerate(sentence):

          word = d['token']
          label = d['ne_label']
          
          tokens = tokenizer.tokenize(word, is_split_into_words = i != 0) # The Ġ char should not be added at the first word of the sentence
          n_word_pieces = len(tokens)  # NOTE(review): not used afterwards

          word_pieces_without_punct = list()
          add_special_symbol = False

          for word_piece_index, word_piece in enumerate(tokens):
            filtered_word_pieces = word_piece.replace('Ġ', '')
            # NOTE(review): this is a substring test against string.punctuation,
            # so an empty string (a bare 'Ġ' piece) also counts as punctuation.
            if filtered_word_pieces not in punctuation:
              if add_special_symbol:
                # the first piece of the word was dropped as punctuation: this
                # piece is tokenized again, presumably to restore the Ġ prefix
                new_word_pieces = tokenizer.tokenize(word_piece, is_split_into_words = i != 0)
                word_pieces_without_punct.extend(new_word_pieces)
                add_special_symbol = False
              else:
                word_pieces_without_punct.append(word_piece)
            elif word_piece_index == 0:
              add_special_symbol = True

          if label == 'B-AT':
            # position of the first word piece of the aspect term
            start_indexes.append(len(words_pieces_list))
            polarity_indexes.append(ABSA_B_Dataset.polarity2idx[d['polarity']])

          # when every word piece is punctuation the original pieces are kept
          words_pieces_list.extend(word_pieces_without_punct if len(word_pieces_without_punct) > 0 else tokens)

          if label == 'B-AT':
            # position of the last word piece added so far for the aspect term
            end_indexes.append(len(words_pieces_list) - 1)
          elif label == 'I-AT':
            # the aspect term continues: move its end forward
            end_indexes[-1] = len(words_pieces_list) - 1
              

        words_pieces_list.append(tokenizer.eos_token)

        assert len(start_indexes) == len(end_indexes) == len(polarity_indexes)

        return torch.LongTensor(tokenizer.convert_tokens_to_ids(words_pieces_list)), torch.LongTensor(polarity_indexes), torch.LongTensor(start_indexes), torch.LongTensor(end_indexes)
    
    
    @staticmethod
    def decode_output(max_indices:List[List[int]]):
        """
        Args:
            max_indices: a List where the i-th entry is a List containing the
            indexes preds for the i-th sample
        Output:
            The method returns a list with one entry per sample, where each
            entry is the list of polarity names corresponding to the predicted
            indexes of that sample.
        """
        predictions = list()
        for indices in max_indices:
            predictions.append([ABSA_B_Dataset.polarities[i] for i in indices])
        return predictions

In [None]:
from math import sqrt

def get_polarities_weights(train_polarities_count: pd.Series):
  """
  Compute the per-polarity loss weights for the variant in VARIANT_TO_TEST.

  Args:
      train_polarities_count (pd.Series): number of training aspect terms per
          polarity, indexed by polarity name.
  Return:
      A float tensor with one weight per entry of ABSA_B_Dataset.polarities,
      in that order; the weight of the 'PAD' polarity is 0.
  Raises:
      KeyError: if a (non-PAD) polarity is missing from train_polarities_count.
  """
  # Counts aligned with the class indexes (ABSA_B_Dataset.polarities order).
  # The input order (e.g. value_counts() sorts by frequency) must not decide
  # which class a weight is assigned to.
  counts = np.array([train_polarities_count[polarity]
                     for polarity in ABSA_B_Dataset.polarities if polarity != 'PAD'], dtype=float)

  if VARIANT_TO_TEST == 'INS':
    # inverse of the number of samples, normalized to sum to the number of classes
    weights = 1.0 / counts
    weights = weights / np.sum(weights) * len(counts)

  elif VARIANT_TO_TEST == 'ISNS':
    # inverse of the square root of the number of samples, normalized as above
    weights = np.sqrt(1.0 / counts)
    weights = weights / np.sum(weights) * len(counts)

  elif VARIANT_TO_TEST == 'ENS':
    # weights based on the "effective number" of samples 1 - beta^n
    beta = 0.9999
    effective_num = 1.0 - np.power(beta, counts)
    weights = (1.0 - beta) / effective_num
    weights = weights / np.sum(weights) * len(counts)

  elif VARIANT_TO_TEST == 'ATPC':
    # ratio between the most frequent polarity count and each polarity count
    weights = counts.max() / counts

  else:
    # every other variant: square root of the ratio used for 'ATPC'
    weights = np.sqrt(counts.max() / counts)

  weights = np.append(weights, 0.0) # weight for the PAD polarity
  return torch.tensor(weights, dtype=torch.float32)
  



In [None]:
# Number of training aspect terms per polarity (the polarity is the last field of each target).
train_polarities_count = pd.Series([target[-1] for row_targets in train_dataframe.targets for target in row_targets], name='train polarities').value_counts()
# Display the counts as a table with in-cell bars.
train_polarities_count.to_frame().style.bar()

Unnamed: 0,train polarities
positive,2605
negative,1364
neutral,877
conflict,111


In [None]:
# Per-polarity loss weights for the selected variant; read later by the collate function.
POLARITIES_WEIGHTS = get_polarities_weights(train_polarities_count)
POLARITIES_WEIGHTS

tensor([1.0000, 1.3820, 1.7235, 4.8444, 0.0000])

In [None]:
# Same polarity count as for the training set, computed on the validation set (display only).
pd.Series([target[-1] for row_targets in valid_dataframe.targets for target in row_targets], name='valid polarities').value_counts().to_frame().style.bar()

Unnamed: 0,valid polarities
positive,546
negative,307
neutral,216
conflict,25


In [None]:
def test_dataset_class():
    """Build the training dataset and print the labelled tokens of its first 10 samples."""
    dataset = ABSA_B_Dataset(train_dataframe['targets'], train_dataframe['text'])

    print('Dataset test:')
    for sample_index in range(10):
        described_tokens = []
        for token_info in dataset.get_raw_element(sample_index)['preprocessed_text']:
            # tokens outside the aspect terms have no 'polarity' key
            polarity = token_info.get('polarity', '')
            described_tokens.append("Token: " + token_info["token"] + ", NE label: " + token_info["ne_label"] + ", polarity: " + polarity)
        print('  sample {}: {}'.format(sample_index, described_tokens))
    dataset = None

test_dataset_class()

Dataset test:
  sample 0: ['Token: I, NE label: O, polarity: ', 'Token: always, NE label: O, polarity: ', 'Token: use, NE label: O, polarity: ', 'Token: a, NE label: O, polarity: ', 'Token: backup, NE label: O, polarity: ', 'Token: hard, NE label: B-AT, polarity: neutral', 'Token: disk, NE label: I-AT, polarity: neutral', 'Token: to, NE label: O, polarity: ', 'Token: store, NE label: O, polarity: ', 'Token: important, NE label: O, polarity: ', 'Token: files, NE label: O, polarity: ', 'Token: at, NE label: O, polarity: ', 'Token: all, NE label: O, polarity: ', 'Token: times, NE label: O, polarity: ']
  sample 1: ['Token: I, NE label: O, polarity: ', 'Token: also, NE label: O, polarity: ', 'Token: love, NE label: O, polarity: ', 'Token: the, NE label: O, polarity: ', 'Token: small, NE label: O, polarity: ', 'Token: convenient, NE label: O, polarity: ', 'Token: size, NE label: B-AT, polarity: positive', 'Token: of, NE label: O, polarity: ', 'Token: my, NE label: O, polarity: ', 'Token: la

In [None]:
# Build and encode the training dataset; it is used below to try out the collate function.
dataset = ABSA_B_Dataset(train_dataframe['targets'], train_dataframe['text'])
dataset.index_dataset()

In [None]:
def rnn_collate_fn(
    data_elements: List[Dict[str, Union[torch.Tensor, List]]]) -> Dict[str, Union[torch.Tensor, List]]:
    """
    Collate a list of encoded samples into a single padded batch.

    Args:
        data_elements: the encoded samples, i.e. dicts with the keys 'inputs',
            'outputs', 'start_indexes', 'end_indexes' and 'true_outputs'.
    Return:
        A dict with the padded tensors 'inputs', 'outputs', 'start_indexes' and
        'end_indexes', the list 'true_outputs', and the two masks:
        'attention_masks' (1 where the input is not the padding token) and
        'label_masks' (the POLARITIES_WEIGHTS entry of each label, which is the
        weight stored for the padding polarity on padded positions).
    """
    padding_token_id = tokenizer.pad_token_id
    padding_polarity_index = ABSA_B_Dataset.padding_polarity_index

    def pad(key, padding_value):
        # stack the `key` tensors of all the samples, padding them to the same length
        return pad_sequence([de[key] for de in data_elements], batch_first=True, padding_value=padding_value)

    batch = {}
    batch['inputs'] = pad('inputs', padding_token_id)
    batch['outputs'] = pad('outputs', padding_polarity_index)
    # padded aspect terms point to position 0; their label mask decides how much they count
    batch['start_indexes'] = pad('start_indexes', 0)
    batch['end_indexes'] = pad('end_indexes', 0)
    batch['true_outputs'] = [de['true_outputs'] for de in data_elements]
    # vectorized masks: no Python loop over the single tensor elements
    batch['attention_masks'] = (batch['inputs'] != padding_token_id).long()
    batch['label_masks'] = POLARITIES_WEIGHTS[batch['outputs']]

    assert batch['inputs'].shape == batch['attention_masks'].shape and batch['outputs'].shape == batch['start_indexes'].shape == batch['end_indexes'].shape == batch['label_masks'].shape

    return batch

In [None]:
class DataModuleABSA_B(pl.LightningDataModule):
    """
    Lightning data module that builds the training, validation and test
    datasets and their dataloaders. The validation and the test sets are both
    built from the dev data.
    """

    def __init__(self, training_targets, training_text, dev_targets, dev_text, batch_size=32, number_of_valid_samples=VALID_SIZE, number_of_test_samples=TEST_SIZE):
        super().__init__()
        self.training_targets, self.training_text = training_targets, training_text
        self.dev_targets, self.dev_text = dev_targets, dev_text
        self.batch_size = batch_size
        self.number_of_valid_samples = number_of_valid_samples
        self.number_of_test_samples = number_of_test_samples

    def setup(self, stage=None):
        """Build the three datasets and encode them."""
        self.trainingset = ABSA_B_Dataset(self.training_targets, self.training_text)
        self.devset = ABSA_B_Dataset(self.dev_targets, self.dev_text)
        self.testset = ABSA_B_Dataset(self.dev_targets, self.dev_text)

        for split in (self.trainingset, self.devset, self.testset):
            split.index_dataset()

    def _evaluation_loader(self, split, number_of_samples):
        # Ordered loader that covers the split with batches of half its size.
        return DataLoader(split,
                          batch_size=number_of_samples // 2,
                          shuffle=False,
                          collate_fn=rnn_collate_fn,
                          num_workers=0)

    def train_dataloader(self):
        """Shuffled loader over the training set."""
        return DataLoader(self.trainingset,
                          batch_size=self.batch_size,
                          shuffle=True,
                          collate_fn=rnn_collate_fn,
                          num_workers=0)

    def val_dataloader(self):
        """Loader over the validation set."""
        # If you want to test with the last four layers ---> //4, because, otherwise, the CUDA memory is not sufficient
        return self._evaluation_loader(self.devset, self.number_of_valid_samples)

    def test_dataloader(self):
        """Loader over the test set."""
        return self._evaluation_loader(self.testset, self.number_of_test_samples)

In [None]:
# Try the collate function: print the content and the size of the first batch only.
train_dataloader = DataLoader(dataset, batch_size=32, collate_fn=rnn_collate_fn)
for batch in train_dataloader:
    # content of every field of the batch
    for key in ('inputs', 'outputs', 'start_indexes', 'end_indexes', 'true_outputs', 'attention_masks', 'label_masks'):
        print(batch[key])
    # size of every field ('true_outputs' is a plain list, the others are tensors)
    for key in ('inputs', 'outputs', 'start_indexes', 'end_indexes', 'true_outputs', 'attention_masks', 'label_masks'):
        print(len(batch[key]) if key == 'true_outputs' else batch[key].shape)
    # per-sample view: word pieces with their attention mask, then the aspect terms
    samples = zip(batch['inputs'], batch['attention_masks'], batch['start_indexes'], batch['end_indexes'], batch['outputs'], batch['label_masks'])
    for indexes, mask_values, start_indexes, end_indexes, polarities, label_mask in samples:
        word_pieces = tokenizer.convert_ids_to_tokens(indexes)
        print(list(zip(word_pieces, mask_values)))
        print(list(zip(start_indexes, end_indexes, polarities, label_mask)))
        print('\n')
    break

# To free up RAM
dataset = None
train_dataloader = None

tensor([[    0,   100,   460,  ...,     1,     1,     1],
        [    0,   100,    67,  ...,     1,     1,     1],
        [    0,   100,   802,  ...,     1,     1,     1],
        ...,
        [    0, 34647,   615,  ...,     1,     1,     1],
        [    0,  2409,    38,  ...,     1,     1,     1],
        [    0,   133,  2332,  ...,     1,     1,     1]])
tensor([[2, 4, 4],
        [0, 4, 4],
        [2, 4, 4],
        [0, 4, 4],
        [0, 0, 4],
        [1, 4, 4],
        [4, 4, 4],
        [4, 4, 4],
        [4, 4, 4],
        [1, 4, 4],
        [4, 4, 4],
        [1, 4, 4],
        [0, 0, 2],
        [0, 4, 4],
        [4, 4, 4],
        [4, 4, 4],
        [0, 0, 4],
        [4, 4, 4],
        [4, 4, 4],
        [2, 4, 4],
        [4, 4, 4],
        [4, 4, 4],
        [0, 4, 4],
        [4, 4, 4],
        [4, 4, 4],
        [4, 4, 4],
        [1, 4, 4],
        [4, 4, 4],
        [4, 4, 4],
        [0, 4, 4],
        [4, 4, 4],
        [1, 4, 4]])
tensor([[ 6,  0,  0],
       

## Model Building

In [None]:
import transformers.models.roberta.modeling_roberta
from transformers.models.roberta.modeling_roberta import RobertaPreTrainedModel, RobertaModel
from torch.nn import CrossEntropyLoss
from transformers.modeling_outputs import TokenClassifierOutput

def get_aspect_terms_word_pieces(start_indexes, end_indexes, contextualized_embs, embs_mask):
    """
    Gather the embeddings of the word pieces of every aspect term of the batch.

    Args:
        start_indexes: integer tensor (batch_size, n_terms) with the position,
            inside its sentence, of the first word piece of each aspect term.
        end_indexes: integer tensor (batch_size, n_terms) with the position of
            the last word piece of each aspect term.
        contextualized_embs: tensor (batch_size, sequence_length, hidden_dim).
        embs_mask: tensor (batch_size, sequence_length), non-zero on the
            positions to keep. The kept positions are assumed to be the first
            ones of each sentence (padding on the right).
    Return:
        A tuple (aspect_terms_word_pieces, aspect_terms_word_pieces_mask) of
        shapes (batch_size * n_terms, max_len, hidden_dim) and
        (batch_size * n_terms, max_len), where max_len is the largest number of
        word pieces of an aspect term. The mask is a long tensor with 1 on the
        word pieces that belong to the aspect term.
    """
    # Position of the first token of each sentence once all the unmasked
    # tokens of the batch are concatenated (exclusive cumulative sum).
    sentences_length = torch.sum(embs_mask, dim=-1)
    terms_offset = torch.cumsum(sentences_length, dim=0) - sentences_length
    start_indexes_offset = (start_indexes + terms_offset.unsqueeze(1)).view(-1)
    end_indexes_offset = (end_indexes + terms_offset.unsqueeze(1)).view(-1)

    number_of_word_pieces_per_aspect_term = end_indexes_offset - start_indexes_offset + 1
    max_aspect_term_len = int(torch.max(number_of_word_pieces_per_aspect_term))

    # Keep only the unmasked tokens, concatenated over the whole batch.
    # squeeze(-1) (and not squeeze()) keeps the index one-dimensional even when
    # a single token is unmasked, so the result is always (n_tokens, hidden_dim).
    hidden_dim = contextualized_embs.shape[-1]
    kept_positions = embs_mask.reshape(-1).nonzero().squeeze(-1)
    contextualized_embs = contextualized_embs.reshape(-1, hidden_dim)[kept_positions, :]
    text_length = contextualized_embs.shape[0]

    word_pieces_indexes_range = torch.arange(max_aspect_term_len, device=start_indexes_offset.device)
    # Indexes of the word pieces of each aspect term; the ones that would fall
    # after the end of the text are clamped to the last token (they are masked).
    aspect_terms_word_pieces_indexes = (word_pieces_indexes_range.unsqueeze(0) + start_indexes_offset.unsqueeze(1)).clamp(max=text_length - 1)
    aspect_terms_word_pieces = contextualized_embs[aspect_terms_word_pieces_indexes, :]

    aspect_terms_word_pieces_mask = word_pieces_indexes_range.to(number_of_word_pieces_per_aspect_term.device) < number_of_word_pieces_per_aspect_term.unsqueeze(-1)
    return aspect_terms_word_pieces, aspect_terms_word_pieces_mask.long()

def get_aspect_terms_representation(aspect_terms_word_pieces, attention_scores, aspect_terms_word_pieces_mask):
    """
    Combine the word pieces of each aspect term into a single vector.

    The attention scores are normalized with a softmax over the word pieces of
    the aspect term and used as the weights of a weighted sum of the word
    pieces. A large negative value is added to the scores of the masked word
    pieces, so that the padding is not considered in the combination.
    """
    padding_penalty = (1.0 - aspect_terms_word_pieces_mask) * -100000.0
    attention_weights = torch.softmax(attention_scores + padding_penalty, dim=-1)
    weighted_word_pieces = attention_weights.unsqueeze(-1) * aspect_terms_word_pieces
    return weighted_word_pieces.sum(dim=1)

class RobertaForAspectTermClassification(RobertaPreTrainedModel):
    """
    RoBERTa encoder with a polarity classification head over the aspect terms.

    Each aspect term is represented by an attention-weighted sum of the
    embeddings of its word pieces, passed through a linear layer, a tanh, a
    dropout and a final linear classifier.
    """

    _keys_to_ignore_on_load_unexpected = [r"pooler"]
    _keys_to_ignore_on_load_missing = [r"position_ids"]

    def __init__(self, config):
        super().__init__(config)

        if VARIANT_TO_TEST not in VARIANTS:
          raise RuntimeError('The inserted VARIANT is not valid')
        
        self.num_labels = config.num_labels

        self.roberta = RobertaModel(config, add_pooling_layer=False)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.linear = nn.Linear(config.hidden_size, config.hidden_size)
        self.tanh = nn.Tanh()
        self.a = nn.Linear(config.hidden_size, 1) # It is the a vector named in the report
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)

        self.init_weights()

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=True,
        return_dict=None,
        start_indexes=None,
        end_indexes=None,
        label_masks=None,
    ):
        r"""
        start_indexes, end_indexes (:obj:`torch.LongTensor`):
            Positions of the first and of the last word piece of each aspect term of each sentence.
        labels (:obj:`torch.LongTensor` with the same shape as ``start_indexes``, `optional`):
            Polarity index of each aspect term. When not provided no loss is computed.
        label_masks (:obj:`torch.FloatTensor` with the same shape as ``labels``, `optional`):
            Weight of each aspect term in the loss (0 to ignore it). When not provided all the
            aspect terms have the same weight.

        The logits have one row per (sentence, aspect term) pair, padded aspect terms included.
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
        # The hidden states are requested only by the variants that need them:
        # the value received as argument is not used.
        output_hidden_states = VARIANT_TO_TEST in {'Average Last Four Hidden', 'Sum Last Four Hidden'}

        outputs = self.roberta(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        if VARIANT_TO_TEST in {'INS', 'ISNS', 'ENS', 'ATPC with sqrt', 'ATPC'}:
          # output of the last layer
          sequence_output = outputs[0]
        else:
          # the first hidden state is skipped, then the last four are combined
          hidden_states = outputs.hidden_states[1:] if return_dict else outputs[2][1:]
          stacked_last_four_layers = torch.stack(hidden_states[-4:], dim=0)
          sequence_output = stacked_last_four_layers.mean(dim=0) if VARIANT_TO_TEST == 'Average Last Four Hidden' else stacked_last_four_layers.sum(dim=0)

        aspect_terms_word_pieces, aspect_terms_word_pieces_mask = get_aspect_terms_word_pieces(start_indexes, end_indexes, sequence_output, attention_mask)

        attention_scores = (self.a(aspect_terms_word_pieces)).squeeze(-1)
        aspect_terms_representation = get_aspect_terms_representation(aspect_terms_word_pieces, attention_scores, aspect_terms_word_pieces_mask)

        aspect_terms_representation = self.linear(aspect_terms_representation)
        aspect_terms_representation = self.tanh(aspect_terms_representation)
        aspect_terms_representation = self.dropout(aspect_terms_representation)
        logits = self.classifier(aspect_terms_representation)

        loss = None
        if labels is not None:
            loss_function = CrossEntropyLoss(reduction='none') # If I use ignore_index, it still considers it in the normalization
            per_term_loss = loss_function(logits, labels.view(-1))
            if label_masks is None:
                weights = torch.ones_like(per_term_loss)
            else:
                weights = label_masks.view(-1).to(dtype=per_term_loss.dtype)
            # Weighted mean of the losses. The lower bound on the denominator
            # avoids a NaN loss when every weight of the batch is zero.
            normalizer = torch.sum(weights).clamp_min(torch.finfo(per_term_loss.dtype).eps)
            loss = torch.sum(per_term_loss * weights) / normalizer

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return TokenClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

In [None]:
class AspectTermClassificationModel(nn.Module):
    """Wrapper around the pretrained RobertaForAspectTermClassification model."""

    def __init__(self, hparams):
        """
        Args:
            hparams: the hyperparameters; `bert_model` and `num_labels` are read.
        """
        super().__init__()
        pprint(hparams)

        self.roberta = RobertaForAspectTermClassification.from_pretrained(
            hparams.bert_model,
            num_labels=hparams.num_labels,
        )

    def forward(self, x, start_indexes, end_indexes, label_masks):
        """
        Args:
            x: triple (input ids, attention mask, labels).
            start_indexes, end_indexes: positions of the word pieces of the aspect terms.
            label_masks: weights of the aspect terms in the loss.
        Return:
            The pair (loss, logits) computed by the wrapped model.
        """
        input_ids, input_mask, labels = x
        model_outputs = self.roberta(
            input_ids,
            token_type_ids=None,
            attention_mask=input_mask,
            labels=labels,
            start_indexes=start_indexes,
            end_indexes=end_indexes,
            label_masks=label_masks,
        )
        return model_outputs[0], model_outputs[1]

In [None]:
def get_sentiment_results(sentiment_types, scores):
    """
    Fill `scores` with precision, recall and F1 (as percentages).

    Args:
        sentiment_types: the sentiments to aggregate in the "ALL" entry.
        scores: dict mapping each sentiment (and "ALL") to a dict with the
            "tp", "fp" and "fn" counts; it is updated in place.
    Return:
        The tuple (scores, micro precision, micro recall, micro F1).
    """
    # Per sentiment Precision / Recall / F1
    for counts in scores.values():
        true_positives = counts["tp"]
        if true_positives:
            counts["p"] = 100 * true_positives / (counts["fp"] + true_positives)
            counts["r"] = 100 * true_positives / (counts["fn"] + true_positives)
        else:
            counts["p"], counts["r"] = 0, 0

        if counts["p"] + counts["r"] != 0:
            counts["f1"] = 2 * counts["p"] * counts["r"] / (counts["p"] + counts["r"])
        else:
            counts["f1"] = 0

    # Micro scores: computed on the counts summed over the sentiments
    tp, fp, fn = (sum(scores[sent_type][count] for sent_type in sentiment_types)
                  for count in ("tp", "fp", "fn"))

    precision, recall, f1 = 0, 0, 0
    if tp:
        precision = 100 * tp / (tp + fp)
        recall = 100 * tp / (tp + fn)
        f1 = 2 * precision * recall / (precision + recall)

    overall = scores["ALL"]
    overall["p"], overall["r"], overall["f1"] = precision, recall, f1
    overall["tp"], overall["fp"], overall["fn"] = tp, fp, fn

    # Macro scores: average of the per sentiment scores
    for macro_name, metric in (("Macro_f1", "f1"), ("Macro_p", "p"), ("Macro_r", "r")):
        overall[macro_name] = sum([scores[sent_type][metric] for sent_type in sentiment_types]) / len(sentiment_types)

    return scores, precision, recall, f1



def evaluate_sentiment(samples, predictions_b):
    """Count per-sentiment TP / FP / FN between gold and predicted targets and score them.

    Args:
        samples: gold items; each is indexed with ``["targets"]`` and every target
            is read as ``target[0]`` (the term) and ``target[1]`` (the sentiment).
        predictions_b: predicted items with the same layout, aligned with ``samples``.

    Returns:
        Whatever ``get_sentiment_results`` returns for the accumulated counts.
    """
    sentiment_types = ["positive", "negative", "neutral", "conflict"]
    scores = {name: {"tp": 0, "fp": 0, "fn": 0} for name in [*sentiment_types, "ALL"]}

    for gold_sample, predicted_sample in zip(samples, predictions_b):
        for sentiment in sentiment_types:
            # Sets of (term, sentiment) pairs, so identical pairs within one sample count once.
            predicted_pairs = {
                (target[0], target[1])
                for target in predicted_sample["targets"]
                if target[1] == sentiment
            }
            gold_pairs = {
                (target[0], target[1])
                for target in gold_sample["targets"]
                if target[1] == sentiment
            }

            counts = scores[sentiment]
            counts["tp"] += len(predicted_pairs & gold_pairs)
            counts["fp"] += len(predicted_pairs - gold_pairs)
            counts["fn"] += len(gold_pairs - predicted_pairs)

    return get_sentiment_results(sentiment_types, scores)

    
        
def get_metrics_results(true_outputs: List[Dict[str, List[Tuple[str, str]]]],
                        predictions: List[List[str]]) -> Tuple[Dict[str, Dict[str, float]], float, float, float]:
    """Pair every gold aspect term with its predicted polarity and score the result.

    Args:
        true_outputs: one dict per sample whose ``'targets'`` entry lists
            ``(aspect_term, true_polarity)`` pairs.
        predictions: one list of predicted polarities per sample, in the same
            order as that sample's gold targets.

    Returns:
        The ``(scores, precision, recall, f1)`` tuple produced by
        ``evaluate_sentiment`` (the original ``-> None`` annotation was wrong).
    """
    aspect_terms_polarities_predictions = [
        # Keep the gold aspect term and swap in the predicted polarity.
        {'targets': [(aspect_term, pred_polarity)
                     for (aspect_term, _), pred_polarity in zip(true_output['targets'], pred_output)]}
        for true_output, pred_output in zip(true_outputs, predictions)
    ]

    return evaluate_sentiment(true_outputs, aspect_terms_polarities_predictions)

In [None]:
from torchmetrics import F1
from transformers import get_linear_schedule_with_warmup
from transformers import AdamW


class PolarityModule(pl.LightningModule):
    """LightningModule that trains and evaluates ``AspectTermClassificationModel``.

    Besides the usual train / validation / test steps, it accumulates
    per-sentiment TP / FP / FN counts over every ``test_step`` call in
    ``self.sent_scores`` so that ``print_test_results`` can report scores for the
    whole test run. The counts are never reset, so testing the same instance
    twice adds the second run on top of the first.
    """

    def __init__(self, hparams, *args, **kwargs):
        """
        Args:
            hparams: hyperparameters stored through ``save_hyperparameters``. This
                class reads ``num_labels``, ``full_finetuning``, ``n_batches`` and
                ``n_epochs``; the stored hyperparameters are also passed to
                ``AspectTermClassificationModel``.
        """
        super(PolarityModule, self).__init__(*args, **kwargs)
        self.save_hyperparameters(hparams)
        self.F1 = F1(num_classes=self.hparams.num_labels, average='macro', ignore_index=ABSA_B_Dataset.padding_polarity_index)
        self.model = AspectTermClassificationModel(self.hparams)
        self.sent_scores = {sent: {"tp": 0, "fp": 0, "fn": 0} for sent in ['positive', 'negative', 'neutral', 'conflict']}

    def forward(self, x, start_indexes, end_indexes, label_masks):
        """Run the wrapped model and return its outputs (unpacked by the steps as ``(loss, logits)``)."""
        outputs = self.model(x, start_indexes, end_indexes, label_masks)
        return outputs

    @staticmethod
    def _flat_predictions_and_labels(logits, labels):
        """Return the argmax predictions and flattened labels with padding positions removed.

        A boolean mask is used instead of a per-element Python loop, so the
        tensors stay on their device and no element-wise host sync is needed.
        """
        predictions = torch.argmax(logits, -1)
        labels = labels.view(-1)
        assert predictions.shape == labels.shape
        keep = labels != ABSA_B_Dataset.padding_polarity_index
        return predictions[keep], labels[keep]

    def training_step(self, batch, batch_nb):
        """Compute and log the loss and macro F1 of one training batch; return the loss."""
        labels = batch['outputs']
        inputs = (batch['inputs'], batch['attention_masks'], labels)

        loss, logits = self.forward(inputs, batch['start_indexes'], batch['end_indexes'], batch['label_masks'])

        predictions, labels = self._flat_predictions_and_labels(logits, labels)
        batch_f1 = self.F1(predictions, labels)

        self.log('train_loss', loss, prog_bar=True)
        self.log('train_f1', batch_f1, prog_bar=True)
        # Lightning uses the returned loss for the backward pass and the weight update.
        return loss

    def validation_step(self, batch, batch_nb):
        """Compute and log the loss and macro F1 of one validation batch."""
        labels = batch['outputs']
        inputs = (batch['inputs'], batch['attention_masks'], labels)

        sample_loss, logits = self.forward(inputs, batch['start_indexes'], batch['end_indexes'], batch['label_masks'])

        predictions, labels = self._flat_predictions_and_labels(logits, labels)
        sample_F1 = self.F1(predictions, labels)

        self.log('valid_loss', sample_loss, prog_bar=True)
        self.log('valid_f1', sample_F1, prog_bar=True)

    def test_step(self, batch, batch_nb):
        """Score one test batch, log its metrics and add its counts to ``self.sent_scores``."""
        labels = batch['outputs']
        inputs = (batch['inputs'], batch['attention_masks'], labels)

        sample_loss, logits = self.forward(inputs, batch['start_indexes'], batch['end_indexes'], batch['label_masks'])

        # Regroup the flat logits per sample so predictions can be decoded sample by sample.
        batch_size = labels.size()[0]
        max_number_of_aspect_terms = labels.size()[1]
        num_classes = logits.size()[1]
        logits = logits.view([batch_size, max_number_of_aspect_terms, num_classes])

        predictions = torch.argmax(logits, -1)
        keep = labels != ABSA_B_Dataset.padding_polarity_index

        # One list of predicted class indexes per sample, padding positions dropped.
        per_sample_predictions = [sample_predictions[sample_keep].tolist()
                                  for sample_predictions, sample_keep in zip(predictions, keep)]
        decoded_predictions = ABSA_B_Dataset.decode_output(per_sample_predictions)

        scores, _, _, _ = get_metrics_results(batch['true_outputs'], decoded_predictions)
        for sentiment in self.sent_scores:
            for count in ('tp', 'fp', 'fn'):
                self.sent_scores[sentiment][count] += scores[sentiment][count]

        # Boolean indexing flattens in row-major order, matching the per-sample lists above.
        flat_predictions = predictions[keep]
        flat_labels = labels[keep]
        assert flat_predictions.shape == flat_labels.shape
        sample_F1 = self.F1(flat_predictions, flat_labels)

        self.log('test_loss', sample_loss, prog_bar=True)
        self.log('test_f1_on_NE_labels', sample_F1, prog_bar=True)
        # NOTE: the macro values below are computed on this batch alone; the scores over
        # the whole test run are the ones reported by print_test_results().
        self.log('test_macro_precision', scores["ALL"]["Macro_p"], prog_bar=True)
        self.log('test_macro_recall', scores["ALL"]["Macro_r"], prog_bar=True)
        self.log('test_macro_f1', scores["ALL"]["Macro_f1"], prog_bar=True)

    def configure_optimizers(self):
        """Build the AdamW optimizer and a linear-decay schedule that is stepped every batch."""
        if self.hparams.full_finetuning:
            parameters_optimizer = list(self.model.roberta.named_parameters())
            # Biases and LayerNorm weights are excluded from weight decay. 'gamma' / 'beta'
            # are kept from the original list; 'LayerNorm.weight' is added because the
            # original names match no parameter under the Hugging Face naming scheme.
            without_decay = ['bias', 'LayerNorm.weight', 'gamma', 'beta']
            # AdamW reads the 'weight_decay' key; the former 'weight_decay_rate' key was
            # ignored, so no decay was actually applied.
            optimizer_grouped_parameters = [
                {'params': [p for n, p in parameters_optimizer if not any(wd in n for wd in without_decay)],
                 'weight_decay': 0.01},
                {'params': [p for n, p in parameters_optimizer if any(wd in n for wd in without_decay)],
                 'weight_decay': 0.0}
            ]
        else:
            parameters_optimizer = list(self.model.roberta.classifier.named_parameters())
            optimizer_grouped_parameters = [{"params": [p for n, p in parameters_optimizer]}]

        optimizer = AdamW(
            optimizer_grouped_parameters,
            lr=3e-5,
            eps=1e-8
        )

        total_train_steps = self.hparams.n_batches * self.hparams.n_epochs

        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=0,
            num_training_steps=total_train_steps
        )

        # The schedule length is expressed in optimizer steps, so it must advance once per
        # step; a bare scheduler would be stepped only once per epoch by Lightning.
        return [optimizer], [{'scheduler': scheduler, 'interval': 'step', 'frequency': 1}]

    def print_test_results(self):
        """Print micro, macro and per-sentiment scores for the counts accumulated by ``test_step``.

        ``self.sent_scores`` is updated in place: it gains an "ALL" entry and the
        derived precision / recall / F1 values.
        """
        scores = self.sent_scores
        sentiment_types = ["positive", "negative", "neutral", "conflict"]
        scores["ALL"] = {"tp": 0, "fp": 0, "fn": 0}
        scores, precision, recall, f1 = get_sentiment_results(sentiment_types, scores)

        print("Evaluation\n")

        print(
            "\tALL\t TP: {};\tFP: {};\tFN: {}".format(
                scores["ALL"]["tp"],
                scores["ALL"]["fp"],
                scores["ALL"]["fn"]))
        print(
            "\t\t(m avg): precision: {:.2f};\trecall: {:.2f};\tf1: {:.2f} (micro)".format(
                precision,
                recall,
                f1))
        print(
            "\t\t(M avg): precision: {:.2f};\trecall: {:.2f};\tf1: {:.2f} (Macro)\n".format(
                scores["ALL"]["Macro_p"],
                scores["ALL"]["Macro_r"],
                scores["ALL"]["Macro_f1"]))

        for sent_type in sentiment_types:
            # The trailing column is the number of gold targets with this sentiment (TP + FN).
            print("\t{}: \tTP: {};\tFP: {};\tFN: {};\tprecision: {:.2f};\trecall: {:.2f};\tf1: {:.2f};\t{}".format(
                sent_type,
                scores[sent_type]["tp"],
                scores[sent_type]["fp"],
                scores[sent_type]["fn"],
                scores[sent_type]["p"],
                scores[sent_type]["r"],
                scores[sent_type]["f1"],
                scores[sent_type]["tp"] +
                scores[sent_type]["fn"]))


## Model training

In [None]:
MAX_GRAD_NORM = 1.0

check_point_callback = pl.callbacks.ModelCheckpoint(
    monitor='valid_loss',  # the value that we want to use for model selection.
    verbose=True,  # whether to log or not information in the console.
    save_top_k=1,  # the number of checkpoints we want to store.
    mode='min',  # whether we want to maximize (max) or minimize (min) the "monitor" value.
    dirpath='experiments/Aspect_Term_Classifier',  # output directory path
    filename='{epoch}-{valid_loss:.4f}'  # checkpoint file name pattern; logged metrics can be interpolated into it.
)

hparams = {'num_labels': len(ABSA_B_Dataset.polarities),  # number of polarity classes
           'bert_model': BERT_MODEL,
           'full_finetuning': True,
           'n_epochs': 3,
           'device': 'cuda' if torch.cuda.is_available() else 'cpu'}

# sets seeds for numpy, torch, python.random and PYTHONHASHSEED.
pl.seed_everything(SEED, workers=True)

# A first data module is set up only to learn how many training batches one epoch has;
# the learning-rate schedule needs that number before the model is built.
data_module = DataModuleABSA_B(train_dataframe['targets'], train_dataframe['text'], valid_dataframe['targets'], valid_dataframe['text'])
data_module.setup()
n_batches = len(data_module.train_dataloader())
hparams['n_batches'] = n_batches

# The module handed to the trainer is a fresh one, as in the original cell.
# NOTE(review): presumably this avoids setting the same module up twice; confirm against DataModuleABSA_B.
data_module = DataModuleABSA_B(train_dataframe['targets'], train_dataframe['text'], valid_dataframe['targets'], valid_dataframe['text'])

# The dataframes are intentionally left untouched (they used to be set to None here),
# so this cell can be re-run without re-executing the data-loading cells.

trainer = pl.Trainer(gpus=1 if torch.cuda.is_available() else 0,
                     val_check_interval=1.0,
                     deterministic=True,
                     # max_epochs is a number of epochs, not the index of the last one: it must
                     # equal n_epochs so that training lasts as long as the LR schedule
                     # (n_batches * n_epochs steps) assumes.
                     max_epochs=hparams['n_epochs'],
                     gradient_clip_val=MAX_GRAD_NORM,
                     callbacks=[check_point_callback]  # the callback we want our trainer to use.
)

model = PolarityModule(hparams)
trainer.fit(model, datamodule=data_module)

Global seed set to 1234
GPU available: True, used: True
TPU available: False, using: 0 TPU cores


{'bert_model': 'roberta-large',
 'device': 'cuda',
 'full_finetuning': True,
 'n_batches': 157,
 'n_epochs': 3,
 'num_labels': 5}


Some weights of the model checkpoint at roberta-large were not used when initializing RobertaForAspectTermClassification: ['lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaForAspectTermClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForAspectTermClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForAspectTermClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['linear.bias', 'classifier.weight', 'a.weight', 'classifier.bias', 'linear.weight', 'a.

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…

Global seed set to 1234




HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 0, global step 156: valid_loss reached 0.78722 (best 0.78722), saving model to "/content/experiments/Aspect_Term_Classifier/epoch=0-valid_loss=0.7872.ckpt" as top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 1, global step 313: valid_loss reached 0.68218 (best 0.68218), saving model to "/content/experiments/Aspect_Term_Classifier/epoch=1-valid_loss=0.6822.ckpt" as top 1





In [None]:
# Evaluate the in-memory model on the data module's test dataloader and show the logged metrics.
test_loader = data_module.test_dataloader()
test_set_results = trainer.test(model, test_dataloaders=test_loader)
print("test set results: {}".format(test_set_results))

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Testing', layout=Layout(flex='2'), max=…


--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_f1_on_NE_labels': 0.6767185926437378,
 'test_loss': 0.682178258895874,
 'test_macro_f1': 67.69412231445312,
 'test_macro_precision': 67.98619079589844,
 'test_macro_recall': 68.80020904541016}
--------------------------------------------------------------------------------
test set results: [{'test_loss': 0.682178258895874, 'test_f1_on_NE_labels': 0.6767185926437378, 'test_macro_precision': 67.98619079589844, 'test_macro_recall': 68.80020904541016, 'test_macro_f1': 67.69412231445312}]


In [None]:
# Print the scores computed from the TP / FP / FN counts that the model accumulated during its test steps.
model.print_test_results()

Evaluation

	ALL	 TP: 863;	FP: 222;	FN: 223
		(m avg): precision: 79.54;	recall: 79.47;	f1: 79.50 (micro)
		(M avg): precision: 67.84;	recall: 66.47;	f1: 66.95 (Macro)

	positive: 	TP: 489;	FP: 84;	FN: 54;	precision: 85.34;	recall: 90.06;	f1: 87.63;	543
	negative: 	TP: 248;	FP: 66;	FN: 54;	precision: 78.98;	recall: 82.12;	f1: 80.52;	302
	neutral: 	TP: 116;	FP: 57;	FN: 100;	precision: 67.05;	recall: 53.70;	f1: 59.64;	216
	conflict: 	TP: 10;	FP: 15;	FN: 15;	precision: 40.00;	recall: 40.00;	f1: 40.00;	25


In [None]:
# Delete the `experiments` directory, which is where the checkpoint callback writes its files.
! rm -r experiments