In [1]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.25.1-py3-none-any.whl (5.8 MB)
[K     |████████████████████████████████| 5.8 MB 32.1 MB/s 
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 59.6 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 81.1 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.11.1 tokenizers-0.13.2 transformers-4.25.1


In [2]:
# imports
import random

import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset

from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments, BertConfig
from transformers import logging
import gc
from scipy.special import softmax
from sklearn import metrics


In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive



#### Preprocessing and Dataset


In [4]:
# configuration 
base_dir = "/content/drive/MyDrive/ABSA"
dataset_type = "semeval2014"
task = "single"

In [5]:

# dicts for mapping labels to ids and vice versa
id2label = {0: "positive", 1: "neutral", 2: "negative", 3: "conflict", 4: "none"}
label2id = {"positive": 0, "neutral" : 1, "negative" : 2, "conflict": 3, "none": 4}


# number of classes
num_classes = 5
locations = [""]
# aspects
aspects = ["ambience", "anecdotes", "food", "price", "service"]


def get_dataset(path):
    sentences, labels = [], []
    data = pd.read_csv(path, header=0, sep="\t").values.tolist()
    for item in data:
        sentences.append(item[1])
        labels.append(item[3])
    return sentences, labels


# get dicts for data
train_sentences, train_labels = {}, {}
val_sentences, val_labels = {}, {}
test_sentences, test_labels = {}, {}


# get the dataset for train, val, test
for loc in locations:
    train_sentences[loc], train_labels[loc] = {}, {}
    val_sentences[loc], val_labels[loc] = {}, {}
    test_sentences[loc], test_labels[loc] = {}, {}

    # get the dataset for each aspect
    for aspect in aspects:
        train_sentences[loc][aspect], train_labels[loc][aspect] = get_dataset(f"{base_dir}/data/{loc}{aspect}/train.csv")
        val_sentences[loc][aspect], val_labels[loc][aspect] = get_dataset(f"{base_dir}/data/{loc}{aspect}/test.csv")
        test_sentences[loc][aspect], test_labels[loc][aspect] = get_dataset(f"{base_dir}/data/{loc}{aspect}/test.csv")

In [6]:

# using pre-trained BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

train_encodings = {}
val_encodings = {}
test_encodings = {}

# tokenize the sentences
for loc in locations:
    # create dicts for each location
    train_encodings[loc] = {}
    val_encodings[loc] = {}
    test_encodings[loc] = {}

    # tokenize the sentences for each aspect
    for aspect in aspects:
        train_encodings[loc][aspect] = tokenizer(
            train_sentences[loc][aspect], 
            truncation=True, 
            padding=True)
        
        val_encodings[loc][aspect] = tokenizer(
            val_sentences[loc][aspect], 
            truncation=True, 
            padding=True)
        
        test_encodings[loc][aspect] = tokenizer(
            test_sentences[loc][aspect], 
            truncation=True, 
            padding=True)

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [25]:
# Dataset class for Aspect-Based Sentiment Analysis

class ABSA_Dataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {
            key: torch.tensor(val[idx]) for key, val in self.encodings.items()
        }
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

In [9]:

train_dataset = {}
val_dataset = {}
test_dataset = {}

# create dataset for each location
for loc in locations:
    train_dataset[loc] = {}
    val_dataset[loc] = {}
    test_dataset[loc] = {}

    # create dataset for each aspect
    for aspect in aspects:
        train_dataset[loc][aspect] = ABSA_Dataset(
            encodings = train_encodings[loc][aspect], 
            labels = train_labels[loc][aspect])
        
        val_dataset[loc][aspect] = ABSA_Dataset(
            encodings = val_encodings[loc][aspect], 
            labels = val_labels[loc][aspect])
        
        test_dataset[loc][aspect] = ABSA_Dataset(
            encodings = test_encodings[loc][aspect], 
            labels = test_labels[loc][aspect])


#### Model

In [32]:

logging.set_verbosity_debug()


epochs = 1 # change to 4
batch_size = 24

# create header for csv file
header = ["predicted_label"]
for label in label2id.keys():
    header.append(label)

# Hyperparameters (Config) for BERT model 
config = BertConfig.from_pretrained(
        'bert-base-uncased',
        architectures = ['BertForSequenceClassification'],
        hidden_size = 768,
        num_hidden_layers = 12,
        num_attention_heads = 24,
        hidden_dropout_prob = 0.1,
        num_labels = num_classes
    )    


for loc in locations:
    for aspect in aspects:

        # number of training steps based on the data
        num_steps = len(train_dataset[loc][aspect]) * epochs // batch_size
        warmup_steps = num_steps // 10  # 10% of the training steps
        save_steps = num_steps // epochs    # Save a checkpoint at the end of each epoch

        # training args for BERT model that we'll use for trainer 
        training_args = TrainingArguments(
            output_dir = f'{base_dir}/models/{loc}{aspect}/',          
            num_train_epochs = epochs,              
            per_device_train_batch_size = batch_size,  
            per_device_eval_batch_size = batch_size,   
            warmup_steps = warmup_steps,   
            weight_decay = 0.01,               
            logging_dir = f'{base_dir}/logs/{loc}{aspect}/',            
            logging_steps = 10,
            evaluation_strategy = 'epoch',
            learning_rate = 2e-5,
            save_steps = save_steps,
            seed = 21
        )


        # create BERT model
        model = BertForSequenceClassification.from_pretrained('bert-base-uncased', config=config)

        # create trainer for training the model
        trainer = Trainer(
            model=model,                         
            args=training_args,                  
            train_dataset=train_dataset[loc][aspect],         
            eval_dataset=val_dataset[loc][aspect]             
        )

        # train the model
        trainer.train()

        # save the model
        model.save_pretrained(f"{base_dir}/models/{loc}{aspect}/last_step")

        # evaluate the model
        results = trainer.predict(test_dataset[loc][aspect])

        # get the predicted labels
        scores = [softmax(prediction) for prediction in results.predictions]
        # get the label with the highest score
        predicted_labels = [np.argmax(x) for x in scores]

        # save the results in a csv file
        csv_output = np.insert(scores, 0, predicted_labels, axis=1)
        df = pd.DataFrame(csv_output)
        df[0] = df[0].astype("int")
        df.to_csv(f"{base_dir}/results/{loc}{aspect}.csv", index=False, header=header)

        gc.collect()


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--bert-base-uncased/snapshots/0a6aa9128b6194f4f3c4db429b6cb4891cdb421b/config.json
Model config BertConfig {
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 24,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.25.1",
  "type_vocab_size": 2,
  "use_cac

Epoch,Training Loss,Validation Loss
1,0.3391,0.363223


Saving model checkpoint to /content/drive/MyDrive/ABSA/models/ambience/checkpoint-126
Configuration saved in /content/drive/MyDrive/ABSA/models/ambience/checkpoint-126/config.json
Model weights saved in /content/drive/MyDrive/ABSA/models/ambience/checkpoint-126/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 800
  Batch size = 24


Training completed. Do not forget to share your model on huggingface.co/models =)


Configuration saved in /content/drive/MyDrive/ABSA/models/ambience/last_step/config.json
Model weights saved in /content/drive/MyDrive/ABSA/models/ambience/last_step/pytorch_model.bin
***** Running Prediction *****
  Num examples = 800
  Batch size = 24


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--bert-base-uncased/snapshots/0a6aa9128b6194f4f3c4db429b6cb4891cdb421b/pytorch_model.bin
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on 

Epoch,Training Loss,Validation Loss
1,0.7019,0.642571


Saving model checkpoint to /content/drive/MyDrive/ABSA/models/anecdotes/checkpoint-126
Configuration saved in /content/drive/MyDrive/ABSA/models/anecdotes/checkpoint-126/config.json
Model weights saved in /content/drive/MyDrive/ABSA/models/anecdotes/checkpoint-126/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 800
  Batch size = 24


Training completed. Do not forget to share your model on huggingface.co/models =)


Configuration saved in /content/drive/MyDrive/ABSA/models/anecdotes/last_step/config.json
Model weights saved in /content/drive/MyDrive/ABSA/models/anecdotes/last_step/pytorch_model.bin
***** Running Prediction *****
  Num examples = 800
  Batch size = 24


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--bert-base-uncased/snapshots/0a6aa9128b6194f4f3c4db429b6cb4891cdb421b/pytorch_model.bin
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on 

Epoch,Training Loss,Validation Loss
1,0.6107,0.668157


Saving model checkpoint to /content/drive/MyDrive/ABSA/models/food/checkpoint-126
Configuration saved in /content/drive/MyDrive/ABSA/models/food/checkpoint-126/config.json
Model weights saved in /content/drive/MyDrive/ABSA/models/food/checkpoint-126/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 800
  Batch size = 24


Training completed. Do not forget to share your model on huggingface.co/models =)


Configuration saved in /content/drive/MyDrive/ABSA/models/food/last_step/config.json
Model weights saved in /content/drive/MyDrive/ABSA/models/food/last_step/pytorch_model.bin
***** Running Prediction *****
  Num examples = 800
  Batch size = 24


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--bert-base-uncased/snapshots/0a6aa9128b6194f4f3c4db429b6cb4891cdb421b/pytorch_model.bin
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on 

Epoch,Training Loss,Validation Loss
1,0.2099,0.194604


Saving model checkpoint to /content/drive/MyDrive/ABSA/models/price/checkpoint-126
Configuration saved in /content/drive/MyDrive/ABSA/models/price/checkpoint-126/config.json
Model weights saved in /content/drive/MyDrive/ABSA/models/price/checkpoint-126/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 800
  Batch size = 24


Training completed. Do not forget to share your model on huggingface.co/models =)


Configuration saved in /content/drive/MyDrive/ABSA/models/price/last_step/config.json
Model weights saved in /content/drive/MyDrive/ABSA/models/price/last_step/pytorch_model.bin
***** Running Prediction *****
  Num examples = 800
  Batch size = 24


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--bert-base-uncased/snapshots/0a6aa9128b6194f4f3c4db429b6cb4891cdb421b/pytorch_model.bin
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on 

Epoch,Training Loss,Validation Loss
1,0.4013,0.371421


Saving model checkpoint to /content/drive/MyDrive/ABSA/models/service/checkpoint-126
Configuration saved in /content/drive/MyDrive/ABSA/models/service/checkpoint-126/config.json
Model weights saved in /content/drive/MyDrive/ABSA/models/service/checkpoint-126/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 800
  Batch size = 24


Training completed. Do not forget to share your model on huggingface.co/models =)


Configuration saved in /content/drive/MyDrive/ABSA/models/service/last_step/config.json
Model weights saved in /content/drive/MyDrive/ABSA/models/service/last_step/pytorch_model.bin
***** Running Prediction *****
  Num examples = 800
  Batch size = 24



#### Test the Model

In [33]:

def get_dataset(data_dir="data"):
    sentences, aspects, labels = [], [], []
    # read the data for all aspects at once
    data = pd.read_csv(f"{data_dir}/test_NLI_M.csv", header=0, sep="\t").values.tolist()
    for row in data:
        sentences.append(row[1])
        aspects.append(row[2])
        labels.append(row[3])
    return sentences, aspects, labels


def get_predictions(predictions_dir):
    predicted_labels, scores = [], []
    
    semeval_aspects = ["price", "anecdotes", "food", "ambience", "service"]
    data = {}
    
    # read the predictions for each aspect
    for aspect in semeval_aspects:
        data[aspect] = pd.read_csv(f"{predictions_dir}/{aspect}.csv", header=0).values.tolist()
    
    # get the predicted labels and scores for each aspect
    for i in range(len(data[semeval_aspects[0]])):
        for aspect in semeval_aspects:
            scores.append(data[aspect][i][1:])
            predicted_labels.append(int(data[aspect][i][0]))
    
    return predicted_labels, scores



def compute_semeval_PRF(test_labels, predicted_labels):
    num_total_intersection = 0
    num_total_test_aspects = 0
    num_total_predicted_aspects = 0
    num_examples = len(test_labels) // 5
    for i in range(num_examples):
        test_aspects = set()
        predicted_aspects = set()
        for j in range(5):
            if test_labels[i * 5 + j] != 4:
                test_aspects.add(j)
            if predicted_labels[i * 5 + j] != 4:
                predicted_aspects.add(j)
        if len(test_aspects) == 0:
            continue
        intersection = test_aspects.intersection(predicted_aspects)
        num_total_test_aspects += len(test_aspects)
        num_total_predicted_aspects += len(predicted_aspects)
        num_total_intersection += len(intersection)
    mi_P = num_total_intersection / num_total_predicted_aspects
    mi_R = num_total_intersection / num_total_test_aspects
    mi_F = (2 * mi_P * mi_R) / (mi_P + mi_R)
    return mi_P, mi_R, mi_F


def compute_semeval_accuracy(test_labels, predicted_labels, scores, num_classes=4):
    count_considered_examples = 0
    count_correct_examples = 0
    if num_classes == 4:
        for i in range(len(test_labels)):
            if test_labels[i] == 4:
                continue
            new_predicted_label = predicted_labels[i]
            if new_predicted_label == 4:
                new_scores = scores[i].copy()
                new_scores[4] = 0
                new_predicted_label = np.argmax(new_scores)
            if test_labels[i] == new_predicted_label:
                count_correct_examples += 1
            count_considered_examples += 1
        semeval_accuracy = count_correct_examples / count_considered_examples

    elif num_classes == 3:
        for i in range(len(test_labels)):
            if test_labels[i] >= 3:
                continue
            new_predicted_label = predicted_labels[i]
            if new_predicted_label >= 3:
                new_scores = scores[i].copy()
                new_scores[3] = 0
                new_scores[4] = 0
                new_predicted_label = np.argmax(new_scores)
            if test_labels[i] == new_predicted_label:
                count_correct_examples += 1
            count_considered_examples += 1
        semeval_accuracy = count_correct_examples / count_considered_examples
    elif num_classes == 2:
        for i in range(len(test_labels)):
            if test_labels[i] == 1 or test_labels[i] >= 3:
                continue
            new_predicted_label = predicted_labels[i]
            if new_predicted_label == 1 or new_predicted_label >= 3:
                new_scores = scores[i].copy()
                new_scores[1] = 0
                new_scores[3] = 0
                new_scores[4] = 0
                new_predicted_label = np.argmax(new_scores)
            if test_labels[i] == new_predicted_label:
                count_correct_examples += 1
            count_considered_examples += 1
        semeval_accuracy = count_correct_examples / count_considered_examples
    else:
        raise ValueError("num_classes must be equal to 2, 3, or 4")
    return semeval_accuracy


def test_data(task="NLI_M", dataset_type="", data_dir="", predictions_path=""):
    # get predictions with their scores
    predicted_labels, scores = get_predictions(predictions_path)

    # get the labels for test data
    test_sentences, test_aspects, test_labels = get_dataset(data_dir)

    # get precision, recall, micro_F1
    precision, recall, micro_F1 = compute_semeval_PRF(test_labels, predicted_labels)
    
    print("SemEval aspect evaluation")
    print(f"{task} precision: {precision}")
    print(f"{task} recall: {recall}")
    print(f"{task} micro F1: {micro_F1}")

    semeval_4_classes_acc = compute_semeval_accuracy(test_labels, predicted_labels, scores, 4)
    semeval_3_classes_acc = compute_semeval_accuracy(test_labels, predicted_labels, scores, 3)
    semeval_2_classes_acc = compute_semeval_accuracy(test_labels, predicted_labels, scores, 2)
    print("SemEval aspect evaluation")
    print(f"{task} 4-classes accuracy: {semeval_4_classes_acc}")
    print(f"{task} 3-classes accuracy: {semeval_3_classes_acc}")
    print(f"{task} 2-classes accuracy: {semeval_2_classes_acc}")


In [34]:
import sys
if base_dir not in sys.path:
    sys.path.insert(0, f'{base_dir}/')

test_data(task, dataset_type, f"{base_dir}/data", f"{base_dir}/results")

SemEval aspect evaluation
single precision: 0.939622641509434
single recall: 0.728780487804878
single micro F1: 0.8208791208791208
SemEval aspect evaluation
single 4-classes accuracy: 0.6448780487804878
single 3-classes accuracy: 0.6793422404933196
single 2-classes accuracy: 0.732650739476678
