In [1]:
# Quietly (-q) install or upgrade (-U) datasets, accelerate and evaluate into the kernel's environment.
# NOTE(review): versions are not pinned, so a rerun may resolve different releases.
# NOTE(review): transformers, torch and scikit-learn are imported later but not installed here —
# presumably they ship with the runtime image; confirm.
%pip install -qU datasets accelerate evaluate

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m336.6/336.6 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hNote: you may need to restart the kernel to use updated packages.


# Data Loading

In [2]:
from datasets import load_dataset

# SemEval ABSA: one Hub repository, two configurations (restaurant and laptop reviews).
dataset_restaurant = load_dataset(path="jakartaresearch/semeval-absa", name='restaurant')
dataset_laptop = load_dataset(path="jakartaresearch/semeval-absa", name='laptop')

# FABSA: loaded with its default configuration.
dataset_fabsa = load_dataset(path="jordiclive/FABSA")

README.md:   0%|          | 0.00/3.56k [00:00<?, ?B/s]

semeval-absa.py:   0%|          | 0.00/5.49k [00:00<?, ?B/s]

restaurant/train/0000.parquet:   0%|          | 0.00/231k [00:00<?, ?B/s]

restaurant/validation/0000.parquet:   0%|          | 0.00/69.6k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/3044 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/800 [00:00<?, ? examples/s]

laptop/train/0000.parquet:   0%|          | 0.00/225k [00:00<?, ?B/s]

laptop/validation/0000.parquet:   0%|          | 0.00/55.9k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/3048 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/800 [00:00<?, ? examples/s]

README.md:   0%|          | 0.00/5.43k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/747k [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/105k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/158k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/7930 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1057 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1587 [00:00<?, ? examples/s]

# Data Preprocessing

## Expand Dataset into a convenient format

In [3]:
from tqdm.auto import tqdm
from typing import TypedDict, Generator
from datasets import Dataset, Features, Value, ClassLabel, DatasetDict


class LabeledExample(TypedDict):
    """One flattened row: a sentence, one aspect found in it, and that aspect's polarity."""

    sentence: str  # full text the aspect belongs to
    aspect: str    # aspect string as yielded by the expand_dataset_* generators
    labels: str    # polarity name: 'positive', 'negative' or 'neutral'


def expand_dataset_semeval(dataset: Dataset) -> Generator[LabeledExample, None, None]:
    """Flatten SemEval records into one example per labelled aspect term.

    Every record is read through ``record['text']`` and the two parallel lists
    ``record['aspects']['term']`` and ``record['aspects']['polarity']``. A
    (term, polarity) pair is dropped when the polarity is not one of
    positive / negative / neutral, or when the term is the empty string.

    Args:
        dataset: iterable of records with the keys described above.

    Yields:
        Dicts with the keys ``'aspect'``, ``'labels'`` (polarity name) and
        ``'sentence'``.
    """
    accepted_polarities = {'positive', 'negative', 'neutral'}
    for record in tqdm(dataset):
        text = record['text']  # type: ignore
        aspects = record['aspects']  # type: ignore

        for aspect_term, sentiment in zip(aspects['term'], aspects['polarity']):
            if sentiment in accepted_polarities and aspect_term != '':
                yield {
                    'aspect': aspect_term,
                    'labels': sentiment,
                    'sentence': text,
                }


def get_dataset_semeval(dataset: DatasetDict) -> DatasetDict:
    """Build the flattened (aspect, labels, sentence) view of a SemEval corpus.

    The 'train' and 'validation' splits are each expanded with
    ``expand_dataset_semeval``. ``labels`` is stored as a ``ClassLabel`` whose
    names are listed in the order positive, negative, neutral.

    Args:
        dataset: raw corpus providing 'train' and 'validation' splits.

    Returns:
        A ``DatasetDict`` with the same two split names and the flattened rows.
    """
    schema = Features({
        'aspect': Value('string'),
        'labels': ClassLabel(names=['positive', 'negative', 'neutral']),
        'sentence': Value('string'),
    })

    flattened_splits = {}
    for split_name in ('train', 'validation'):
        flattened_splits[split_name] = Dataset.from_generator(
            expand_dataset_semeval,
            features=schema,
            gen_kwargs={'dataset': dataset[split_name]},
        )
    return DatasetDict(flattened_splits)


def expand_dataset_fabsa(dataset: Dataset) -> Generator[LabeledExample, None, None]:
    """Flatten FABSA records into one example per labelled aspect.

    Every record is read through ``record['text']`` and ``record['labels']``,
    the latter iterated as (term, polarity) pairs. A pair is dropped when the
    polarity is not one of positive / negative / neutral, or when the term is
    the empty string. Only the part of the term after its last ``": "`` is
    kept as the aspect.

    Args:
        dataset: iterable of records with the keys described above.

    Yields:
        Dicts with the keys ``'aspect'``, ``'labels'`` (polarity name) and
        ``'sentence'``.
    """
    accepted_polarities = {'positive', 'negative', 'neutral'}
    for record in tqdm(dataset):
        text = record['text']  # type: ignore

        for raw_term, sentiment in record['labels']:
            if sentiment in accepted_polarities and raw_term != '':
                yield {
                    # Same result as split(": ")[-1]: the text after the last ": ".
                    'aspect': raw_term.rsplit(": ", 1)[-1],
                    'labels': sentiment,
                    'sentence': text,
                }


def get_dataset_fabsa(dataset: DatasetDict) -> DatasetDict:
    """Build the flattened (aspect, labels, sentence) view of the FABSA corpus.

    The 'train', 'validation' and 'test' splits are each expanded with
    ``expand_dataset_fabsa``. ``labels`` is stored as a ``ClassLabel`` whose
    names are listed in the order positive, negative, neutral.

    Args:
        dataset: raw corpus providing 'train', 'validation' and 'test' splits.

    Returns:
        A ``DatasetDict`` with the same three split names and the flattened rows.
    """
    schema = Features({
        'aspect': Value('string'),
        'labels': ClassLabel(names=['positive', 'negative', 'neutral']),
        'sentence': Value('string'),
    })

    flattened_splits = {}
    for split_name in ('train', 'validation', 'test'):
        flattened_splits[split_name] = Dataset.from_generator(
            expand_dataset_fabsa,
            features=schema,
            gen_kwargs={'dataset': dataset[split_name]},
        )
    return DatasetDict(flattened_splits)


## Tokenize

In [4]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer, set_seed

# Seed the random number generators *before* the model is built. The classification
# head is not part of the checkpoint and is randomly initialised at load time, so
# without a seed the starting weights (and therefore the reported metrics) differ on
# every kernel restart. 42 is also the default `seed` of TrainingArguments.
set_seed(42)

model_name = "microsoft/deberta-v3-base" # 'google-bert/bert-base-cased'
# use_fast=False selects the slow (Python) tokenizer implementation.
# NOTE(review): the original note says it "must be like BertTokenizer"; the reason is not visible here.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)  # Must be like BertTokenizer
# Three output classes: positive, negative, neutral.
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)

The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/579 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
def tokenize_function(examples):
    """Encode a batch of (aspect, sentence) pairs as sequence pairs.

    Each pair is laid out as
    [CLS] aspect tokens [SEP] sentence tokens [SEP]
    and truncated to at most 512 tokens.

    No padding is applied here. With ``padding=True`` inside a batched ``map``
    every row is padded to the longest row of its (large) map batch and stored
    that way, so the trainer's ``DataCollatorWithPadding`` can no longer shrink
    training batches to their own longest sequence. Rows are therefore kept at
    their natural length and padded per batch by the collator.

    Args:
        examples: batch with 'aspect' and 'sentence' columns.

    Returns:
        The tokenizer output for the batch, without ``token_type_ids``.
    """
    return tokenizer(
        examples['aspect'],
        examples['sentence'],
        padding=False,
        truncation=True,
        return_token_type_ids=False,
        max_length=512,
    )

In [6]:
# Flatten each corpus to one row per (aspect, sentence) pair, then tokenize it in batches.
ds_restaurant = get_dataset_semeval(dataset_restaurant).map(
    tokenize_function,
    batched=True,
)
ds_laptop = get_dataset_semeval(dataset_laptop).map(
    tokenize_function,
    batched=True,
)
ds_fabsa = get_dataset_fabsa(dataset_fabsa).map(
    tokenize_function,
    batched=True,
)

Generating train split: 0 examples [00:00, ? examples/s]

  0%|          | 0/3044 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

  0%|          | 0/800 [00:00<?, ?it/s]

Map:   0%|          | 0/3608 [00:00<?, ? examples/s]

Map:   0%|          | 0/1120 [00:00<?, ? examples/s]

Generating train split: 0 examples [00:00, ? examples/s]

  0%|          | 0/3048 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

  0%|          | 0/800 [00:00<?, ?it/s]

Map:   0%|          | 0/2328 [00:00<?, ? examples/s]

Map:   0%|          | 0/638 [00:00<?, ? examples/s]

Generating train split: 0 examples [00:00, ? examples/s]

  0%|          | 0/7930 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

  0%|          | 0/1057 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

  0%|          | 0/1587 [00:00<?, ?it/s]

Map:   0%|          | 0/13998 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Map:   0%|          | 0/1858 [00:00<?, ? examples/s]

Map:   0%|          | 0/2812 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


# Training

In [7]:
from transformers import (
    Trainer,
    TrainingArguments,
    DataCollatorWithPadding
)
from datasets import concatenate_datasets


# Fine-tuning hyper-parameters.
# `resume_from_checkpoint` is deliberately not set here: on TrainingArguments it is an
# optional checkpoint *path* that the Trainer does not read itself, so passing False
# only suggested a behaviour that was never configured. To resume a run, pass
# `resume_from_checkpoint=...` to `trainer.train()` instead.
training_args = TrainingArguments(
    output_dir='./results',          # output directory
    overwrite_output_dir=True,       # allow writing into an existing ./results
    num_train_epochs=3,              # number of training epochs
    per_device_train_batch_size=8,   # batch size for training
    # per_device_eval_batch_size=16,   # batch size for evaluation
    warmup_steps=500,                # number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # strength of weight decay
    logging_dir='./logs',            # directory for storing logs
    # logging_steps=10,
    # evaluation_strategy="epoch",     # evaluation is done at the end of each epoch
    learning_rate=5e-5,              # peak learning rate reached after warmup
    report_to="none"                 # do not report to any experiment tracker
)

# # Freeze all the parameters in the model
# for param in model.deberta.parameters():
#     param.requires_grad = False

# # Enable gradients for the classifier layer (last layer)
# for param in model.classifier.parameters():
#     param.requires_grad = True

# Training pool: the 'train' splits of all three corpora, concatenated in the
# order restaurant, laptop, FABSA.
train_dataset = concatenate_datasets(
    [corpus['train'] for corpus in (ds_restaurant, ds_laptop, ds_fabsa)]
)

import numpy as np
from sklearn.utils.class_weight import compute_class_weight
import torch
from torch import nn
from sklearn.metrics import precision_recall_fscore_support



# "Balanced" class weights computed from the label column of the training pool,
# one weight per distinct label id found there.
# NOTE(review): nothing else in the visible notebook reads CLASS_WEIGHTS.
CLASS_WEIGHTS = compute_class_weight(
    "balanced",
    y=train_dataset['labels'],
    classes=np.unique(train_dataset['labels']),
)

In [8]:
class CustomTrainer(Trainer):
    """Trainer whose loss is class-weighted cross-entropy over the model's logits.

    Attributes:
        loss_class_weights: per-class loss weights indexed by label id. In this
            notebook the ids follow the ``ClassLabel`` name order
            (0 = positive, 1 = negative, 2 = neutral), so the default counts
            the neutral class double. Override it on a subclass or instance to
            try other weightings without touching ``compute_loss``.
    """

    loss_class_weights = (1.0, 1.0, 2.0)

    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        """Run a forward pass and return the weighted cross-entropy loss.

        Args:
            model: the model to call; it receives ``inputs`` unpacked as keywords.
            inputs: batch mapping that contains a ``"labels"`` entry.
            return_outputs: when true, return ``(loss, outputs)`` instead of
                only the loss.
            **kwargs: extra arguments passed by the caller; accepted and ignored.

        Returns:
            The scalar loss, or ``(loss, outputs)`` when ``return_outputs`` is set.
        """
        labels = inputs.get("labels")
        # Forward pass
        outputs = model(**inputs)
        logits = outputs.get('logits')

        # Build the weight vector directly on the logits' device and in their dtype:
        # this avoids a CPU allocation plus transfer on every step and keeps the
        # weights compatible with the logits if the model runs in reduced precision.
        class_weights = torch.tensor(
            self.loss_class_weights,
            dtype=logits.dtype,
            device=logits.device,
        )

        loss_fct = nn.CrossEntropyLoss(weight=class_weights)
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss


In [9]:
# Wire the model, the training arguments and the combined training pool into the
# weighted-loss trainer; batches are assembled by a padding collator built from
# the tokenizer.
trainer = CustomTrainer(
    args=training_args,
    model=model,
    train_dataset=train_dataset,
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
)

In [10]:
# Switch the model to training mode, then run the fine-tuning loop configured on `trainer`.
# NOTE(review): the explicit model.train() is presumably redundant because the Trainer
# manages the train/eval mode itself — confirm before removing.
model.train()
trainer.train()



Step,Training Loss
500,0.5786
1000,0.3875
1500,0.2938
2000,0.2526
2500,0.2459
3000,0.1527
3500,0.1411




TrainOutput(global_step=3738, training_loss=0.2817724584066147, metrics={'train_runtime': 4845.6276, 'train_samples_per_second': 12.341, 'train_steps_per_second': 0.771, 'total_flos': 1.56264216672096e+16, 'train_loss': 0.2817724584066147, 'epoch': 3.0})

# Evaluation

In [11]:
import evaluate
import numpy as np
from sklearn.metrics import classification_report


def compute_metrics(eval_pred):
    """Score predictions with macro-averaged F1 and a per-class text report.

    Args:
        eval_pred: ``(logits, labels)`` pair. ``logits`` holds one score per
            class along its last axis; ``labels`` holds the reference class ids.

    Returns:
        Dict with ``'f1_score'`` (the macro F1 value) and
        ``'classification_report'`` (the text report built by
        ``sklearn.metrics.classification_report``).
    """
    logits, labels = eval_pred
    # Predicted class = index of the largest logit along the class (last) axis.
    predictions = np.argmax(logits, axis=-1)

    # Load the f1 metric from evaluate
    f1 = evaluate.load('f1')

    # Macro average: every class contributes equally to the score, regardless of
    # how many examples it has (this is NOT the support-weighted average).
    f1_result = f1.compute(predictions=predictions, references=labels, average='macro')

    # Return a dictionary with the metric name and its value
    return {"f1_score": f1_result['f1'], 'classification_report': classification_report(labels, predictions)}

In [12]:
# Restaurant domain: predict on the flattened validation split and print the per-class report.
model.eval()
pred = trainer.predict(ds_restaurant['validation'])
restaurant_metrics = compute_metrics(
    eval_pred=(pred.predictions, ds_restaurant['validation']['labels'])
)
print(restaurant_metrics['classification_report'])



Downloading builder script:   0%|          | 0.00/6.79k [00:00<?, ?B/s]

              precision    recall  f1-score   support

           0       0.93      0.97      0.95       728
           1       0.84      0.87      0.86       196
           2       0.82      0.63      0.71       196

    accuracy                           0.90      1120
   macro avg       0.86      0.83      0.84      1120
weighted avg       0.89      0.90      0.89      1120



```
              precision    recall  f1-score   support

           0       0.91      0.97      0.94       728
           1       0.85      0.85      0.85       196
           2       0.80      0.59      0.68       196

    accuracy                           0.88      1120
   macro avg       0.85      0.80      0.82      1120
weighted avg       0.88      0.88      0.88      1120
```

In [13]:
# Laptop domain: predict on the flattened validation split and print the per-class report.
model.eval()
pred = trainer.predict(ds_laptop['validation'])
laptop_metrics = compute_metrics(
    eval_pred=(pred.predictions, ds_laptop['validation']['labels'])
)
print(laptop_metrics['classification_report'])



              precision    recall  f1-score   support

           0       0.92      0.91      0.91       341
           1       0.67      0.94      0.78       128
           2       0.77      0.56      0.65       169

    accuracy                           0.82       638
   macro avg       0.79      0.80      0.78       638
weighted avg       0.83      0.82      0.82       638



```
              precision    recall  f1-score   support

           0       0.92      0.90      0.91       341
           1       0.72      0.91      0.80       128
           2       0.77      0.64      0.70       169

    accuracy                           0.83       638
   macro avg       0.80      0.82      0.80       638
weighted avg       0.84      0.83      0.83       638
```

In [14]:
# FABSA: predict on the flattened test split and print the per-class report.
model.eval()
pred = trainer.predict(ds_fabsa['test'])
fabsa_metrics = compute_metrics(
    eval_pred=(pred.predictions, ds_fabsa['test']['labels'])
)
print(fabsa_metrics['classification_report'])



              precision    recall  f1-score   support

           0       0.97      0.97      0.97      1825
           1       0.94      0.93      0.93       896
           2       0.85      0.82      0.84        91

    accuracy                           0.95      2812
   macro avg       0.92      0.91      0.91      2812
weighted avg       0.95      0.95      0.95      2812



```
              precision    recall  f1-score   support

           0       0.95      0.94      0.94      1825
           1       0.92      0.79      0.85       896
           2       0.31      0.81      0.44        91

    accuracy                           0.89      2812
   macro avg       0.73      0.85      0.75      2812
weighted avg       0.92      0.89      0.90      2812
```

In [15]:
# from huggingface_hub import notebook_login

# notebook_login()

In [16]:
# model.push_to_hub('deberta-v3-base-absa-semeval2014-fabsa', private=True)

In [17]:
# tokenizer.push_to_hub('deberta-v3-base-absa-semeval2014-fabsa', private=True)