In [1]:
import os
import random
import numpy as np
import pandas as pd
import shutil

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from datasets import Dataset, DatasetDict
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
)
from google.colab import files


# Config

In [2]:
DATA_PATH = "train_absa.csv"
MODEL_NAME = "microsoft/deberta-v3-base"
MAX_LENGTH = 256
RANDOM_SEED = 42

In [3]:
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)

set_seed(RANDOM_SEED)

# Load Data

In [4]:
df = pd.read_csv(DATA_PATH)
df = df.sample(frac=1.0, random_state=RANDOM_SEED).reset_index(drop=True)
df = df[df["sentiment"].isin(["positive", "negative", "neutral"])].reset_index(drop=True)

# Label mapping
labels = sorted(df["sentiment"].unique())
label2id = {l: i for i, l in enumerate(labels)}
id2label = {i: l for l, i in label2id.items()}
df["label"] = df["sentiment"].map(label2id)

# Split
train, val = train_test_split(
    df,
    test_size=0.1,
    stratify=df["label"],
    random_state=RANDOM_SEED,
)

print("Train:", len(train), "Val:", len(val))

Train: 3927 Val: 437


In [5]:
train_ds = Dataset.from_pandas(train[["id","text","aspect","label"]])
val_ds   = Dataset.from_pandas(val[["id","text","aspect","label"]])

dataset = DatasetDict({
    "train": train_ds,
    "validation": val_ds,
})

In [6]:
train["sentiment"].value_counts()

Unnamed: 0_level_0,count
sentiment,Unnamed: 1_level_1
positive,1918
negative,1324
neutral,685


# Tokenizer

In [7]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

def preprocess(example):
    result = tokenizer(
        example["aspect"],
        example["text"],
        truncation=True,
        padding="max_length",
        max_length=MAX_LENGTH
    )
    result["labels"] = example["label"]
    return result

encoded_ds = dataset.map(preprocess, batched=True, remove_columns=["id","text","aspect","__index_level_0__"])


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/579 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]



Map:   0%|          | 0/3927 [00:00<?, ? examples/s]

Map:   0%|          | 0/437 [00:00<?, ? examples/s]

# Model

In [8]:
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=len(labels),
    id2label=id2label,
    label2id=label2id,
)

pytorch_model.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/371M [00:00<?, ?B/s]

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
args = TrainingArguments(
    output_dir="deberta_absa_model",
    overwrite_output_dir=True,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="steps",
    logging_steps=50,

    save_total_limit=3,
    load_best_model_at_end=True,
    metric_for_best_model="f1_macro",
    greater_is_better=True,

    num_train_epochs=7,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    learning_rate=2e-5,
    weight_decay=0.01,
    warmup_ratio=0.1,
    fp16=True,
    report_to="none"
)

In [10]:
def compute_metrics(p):
    preds = np.argmax(p.predictions, axis=1)
    labels = p.label_ids
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1_macro": f1_score(labels, preds, average="macro")
    }

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=encoded_ds["train"],
    eval_dataset=encoded_ds["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

trainer.train()

  trainer = Trainer(
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 2, 'bos_token_id': 1}.


Epoch,Training Loss,Validation Loss,Accuracy,F1 Macro
1,0.7974,0.609644,0.73913,0.634793
2,0.5531,0.529483,0.79405,0.770458
3,0.3749,0.44063,0.832952,0.809341
4,0.2819,0.530143,0.82151,0.794829
5,0.2023,0.606571,0.839817,0.82279
6,0.1567,0.73557,0.823799,0.802526
7,0.1186,0.772798,0.823799,0.798633


TrainOutput(global_step=1722, training_loss=0.3719546222382168, metrics={'train_runtime': 873.4389, 'train_samples_per_second': 31.472, 'train_steps_per_second': 1.972, 'total_flos': 3616427224650240.0, 'train_loss': 0.3719546222382168, 'epoch': 7.0})

In [11]:
print("===== Evaluation on validation set =====")
val_metrics = trainer.evaluate(encoded_ds["validation"])
for k, v in val_metrics.items():
    print(f"{k}: {v:.4f}")

===== Evaluation on validation set =====


eval_loss: 0.6066
eval_accuracy: 0.8398
eval_f1_macro: 0.8228
eval_runtime: 3.2621
eval_samples_per_second: 133.9620
eval_steps_per_second: 4.2920
epoch: 7.0000


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report
from collections import Counter
import textwrap

def plot_confusion_matrix(trainer, eval_dataset, id2label):
    preds_output = trainer.predict(eval_dataset)
    preds = np.argmax(preds_output.predictions, axis=1)
    labels_np = preds_output.label_ids

    cm = confusion_matrix(labels_np, preds)
    label_names = [id2label[i] for i in range(len(id2label))]

    plt.figure(figsize=(6, 5))
    plt.imshow(cm, interpolation="nearest")  # gunakan colormap default
    plt.title("Confusion Matrix")
    plt.colorbar()

    tick_marks = np.arange(len(label_names))
    plt.xticks(tick_marks, label_names, rotation=45, ha="right")
    plt.yticks(tick_marks, label_names)

    # Tulis angka di tiap kotak
    thresh = cm.max() / 2.0 if cm.max() > 0 else 0.5
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            plt.text(
                j, i, format(cm[i, j], "d"),
                ha="center", va="center",
                color="white" if cm[i, j] > thresh else "black",
            )

    plt.xlabel("Predicted label")
    plt.ylabel("True label")
    plt.tight_layout()
    plt.show()

def show_classification_report(trainer, eval_dataset, id2label):
    preds_output = trainer.predict(eval_dataset)

    preds = np.argmax(preds_output.predictions, axis=1)
    labels_np = preds_output.label_ids

    target_names = [id2label[i] for i in range(len(id2label))]

    print("\n=== Classification Report ===")
    print(classification_report(labels_np, preds, target_names=target_names, digits=4))

from collections import Counter

def show_label_distributions(trainer, eval_dataset, id2label):
    preds_output = trainer.predict(eval_dataset)

    preds = np.argmax(preds_output.predictions, axis=1)
    labels_np = preds_output.label_ids

    true_counts = Counter(labels_np)
    pred_counts = Counter(preds)

    print("\n=== Label Distribution (True vs Pred) ===")
    print("index -> label_name | true_count | pred_count")
    for i in range(len(id2label)):
        label_name = id2label[i]
        t = true_counts.get(i, 0)
        p = pred_counts.get(i, 0)
        print(f"{i} -> {label_name:>10} | true={t:4d} | pred={p:4d}")

def show_error_examples(trainer, eval_dataset, val_df, id2label, max_examples=10):
    preds_output = trainer.predict(eval_dataset)

    preds = np.argmax(preds_output.predictions, axis=1)
    labels_np = preds_output.label_ids

    wrong_idx = np.where(preds != labels_np)[0]

    print(f"\n=== Error Analysis ===")
    print(f"Total validation samples : {len(labels_np)}")
    print(f"Total misclassified      : {len(wrong_idx)} "
          f"({len(wrong_idx)/len(labels_np):.2%})")
    print(f"Showing up to {max_examples} examples:\n")

    for idx in wrong_idx[:max_examples]:
        row = val_df.iloc[idx]
        true_label = id2label[int(labels_np[idx])]
        pred_label = id2label[int(preds[idx])]
        print("-" * 80)
        print(f"ID      : {row['id']}")
        print(f"ASPECT  : {row['aspect']}")
        print(f"TRUE    : {true_label}")
        print(f"PRED    : {pred_label}")
        print("TEXT    :")
        print(row["text"])
        print()


In [None]:
plot_confusion_matrix(trainer, encoded_ds["validation"], id2label)

In [None]:
show_classification_report(trainer, encoded_ds["validation"], id2label)

In [None]:
show_label_distributions(trainer, encoded_ds["validation"], id2label)

In [None]:
show_error_examples(trainer, encoded_ds["validation"], val, id2label, max_examples=10)

In [12]:
save_path = "/content/deberta_absa_saved"
trainer.model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)
print("Model disimpan di:", save_path)

Model disimpan di: /content/deberta_absa_saved


In [13]:
zip_path = "/content/deberta_absa_saved.zip"
shutil.make_archive("/content/deberta_absa_saved", 'zip', save_path)
files.download(zip_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>