In [None]:
!pip install -U "transformers>=4.46.0" accelerate datasets scikit-learn

In [None]:
import os
import random
import numpy as np
import pandas as pd
import shutil

from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    confusion_matrix,
    classification_report,
)
from datasets import Dataset, DatasetDict
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
)
from peft import LoraConfig, get_peft_model, TaskType
from google.colab import files

# Config

In [None]:
# Tunable settings: dataset location, base checkpoint, tokenizer length cap,
# and the seed shared by shuffling, splitting, and RNG initialisation.
DATA_PATH = "train_absa.csv"
MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct"
MAX_LENGTH = 512
RANDOM_SEED = 42

def set_seed(seed):
    """Seed the Python, NumPy, and PyTorch random number generators.

    PyTorch is seeded as well because the model cell creates freshly
    initialised weights (classification head, LoRA adapters) before the
    Trainer is constructed; without this those weights differ on every run.

    Args:
        seed: Integer seed applied to every generator.
    """
    # Local import so this cell does not depend on edits to the import cell.
    import torch

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Exported for any subprocesses; the hash seed of the already-running
    # interpreter is not changed by setting this variable.
    os.environ["PYTHONHASHSEED"] = str(seed)

set_seed(RANDOM_SEED)

# Load Data

In [None]:
# Read the CSV, shuffle it reproducibly, and keep only rows whose sentiment is
# one of the three supported classes.
df = (
    pd.read_csv(DATA_PATH)
    .sample(frac=1.0, random_state=RANDOM_SEED)
    .reset_index(drop=True)
    .loc[lambda frame: frame["sentiment"].isin(["positive", "negative", "neutral"])]
    .reset_index(drop=True)
)

print(df["sentiment"].value_counts())

# Alphabetical label order gives a stable name <-> integer id mapping.
labels = sorted(df["sentiment"].unique())
label2id = {name: idx for idx, name in enumerate(labels)}
id2label = dict(enumerate(labels))
df["label"] = df["sentiment"].map(label2id)

# 90/10 split, stratified on the encoded label so both parts keep the same
# class proportions.
train, val = train_test_split(
    df,
    test_size=0.1,
    stratify=df["label"],
    random_state=RANDOM_SEED,
)

print("Train:", len(train), "Val:", len(val))

In [None]:
def _as_hf_dataset(frame):
    """Convert the modelling columns of a split DataFrame into a `Dataset`."""
    return Dataset.from_pandas(frame[["id", "text", "aspect", "label"]])

# The conversion happens before the index reset below, so each frame still
# carries its shuffled index at this point (the preprocessing cell later drops
# an `__index_level_0__` column, presumably produced by that index).
train_ds, val_ds = _as_hf_dataset(train), _as_hf_dataset(val)

dataset = DatasetDict({
    "train": train_ds,
    "validation": val_ds,
})

# Give both frames a clean 0..n-1 index; row order is unchanged, so positional
# lookups still line up with the datasets built above.
train, val = (frame.reset_index(drop=True) for frame in (train, val))

In [None]:
# Display the class balance of the training split (rendered as the cell output).
train["sentiment"].value_counts()

# Tokenizer

In [None]:
# Load the tokenizer that matches the base checkpoint named in MODEL_NAME.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Qwen usually uses EOS as the pad token; set it explicitly to be safe when the
# tokenizer ships without one.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Left padding could be used for Qwen2ForSequenceClassification if desired,
# but right padding combined with max_length padding is also fine here.
tokenizer.padding_side = "right"

# The allowed answers shown in the prompt are derived from `labels` (built in
# the data-loading cell) so the instructions always list exactly the classes
# the classifier is trained on. The previous hard-coded text mentioned only
# positive/negative even though the data filter also keeps "neutral".
_LABEL_BULLETS = "\n".join(f"   - {name}" for name in labels)
_LABEL_CHOICES = ", ".join(labels)

# Plain concatenation (not an f-string) keeps the `{review}` and `{aspect}`
# placeholders intact for the later `.format(review=..., aspect=...)` call.
POWER_PROMPT_TEMPLATE = (
    """You are an expert in Aspect-Based Sentiment Analysis (ABSA).
Your task is to analyze a movie review and determine the sentiment towards a specific aspect.

Follow these strict rules:
1. Focus ONLY on the given aspect, not the whole review.
2. Use the tone and context of the review to infer sentiment.
3. Output MUST be exactly one of the following lowercase labels:
"""
    + _LABEL_BULLETS
    + """

Review:
{review}

Aspect:
{aspect}

Question:
What is the sentiment towards the given aspect?

Answer with ONLY ONE WORD ("""
    + _LABEL_CHOICES
    + """), no explanation:

Sentiment:"""
)

In [None]:
def preprocess(example):
    """Build the ABSA prompt for one row and tokenize it to a fixed length.

    The template places the aspect and the question *after* the review, and the
    tokenizer truncates from the right. A long review would therefore push the
    aspect and question out of the window. To avoid that, the review is trimmed
    to the token budget left over by the rest of the prompt before formatting.
    Reviews that already fit are passed through unchanged.

    Args:
        example: Mapping with "text", "aspect", and "label" entries.

    Returns:
        The tokenizer output padded/truncated to MAX_LENGTH, with an integer
        "labels" entry added.
    """
    aspect = example["aspect"]
    # str() mirrors what str.format would do with a non-string value.
    review = str(example["text"])

    # Token cost of everything in the prompt except the review itself.
    scaffold = POWER_PROMPT_TEMPLATE.format(review="", aspect=aspect)
    scaffold_len = len(tokenizer(scaffold)["input_ids"])

    # Token merges at the seams between review and template can shift the
    # final count slightly, so keep a few tokens of headroom.
    seam_slack = 4
    review_budget = max(MAX_LENGTH - scaffold_len - seam_slack, 0)

    review_ids = tokenizer(review, add_special_tokens=False)["input_ids"]
    if len(review_ids) > review_budget:
        review = tokenizer.decode(review_ids[:review_budget])

    prompt = POWER_PROMPT_TEMPLATE.format(review=review, aspect=aspect)

    # truncation=True stays as a safety net in case the estimate is off.
    encoded = tokenizer(
        prompt,
        truncation=True,
        padding="max_length",
        max_length=MAX_LENGTH,
    )
    encoded["labels"] = int(example["label"])
    return encoded

# Drop the raw columns once they have been turned into model inputs.
# `__index_level_0__` is only present when the source DataFrame had a
# non-default index, so the list is filtered to columns that actually exist
# instead of failing on a missing one.
_raw_columns = [
    name
    for name in ("id", "text", "aspect", "__index_level_0__")
    if name in dataset["train"].column_names
]

encoded_ds = dataset.map(
    preprocess,
    batched=False,
    remove_columns=_raw_columns,
)

# Model

In [None]:
# Load the base checkpoint with a sequence-classification head sized to the
# label set, carrying the id <-> name mappings in the config.
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=len(labels),
    id2label=id2label,
    label2id=label2id,
)

# The classification model needs the pad id to locate the last real token of
# each padded sequence.
model.config.pad_token_id = tokenizer.pad_token_id

# Training runs with gradient checkpointing while the base weights (including
# the embeddings) are frozen by LoRA. Making the embedding outputs require
# grad ensures gradients still flow back through the checkpointed layers to
# the adapter weights.
model.enable_input_require_grads()

# LoRA adapters on every attention and MLP projection.
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

In [None]:
# Training configuration, grouped by concern and then unpacked into
# TrainingArguments. Note the output directory name does not reflect the
# checkpoint actually configured in MODEL_NAME.
_output_options = dict(
    output_dir="lamma3.2_absa_lora",
    overwrite_output_dir=True,
    report_to="none",
)

# Evaluate and checkpoint once per epoch; keep the best checkpoint by macro F1.
_schedule_options = dict(
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="steps",
    logging_steps=50,
    save_total_limit=3,
    load_best_model_at_end=True,
    metric_for_best_model="f1_macro",
    greater_is_better=True,
)

_optimisation_options = dict(
    num_train_epochs=5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=8,
    learning_rate=2e-4,
    weight_decay=0.01,
    warmup_ratio=0.1,
    fp16=True,
    gradient_checkpointing=True,
)

args = TrainingArguments(
    **_output_options,
    **_schedule_options,
    **_optimisation_options,
)

In [None]:
def compute_metrics(p):
    """Compute accuracy and macro-averaged F1 for an evaluation pass.

    Args:
        p: Object exposing `predictions` (per-class scores, argmax-ed over
            axis 1) and `label_ids` (reference class ids).

    Returns:
        Dict with "accuracy" and "f1_macro".
    """
    reference_ids = p.label_ids
    predicted_ids = np.argmax(p.predictions, axis=1)

    metrics = {}
    metrics["accuracy"] = accuracy_score(reference_ids, predicted_ids)
    metrics["f1_macro"] = f1_score(reference_ids, predicted_ids, average="macro")
    return metrics

# Wire model, data, and metrics into the Trainer. The tokenizer is passed as
# `processing_class`, which replaces the deprecated `tokenizer=` argument in
# transformers >= 4.46 (the minimum version installed by this notebook).
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=encoded_ds["train"],
    eval_dataset=encoded_ds["validation"],
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
)

trainer.train()

# Eval

In [None]:
# Evaluate on the validation split and print each metric, using four decimals
# where the value supports float formatting.
val_metrics = trainer.evaluate(encoded_ds["validation"])
for metric_name, metric_value in val_metrics.items():
    try:
        formatted = f"{metric_name}: {metric_value:.4f}"
    except TypeError:
        # Value does not accept the float format spec; print it as-is.
        print(metric_name, ":", metric_value)
    else:
        print(formatted)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report
from collections import Counter
import textwrap

def plot_confusion_matrix(trainer, eval_dataset, id2label):
    """Plot the confusion matrix of the trainer's predictions on a dataset.

    The matrix is always built over every class id in `id2label`, so its shape
    matches the tick labels even when a class is absent from both the
    references and the predictions.

    Args:
        trainer: Object whose `predict(eval_dataset)` returns `predictions`
            (per-class scores) and `label_ids`.
        eval_dataset: Dataset forwarded to `trainer.predict`.
        id2label: Mapping from class id (0..n-1) to display name.
    """
    preds_output = trainer.predict(eval_dataset)
    preds = np.argmax(preds_output.predictions, axis=1)
    labels_np = preds_output.label_ids

    class_ids = list(range(len(id2label)))
    label_names = [id2label[i] for i in class_ids]
    # Explicit `labels` pins the row/column order and size to `class_ids`.
    cm = confusion_matrix(labels_np, preds, labels=class_ids)

    fig, ax = plt.subplots(figsize=(6, 5))
    image = ax.imshow(cm, interpolation="nearest")
    ax.set_title("Confusion Matrix")
    fig.colorbar(image, ax=ax)

    tick_marks = np.arange(len(label_names))
    ax.set_xticks(tick_marks)
    ax.set_xticklabels(label_names, rotation=45, ha="right")
    ax.set_yticks(tick_marks)
    ax.set_yticklabels(label_names)

    # Write the count in every cell, switching text colour at half the
    # maximum count so it stays readable.
    thresh = cm.max() / 2.0 if cm.max() > 0 else 0.5
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(
                j, i, format(cm[i, j], "d"),
                ha="center", va="center",
                color="white" if cm[i, j] > thresh else "black",
            )

    ax.set_xlabel("Predicted label")
    ax.set_ylabel("True label")
    fig.tight_layout()
    plt.show()

def show_classification_report(trainer, eval_dataset, id2label):
    """Print scikit-learn's per-class classification report for a dataset.

    The report is computed over every class id in `id2label`, so the number of
    reported classes always matches `target_names`, even if a class does not
    occur in the references or predictions.

    Args:
        trainer: Object whose `predict(eval_dataset)` returns `predictions`
            (per-class scores) and `label_ids`.
        eval_dataset: Dataset forwarded to `trainer.predict`.
        id2label: Mapping from class id (0..n-1) to display name.
    """
    preds_output = trainer.predict(eval_dataset)

    preds = np.argmax(preds_output.predictions, axis=1)
    labels_np = preds_output.label_ids

    class_ids = list(range(len(id2label)))
    target_names = [id2label[i] for i in class_ids]

    print("\n=== Classification Report ===")
    print(
        classification_report(
            labels_np,
            preds,
            labels=class_ids,
            target_names=target_names,
            digits=4,
            # Classes with no samples or no predictions report 0 without
            # emitting a warning.
            zero_division=0,
        )
    )

from collections import Counter

def show_label_distributions(trainer, eval_dataset, id2label):
    """Print, for every class, how often it occurs as reference vs. prediction.

    Args:
        trainer: Object whose `predict(eval_dataset)` returns `predictions`
            (per-class scores) and `label_ids`.
        eval_dataset: Dataset forwarded to `trainer.predict`.
        id2label: Mapping from class id (0..n-1) to display name.
    """
    output = trainer.predict(eval_dataset)
    predicted_ids = np.argmax(output.predictions, axis=1)

    # Counter lookups return 0 for classes that never occur.
    reference_tally = Counter(output.label_ids)
    predicted_tally = Counter(predicted_ids)

    print("\n=== Label Distribution (True vs Pred) ===")
    print("index -> label_name | true_count | pred_count")
    for class_id in range(len(id2label)):
        name = id2label[class_id]
        n_true = reference_tally[class_id]
        n_pred = predicted_tally[class_id]
        print(f"{class_id} -> {name:>10} | true={n_true:4d} | pred={n_pred:4d}")

def show_error_examples(trainer, eval_dataset, val_df, id2label, max_examples=10):
    """Print a summary of misclassified rows followed by the first few of them.

    Args:
        trainer: Object whose `predict(eval_dataset)` returns `predictions`
            (per-class scores) and `label_ids`.
        eval_dataset: Dataset forwarded to `trainer.predict`.
        val_df: DataFrame with "id", "aspect", and "text" columns; rows are
            looked up by position, so it must be in the same order as
            `eval_dataset`.
        id2label: Mapping from class id to display name.
        max_examples: Maximum number of misclassified rows to print.
    """
    output = trainer.predict(eval_dataset)
    predicted_ids = np.argmax(output.predictions, axis=1)
    reference_ids = output.label_ids

    # Positions where prediction and reference disagree.
    mistakes = np.nonzero(predicted_ids != reference_ids)[0]
    n_total = len(reference_ids)
    n_wrong = len(mistakes)

    print(f"\n=== Error Analysis ===")
    print(f"Total validation samples : {n_total}")
    print(f"Total misclassified      : {n_wrong} "
          f"({n_wrong/n_total:.2%})")
    print(f"Showing up to {max_examples} examples:\n")

    separator = "-" * 80
    for position in mistakes[:max_examples]:
        sample = val_df.iloc[position]
        expected_name = id2label[int(reference_ids[position])]
        predicted_name = id2label[int(predicted_ids[position])]
        print(separator)
        print(f"ID      : {sample['id']}")
        print(f"ASPECT  : {sample['aspect']}")
        print(f"TRUE    : {expected_name}")
        print(f"PRED    : {predicted_name}")
        print("TEXT    :")
        print(sample["text"])
        print()


In [None]:
# The plotting helper defined in this notebook is `plot_confusion_matrix`;
# no `show_confusion_matrix` exists, so the old call raised NameError.
plot_confusion_matrix(trainer, encoded_ds["validation"], id2label)

In [None]:
# Per-class precision / recall / F1 on the validation split.
show_classification_report(
    trainer=trainer,
    eval_dataset=encoded_ds["validation"],
    id2label=id2label,
)

In [None]:
# Reference vs. predicted class counts on the validation split.
show_label_distributions(
    trainer=trainer,
    eval_dataset=encoded_ds["validation"],
    id2label=id2label,
)

In [None]:
# Inspect up to ten misclassified validation rows; `val` supplies the raw
# id / aspect / text for each position.
show_error_examples(
    trainer=trainer,
    eval_dataset=encoded_ds["validation"],
    val_df=val,
    id2label=id2label,
    max_examples=10,
)

In [None]:
# Write the trained (PEFT-wrapped) model and the tokenizer to one directory,
# model first, then the tokenizer.
save_path = "/content/llama3.2_absa_saved"
for artifact in (trainer.model, tokenizer):
    artifact.save_pretrained(save_path)
print("Model disimpan di:", save_path)

# Zip that directory and hand the archive to the browser via Colab.
zip_path = "/content/llama3.2_absa_saved.zip"
shutil.make_archive(
    base_name="/content/llama3.2_absa_saved",
    format="zip",
    root_dir=save_path,
)
files.download(zip_path)