<a href="https://colab.research.google.com/github/samuelmnlu/assignment-ai/blob/main/indonesian_sentiment_bert_fixed.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Indonesian Sentiment Classification with BERT
This notebook fine-tunes the `indobenchmark/indobert-base-p1` checkpoint as a binary (positive/negative) sentiment classifier for Indonesian text, using a small toy dataset defined inline.

In [None]:
!pip install transformers datasets scikit-learn

In [None]:
import numpy as np
import pandas as pd
import torch
from datasets import load_dataset, Dataset
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments

In [None]:
# Fixed seed so the train/test split is identical on every Restart & Run All.
SEED = 42

# Toy corpus: six short Indonesian review snippets with binary sentiment labels.
data = {
    "text": [
        "Produk ini sangat bagus",
        "Layanan sangat buruk",
        "Sangat puas dengan pembelian ini",
        "Saya kecewa sekali",
        "Barang sesuai deskripsi",
        "Tidak akan beli lagi",
    ],
    "label": [1, 0, 1, 0, 1, 0],  # 1 = positif, 0 = negatif
}

# Keep the unsplit dataset under its own name so re-running this cell always
# starts from the same input instead of re-splitting an already split object.
raw_dataset = Dataset.from_dict(data)

# `dataset` holds the "train" and "test" splits used by the cells below.
dataset = raw_dataset.train_test_split(test_size=0.2, seed=SEED)

In [None]:
model_checkpoint = "indobenchmark/indobert-base-p1"

# Explicit token budget per example. Without it, padding="max_length" falls back
# to whatever maximum the tokenizer configuration declares, which makes the
# padded width (and the memory/time cost) depend on the checkpoint's metadata.
MAX_LENGTH = 128

tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)


def tokenize_function(examples):
    """Tokenize one batch of dataset rows.

    Args:
        examples: Batch mapping whose "text" entry holds the raw sentences.

    Returns:
        The tokenizer output for the batch, with every sequence padded or
        truncated to MAX_LENGTH tokens so all rows have the same width.
    """
    return tokenizer(
        examples["text"],
        padding="max_length",
        truncation=True,
        max_length=MAX_LENGTH,
    )


# Adds the tokenizer's output columns to both splits; the original columns are kept.
tokenized_datasets = dataset.map(tokenize_function, batched=True)

In [None]:
# Load the checkpoint with a sequence-classification head that has one output
# per sentiment label (0 = negatif, 1 = positif).
model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint,
    num_labels=2,
)

In [None]:
def compute_metrics(pred):
    """Compute evaluation metrics for the binary sentiment task.

    Args:
        pred: Prediction object exposing `predictions` (per-class scores with
            the classes along axis 1) and `label_ids` (the true labels).

    Returns:
        dict with 'accuracy', 'precision', 'recall' and 'f1' (the latter three
        computed for the positive class, label 1) plus 'confusion_matrix' as a
        nested list with rows = true label and columns = predicted label, both
        in the fixed order [0, 1].
    """
    labels = pred.label_ids
    preds = np.argmax(pred.predictions, axis=1)

    # zero_division=0: the evaluation split is tiny, so the positive class may
    # be missing from the labels or the predictions; report 0 for the undefined
    # metric instead of emitting a warning.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average='binary', zero_division=0
    )
    acc = accuracy_score(labels, preds)

    # labels=[0, 1] pins the matrix to 2x2 even when only one class appears in
    # the evaluation split; otherwise its shape would vary with the data.
    cm = confusion_matrix(labels, preds, labels=[0, 1])

    return {
        'accuracy': acc,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'confusion_matrix': cm.tolist()
    }

In [None]:
# Fine-tuning hyperparameters. Checkpoints are written under ./results and
# logs under ./logs.
training_args = TrainingArguments(
    output_dir="./results",
    do_train=True,
    do_eval=True,
    learning_rate=2e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=5,
    weight_decay=0.01,
    logging_dir='./logs',
)

# Bundle the model, the tokenized splits and the metric function.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    compute_metrics=compute_metrics,
)

# NOTE: training is launched in the next cell ("6. Train the model"). Calling
# trainer.train() here as well would fine-tune the same model twice on Run All.

In [None]:
# 6. Train the model: run the fine-tuning loop of the `trainer` configured above.
trainer.train()

In [None]:
# 7. Evaluation: compute the metrics on the evaluation split given to the Trainer.
results = trainer.evaluate()
print("\nEvaluation Results:", results, sep="\n")

In [None]:
# Confusion Matrix
# `test_data` is the held-out split (the same one the Trainer evaluates on);
# it still carries the raw "text" column alongside the tokenized features.
test_data = tokenized_datasets["test"]

predictions = trainer.predict(test_data)
preds = np.argmax(predictions.predictions, axis=1)  # highest-scoring class per row
labels = predictions.label_ids

# labels=[0, 1] keeps the matrix 2x2 (rows = true, columns = predicted) even
# if the small test split happens to contain a single class.
cm = confusion_matrix(labels, preds, labels=[0, 1])
print("\nConfusion Matrix:\n", cm)

In [None]:
# 8. Error Analysis
# Uses `preds` and `labels` from the confusion-matrix cell. The sentences are
# read from the tokenized test split, whose rows are in the same order as the
# predictions.
test_texts = tokenized_datasets["test"]["text"]

# Keep only the examples whose predicted label differs from the true label.
errors = [
    (text, true_label, pred_label)
    for text, true_label, pred_label in zip(test_texts, labels, preds)
    if pred_label != true_label
]

error_df = pd.DataFrame(errors, columns=["Text", "True Label", "Predicted Label"])
print("\nSample Errors:\n", error_df.head())