<a href="https://colab.research.google.com/github/nuyyis/SENTIMEN-ANALISIS-INDONLU/blob/main/bs_32_indobert_5e_6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!git clone https://github.com/IndoNLP/indonlu.git
%cd indonlu

Cloning into 'indonlu'...
remote: Enumerating objects: 509, done.[K
remote: Counting objects: 100% (193/193), done.[K
remote: Compressing objects: 100% (83/83), done.[K
remote: Total 509 (delta 119), reused 139 (delta 110), pack-reused 316 (from 1)[K
Receiving objects: 100% (509/509), 9.46 MiB | 19.53 MiB/s, done.
Resolving deltas: 100% (239/239), done.
/content/indonlu


In [None]:
import os, sys
# Make the IndoNLU checkout importable so `from utils... import ...` resolves
# regardless of the current working directory. The notebook clones the repository
# to /content/indonlu; the Kaggle location is kept for backward compatibility.
for _repo_dir in ('/content/indonlu', '/kaggle/working/indonlu'):
    if _repo_dir not in sys.path:  # avoid piling up duplicates when the cell is re-run
        sys.path.append(_repo_dir)

import random
import numpy as np
import pandas as pd
import torch
from torch import optim
import torch.nn.functional as F
from tqdm import tqdm

# --- PERUBAHAN DI SINI ---
from transformers import RobertaTokenizer, RobertaConfig, RobertaForSequenceClassification, BertTokenizer, BertConfig, BertForSequenceClassification
# -----------------------
from nltk.tokenize import TweetTokenizer

from utils.forward_fn import forward_sequence_classification
from utils.metrics import document_sentiment_metrics_fn
# --- DAN DI SINI (dari permintaan sebelumnya) ---
from utils.data_utils import DocumentSentimentDataset, DocumentSentimentDataLoader
from sklearn.metrics import classification_report
# ---------------------------------------------

In [None]:
# Fine-tuning hyperparameters, referenced by the optimizer, data loaders and training loop.
LEARNING_RATE = 5e-6  # learning rate handed to the optimizer
BATCH_SIZE = 32       # batch size used by the train/valid/test loaders
EPOCHS = 20           # upper bound on the number of training epochs

In [None]:
###
# common functions
###
def set_seed(seed):
    """Seed every random number generator this notebook relies on.

    Applies `seed` to Python's `random`, NumPy's global RNG and PyTorch
    (through `torch.manual_seed` and `torch.cuda.manual_seed`).
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)

def count_param(module, trainable=False):
    """Return the total number of elements across `module.parameters()`.

    When `trainable` is truthy, only parameters whose `requires_grad` flag is
    set are counted; otherwise every parameter is included.
    """
    params = module.parameters()
    if trainable:
        params = (param for param in params if param.requires_grad)
    return sum(param.numel() for param in params)

def get_lr(optimizer):
    """Return the `'lr'` entry of the optimizer's first parameter group.

    Returns None when `optimizer.param_groups` is empty.
    """
    first_group = next(iter(optimizer.param_groups), None)
    if first_group is None:
        return None
    return first_group['lr']

def metrics_to_string(metric_dict):
    """Render a metrics mapping as space-separated `name:value` pairs.

    Each value is formatted with two decimal places, in the mapping's
    iteration order. An empty mapping yields an empty string.
    """
    return ' '.join(
        '{}:{:.2f}'.format(name, score) for name, score in metric_dict.items()
    )

In [None]:
# Fix the random seed once, before the model and data loaders are created.
SEED = 26092020
set_seed(SEED)

In [None]:
# Hugging Face Hub ID of the pretrained checkpoint to fine-tune (IndoBERT Base, phase 1).
MODEL_NAME = 'indobenchmark/indobert-base-p1'

# This checkpoint is loaded with the BERT classes (BertTokenizer, BertConfig,
# BertForSequenceClassification), not the RoBERTa ones that are also imported.

# Load the configuration and take the number of output labels from the
# sentiment dataset class.
config = BertConfig.from_pretrained(MODEL_NAME)
config.num_labels = DocumentSentimentDataset.NUM_LABELS

# Tokenizer matching the checkpoint's vocabulary.
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)

# Instantiate the sequence-classification model from the pretrained weights.
model = BertForSequenceClassification.from_pretrained(MODEL_NAME, config=config)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/498M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Display the module tree of the loaded model as the cell output.
model

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(50000, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [None]:
# Total number of parameter elements in the model (trainable or not, since
# `trainable` is left at its default of False).
count_param(model)

124443651

In [None]:
# Locations of the SmSA document-sentiment splits inside the cloned repository.
# The test split is the labelled `test_preprocess.tsv`, so accuracy can be
# computed on it.
train_dataset_path, valid_dataset_path, test_dataset_path = (
    '/content/indonlu/dataset/smsa_doc-sentiment-prosa/train_preprocess.tsv',
    '/content/indonlu/dataset/smsa_doc-sentiment-prosa/valid_preprocess.tsv',
    '/content/indonlu/dataset/smsa_doc-sentiment-prosa/test_preprocess.tsv',
)

In [None]:
# Build one dataset per split; all three share the tokenizer and the
# lowercase=True setting.
train_dataset, valid_dataset, test_dataset = (
    DocumentSentimentDataset(split_path, tokenizer, lowercase=True)
    for split_path in (train_dataset_path, valid_dataset_path, test_dataset_path)
)

# Loader settings common to every split; only the training loader shuffles.
loader_kwargs = dict(max_seq_len=512, batch_size=BATCH_SIZE, num_workers=4)
train_loader = DocumentSentimentDataLoader(dataset=train_dataset, shuffle=True, **loader_kwargs)
valid_loader = DocumentSentimentDataLoader(dataset=valid_dataset, shuffle=False, **loader_kwargs)
test_loader = DocumentSentimentDataLoader(dataset=test_dataset, shuffle=False, **loader_kwargs)



In [None]:
# Label-name -> index and index -> label-name lookup tables exposed by the dataset class.
w2i = DocumentSentimentDataset.LABEL2INDEX
i2w = DocumentSentimentDataset.INDEX2LABEL
print(w2i)
print(i2w)

{'positive': 0, 'neutral': 1, 'negative': 2}
{0: 'positive', 1: 'neutral', 2: 'negative'}


In [None]:
# Move the model to the GPU first, then build the optimizer from its parameters.
# This is the order recommended by the torch.optim documentation: the optimizer
# should be constructed over the parameters as they live on the training device.
model = model.cuda()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

model.safetensors:   0%|          | 0.00/498M [00:00<?, ?B/s]

In [None]:
# Fine-tune the model with per-epoch validation, checkpointing of the best
# validation F1 and early stopping, then evaluate the best checkpoint once on
# the test set and save its predictions.

NUM_EPOCHS = EPOCHS  # maximum number of epochs; early stopping may end training sooner

# No learning-rate scheduler is used. Assign a scheduler object here to have it
# stepped after every optimizer update.
scheduler = None

# Best validation F1 seen so far. Starts below any value the loop compares
# against, so the first epoch always writes a checkpoint.
best_validation_f1 = -1.0

# Early-stopping configuration.
PATIENCE = 3          # epochs to wait while validation F1 does not improve
patience_counter = 0  # epochs since the last improvement

for epoch in range(NUM_EPOCHS):
    print(f"\n--- Epoch {epoch + 1}/{NUM_EPOCHS} ---")

    # ==================================
    # === 1. TRAINING PHASE ===
    # ==================================
    model.train()
    # Explicitly (re-)enable autograd in case another cell left it disabled globally.
    torch.set_grad_enabled(True)

    total_train_loss = 0
    train_pbar = tqdm(train_loader, leave=True, total=len(train_loader))
    for i, batch_data in enumerate(train_pbar):
        # Clear gradients accumulated by the previous step.
        optimizer.zero_grad()

        # Forward pass; only the loss is needed for the backward pass.
        # batch_data[:-1] drops the last element of each batch
        # (presumably the raw text, which the model does not consume - TODO confirm).
        loss, _, _ = forward_sequence_classification(model, batch_data[:-1], i2w=i2w, device='cuda')

        # Backward pass and weight update.
        loss.backward()
        optimizer.step()
        if scheduler is not None:
            scheduler.step()  # advance the learning-rate schedule

        total_train_loss += loss.item()
        # Progress bar shows the running mean loss over the batches seen so far.
        train_pbar.set_description(f"EPOCH {epoch+1} TRAIN LOSS: {total_train_loss/(i+1):.4f}")

    print(f"\nRata-rata Train Loss Epoch {epoch+1}: {total_train_loss/len(train_loader):.4f}")

    # ==================================
    # === 2. VALIDATION PHASE ===
    # ==================================
    print(f"\n--- Evaluasi Validasi Epoch {epoch + 1} ---")
    model.eval()

    total_val_loss = 0
    list_hyp_val, list_label_val = [], []

    # Scoped no_grad: autograd is off only for the duration of the evaluation,
    # so no global state leaks into the next epoch or into later cells.
    with torch.no_grad():
        val_pbar = tqdm(valid_loader, leave=True, total=len(valid_loader))
        for i, batch_data in enumerate(val_pbar):
            loss, batch_hyp, batch_label = forward_sequence_classification(model, batch_data[:-1], i2w=i2w, device='cuda')

            total_val_loss += loss.item()
            list_hyp_val += batch_hyp
            list_label_val += batch_label
            # Metrics over everything predicted so far, for the progress bar only.
            metrics_val = document_sentiment_metrics_fn(list_hyp_val, list_label_val)

            val_pbar.set_description("VAL LOSS:{:.4f} {}".format(total_val_loss/(i+1), metrics_to_string(metrics_val)))

    # Final metrics over the whole validation set.
    metrics_val = document_sentiment_metrics_fn(list_hyp_val, list_label_val)
    print(f"\n(FINAL EPOCH {epoch+1}) VAL LOSS:{total_val_loss/(len(valid_loader)):.4f} {metrics_to_string(metrics_val)}")

    # --- Checkpointing and early stopping on validation F1 ---
    # Falls back to 0 when the metrics dict has no 'F1' key; change the key
    # here if the metric is named differently.
    current_f1 = metrics_val.get('F1', 0)

    if current_f1 > best_validation_f1:
        # Improvement: save the weights and reset the patience counter.
        best_validation_f1 = current_f1
        torch.save(model.state_dict(), 'best_model.pth')
        print(f"Model terbaik disimpan dengan F1 Validasi: {current_f1:.4f}")
        patience_counter = 0
    else:
        # No improvement: one more epoch of patience used up.
        patience_counter += 1
        print(f"Tidak ada peningkatan F1. Patience counter: {patience_counter}/{PATIENCE}")

    # Stop training once patience is exhausted.
    if patience_counter >= PATIENCE:
        print(f"\n--- EARLY STOPPING ---")
        print(f"F1 Validasi tidak membaik selama {PATIENCE} epoch. Pelatihan dihentikan di Epoch {epoch + 1}.")
        break

print("\n--- Pelatihan Selesai ---")


# ==================================
# === 3. FINAL EVALUATION ON THE TEST SET ===
# ==================================
# Runs once, after training has finished.

print("\n--- Evaluasi Final di Test Set ---")

# Always evaluate the best checkpoint rather than the weights of the last
# epoch, which may already be overfitting.
print("Memuat model terbaik (best_model.pth) untuk evaluasi test set...")
model.load_state_dict(torch.load('best_model.pth'))

model.eval()

total_loss = 0
list_hyp, list_label = [], []

with torch.no_grad():
    pbar = tqdm(test_loader, leave=True, total=len(test_loader))
    for i, batch_data in enumerate(pbar):
        loss, batch_hyp, batch_label = forward_sequence_classification(model, batch_data[:-1], i2w=i2w, device='cuda')

        total_loss += loss.item()

        list_hyp += batch_hyp
        list_label += batch_label
        # Running metrics for the progress bar only.
        metrics = document_sentiment_metrics_fn(list_hyp, list_label)

        pbar.set_description("TEST LOSS:{:.4f} {}".format(total_loss/(i+1), metrics_to_string(metrics)))

# Final metrics over the whole test set. The mean loss divides by the number of
# batches in the loader, matching the validation summary, instead of relying on
# the loop index left over from the loop above.
metrics = document_sentiment_metrics_fn(list_hyp, list_label)
print("\n(FINAL) TEST LOSS:{:.4f} {}".format(
    total_loss/len(test_loader), metrics_to_string(metrics)))

# --- Detailed per-class report ---
print("\n--- Laporan Klasifikasi Rinci (Test Set) ---")
report = classification_report(list_label, list_hyp)
print(report)

# Save the predictions, one row per test example, with its position as `index`.
# NOTE(review): the file name says "roberta" although the model loaded in this
# notebook is IndoBERT; it is kept unchanged so existing consumers still find it.
df = pd.DataFrame({'label':list_hyp}).reset_index()
df.to_csv('pred_smsa_roberta.txt', index=False)

print(f"\nPrediksi disimpan di 'pred_smsa_roberta.txt'")
print(df.head())


--- Epoch 1/20 ---



  0%|          | 0/344 [00:00<?, ?it/s][A
EPOCH 1 TRAIN LOSS: 1.0897:   0%|          | 0/344 [00:02<?, ?it/s][A
EPOCH 1 TRAIN LOSS: 1.0897:   0%|          | 1/344 [00:02<16:33,  2.90s/it][A
EPOCH 1 TRAIN LOSS: 1.0514:   0%|          | 1/344 [00:03<16:33,  2.90s/it][A
EPOCH 1 TRAIN LOSS: 1.0514:   1%|          | 2/344 [00:03<08:22,  1.47s/it][A
EPOCH 1 TRAIN LOSS: 1.0390:   1%|          | 2/344 [00:03<08:22,  1.47s/it][A
EPOCH 1 TRAIN LOSS: 1.0390:   1%|          | 3/344 [00:03<05:46,  1.02s/it][A
EPOCH 1 TRAIN LOSS: 1.0306:   1%|          | 3/344 [00:04<05:46,  1.02s/it][A
EPOCH 1 TRAIN LOSS: 1.0306:   1%|          | 4/344 [00:04<04:30,  1.26it/s][A
EPOCH 1 TRAIN LOSS: 1.0041:   1%|          | 4/344 [00:04<04:30,  1.26it/s][A
EPOCH 1 TRAIN LOSS: 1.0041:   1%|▏         | 5/344 [00:04<03:39,  1.55it/s][A
EPOCH 1 TRAIN LOSS: 0.9883:   1%|▏         | 5/344 [00:05<03:39,  1.55it/s][A
EPOCH 1 TRAIN LOSS: 0.9883:   2%|▏         | 6/344 [00:05<03:13,  1.75it/s][A
EPOCH 1 TRAIN LO


Rata-rata Train Loss Epoch 1: 0.2847

--- Evaluasi Validasi Epoch 1 ---


VAL LOSS:0.1738 ACC:0.93 F1:0.91 REC:0.90 PRE:0.92: 100%|██████████| 40/40 [00:07<00:00,  5.71it/s]



(FINAL EPOCH 1) VAL LOSS:0.1738 ACC:0.93 F1:0.91 REC:0.90 PRE:0.92
Model terbaik disimpan dengan F1 Validasi: 0.9079

--- Epoch 2/20 ---


EPOCH 2 TRAIN LOSS: 0.1363: 100%|██████████| 344/344 [02:41<00:00,  2.13it/s]



Rata-rata Train Loss Epoch 2: 0.1363

--- Evaluasi Validasi Epoch 2 ---


VAL LOSS:0.1822 ACC:0.94 F1:0.90 REC:0.88 PRE:0.94: 100%|██████████| 40/40 [00:06<00:00,  5.84it/s]



(FINAL EPOCH 2) VAL LOSS:0.1822 ACC:0.94 F1:0.90 REC:0.88 PRE:0.94
Tidak ada peningkatan F1. Patience counter: 1/3

--- Epoch 3/20 ---


EPOCH 3 TRAIN LOSS: 0.0957: 100%|██████████| 344/344 [02:40<00:00,  2.14it/s]



Rata-rata Train Loss Epoch 3: 0.0957

--- Evaluasi Validasi Epoch 3 ---


VAL LOSS:0.2157 ACC:0.93 F1:0.91 REC:0.90 PRE:0.91: 100%|██████████| 40/40 [00:06<00:00,  5.97it/s]



(FINAL EPOCH 3) VAL LOSS:0.2157 ACC:0.93 F1:0.91 REC:0.90 PRE:0.91
Tidak ada peningkatan F1. Patience counter: 2/3

--- Epoch 4/20 ---


EPOCH 4 TRAIN LOSS: 0.0634: 100%|██████████| 344/344 [02:40<00:00,  2.14it/s]



Rata-rata Train Loss Epoch 4: 0.0634

--- Evaluasi Validasi Epoch 4 ---


VAL LOSS:0.2055 ACC:0.93 F1:0.91 REC:0.91 PRE:0.92: 100%|██████████| 40/40 [00:06<00:00,  5.94it/s]



(FINAL EPOCH 4) VAL LOSS:0.2055 ACC:0.93 F1:0.91 REC:0.91 PRE:0.92
Model terbaik disimpan dengan F1 Validasi: 0.9121

--- Epoch 5/20 ---


EPOCH 5 TRAIN LOSS: 0.0406: 100%|██████████| 344/344 [02:42<00:00,  2.12it/s]



Rata-rata Train Loss Epoch 5: 0.0406

--- Evaluasi Validasi Epoch 5 ---


VAL LOSS:0.2051 ACC:0.94 F1:0.92 REC:0.92 PRE:0.92: 100%|██████████| 40/40 [00:06<00:00,  5.88it/s]



(FINAL EPOCH 5) VAL LOSS:0.2051 ACC:0.94 F1:0.92 REC:0.92 PRE:0.92
Model terbaik disimpan dengan F1 Validasi: 0.9168

--- Epoch 6/20 ---


EPOCH 6 TRAIN LOSS: 0.0259: 100%|██████████| 344/344 [02:41<00:00,  2.13it/s]



Rata-rata Train Loss Epoch 6: 0.0259

--- Evaluasi Validasi Epoch 6 ---


VAL LOSS:0.2363 ACC:0.94 F1:0.91 REC:0.90 PRE:0.92: 100%|██████████| 40/40 [00:06<00:00,  5.86it/s]



(FINAL EPOCH 6) VAL LOSS:0.2363 ACC:0.94 F1:0.91 REC:0.90 PRE:0.92
Tidak ada peningkatan F1. Patience counter: 1/3

--- Epoch 7/20 ---


EPOCH 7 TRAIN LOSS: 0.0205: 100%|██████████| 344/344 [02:40<00:00,  2.14it/s]



Rata-rata Train Loss Epoch 7: 0.0205

--- Evaluasi Validasi Epoch 7 ---


VAL LOSS:0.2461 ACC:0.94 F1:0.92 REC:0.91 PRE:0.93: 100%|██████████| 40/40 [00:06<00:00,  5.83it/s]



(FINAL EPOCH 7) VAL LOSS:0.2461 ACC:0.94 F1:0.92 REC:0.91 PRE:0.93
Tidak ada peningkatan F1. Patience counter: 2/3

--- Epoch 8/20 ---


EPOCH 8 TRAIN LOSS: 0.0148: 100%|██████████| 344/344 [02:40<00:00,  2.14it/s]



Rata-rata Train Loss Epoch 8: 0.0148

--- Evaluasi Validasi Epoch 8 ---


VAL LOSS:0.2568 ACC:0.94 F1:0.92 REC:0.91 PRE:0.92: 100%|██████████| 40/40 [00:06<00:00,  5.96it/s]



(FINAL EPOCH 8) VAL LOSS:0.2568 ACC:0.94 F1:0.92 REC:0.91 PRE:0.92
Tidak ada peningkatan F1. Patience counter: 3/3

--- EARLY STOPPING ---
F1 Validasi tidak membaik selama 3 epoch. Pelatihan dihentikan di Epoch 8.

--- Pelatihan Selesai ---

--- Evaluasi Final di Test Set ---
Memuat model terbaik (best_model.pth) untuk evaluasi test set...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
TEST LOSS:0.3052 ACC:0.91 F1:0.89 REC:0.87 PRE:0.90: 100%|██████████| 16/16 [00:02<00:00,  7.17it/s]



(FINAL) TEST LOSS:0.3052 ACC:0.91 F1:0.89 REC:0.87 PRE:0.90

--- Laporan Klasifikasi Rinci (Test Set) ---
              precision    recall  f1-score   support

    negative       0.93      0.99      0.96       204
     neutral       0.87      0.70      0.78        88
    positive       0.91      0.93      0.92       208

    accuracy                           0.91       500
   macro avg       0.90      0.87      0.89       500
weighted avg       0.91      0.91      0.91       500


Prediksi disimpan di 'pred_smsa_roberta.txt'
   index     label
0      0  negative
1      1  negative
2      2  negative
3      3  negative
4      4  negative


In [None]:
# Run the current model over the test set and save only the predicted labels.
model.eval()

list_hyp = []

# Scoped no_grad keeps autograd disabled only while predicting, instead of
# switching it off globally for every cell that runs afterwards.
with torch.no_grad():
    pbar = tqdm(test_loader, leave=True, total=len(test_loader))
    for batch_data in pbar:
        # Only the predicted labels are needed; loss and gold labels are ignored.
        _, batch_hyp, _ = forward_sequence_classification(model, batch_data[:-1], i2w=i2w, device='cuda')
        list_hyp += batch_hyp

# Save the predictions, one row per test example, with its position as `index`.
df = pd.DataFrame({'label':list_hyp}).reset_index()
df.to_csv('pred.txt', index=False)

print(df)

100%|██████████| 16/16 [00:01<00:00,  8.51it/s]

     index     label
0        0  negative
1        1  negative
2        2  negative
3        3  negative
4        4  negative
..     ...       ...
495    495   neutral
496    496   neutral
497    497   neutral
498    498  positive
499    499  positive

[500 rows x 2 columns]





In [None]:
# === Quick single-sentence prediction ===
input_text = "aku cantik sekali"
model.eval()
with torch.no_grad():
    # Tokenize the sentence and move every resulting tensor to the GPU.
    toks = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=512)
    toks = {field: tensor.cuda() for field, tensor in toks.items()}
    # Class probabilities for the single input (first row of the batch).
    logits = model(**toks).logits
    probs = torch.softmax(logits, dim=-1)[0]
# Map the highest-probability class index back to its label name.
best_index = int(probs.argmax())
pred = i2w[best_index]
print(f"{input_text} -> {pred} | probs={probs.cpu().numpy()}")


aku cantik sekali -> positive | probs=[9.9896753e-01 5.0980266e-04 5.2264635e-04]
