In [1]:
!pip uninstall torch-xla transformers -y
!pip install transformers

[0mFound existing installation: transformers 4.57.1
Uninstalling transformers-4.57.1:
  Successfully uninstalled transformers-4.57.1
Collecting transformers
  Downloading transformers-4.57.1-py3-none-any.whl.metadata (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.0/44.0 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
Downloading transformers-4.57.1-py3-none-any.whl (12.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.0/12.0 MB[0m [31m55.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: transformers
Successfully installed transformers-4.57.1


In [2]:
# ======================================================================
# Bagian E - Baseline Zero-Shot (Multi-Model) - VERSI GPU
# ======================================================================

# Import 'pipeline' dari transformers
from transformers import pipeline
import pandas as pd
import torch
from torch.utils.data import random_split
from tqdm.notebook import tqdm
import warnings

# --- 1. Zero-shot evaluation routine, parameterised by 'model_name' ---
def _load_validation_split(csv_path, label_map):
    """Read the news CSV and return the held-out 20% as ``(texts, labels)``.

    Rows with a missing ``konten_normalized`` or ``sentiment`` value, and rows
    whose sentiment is not a key of ``label_map``, are dropped before splitting.

    Args:
        csv_path (str): Path of the CSV file to read.
        label_map (dict): Maps the sentiment string to its integer label.

    Returns:
        tuple[list[str], list[int]] | None: Validation texts and labels, or
        ``None`` (after printing an error) when the file does not exist or no
        usable row is left.
    """
    try:
        df = pd.read_csv(csv_path)
    except FileNotFoundError:
        print(f"ERROR: File CSV '{csv_path}' tidak ditemukan.")
        return None

    df = df.dropna(subset=['konten_normalized', 'sentiment'])
    # Sentiments outside label_map map to NaN and are dropped with them.
    df = df.assign(label=df['sentiment'].map(label_map)).dropna(subset=['label'])
    df = df.assign(label=df['label'].astype(int))

    data_tuples = list(zip(df['konten_normalized'], df['label']))
    if not data_tuples:
        # Nothing to evaluate: stop here instead of loading a large model
        # only to report a meaningless 0% accuracy.
        print("ERROR: Tidak ada sampel berlabel Positive/Negative yang bisa dievaluasi.")
        return None

    # Fixed seed so the train/val split is identical on every run
    # (presumably matching the split of the other experiments -- confirm).
    torch.manual_seed(42)

    train_size = int(0.8 * len(data_tuples))
    val_size = len(data_tuples) - train_size
    _, val_subset = random_split(data_tuples, [train_size, val_size])

    val_pairs = list(val_subset)
    val_texts = [str(text) for text, _ in val_pairs]
    val_labels = [label for _, label in val_pairs]
    return val_texts, val_labels


def _count_correct_predictions(results, true_labels, label_map):
    """Count correct top-1 predictions and predictions with an unknown label.

    Args:
        results (list[dict]): Pipeline outputs; ``result['labels'][0]`` is read
            as the predicted label.
        true_labels (list[int]): Ground-truth integer labels, aligned with
            ``results``.
        label_map (dict): Maps the label string to its integer label.

    Returns:
        tuple[int, int]: ``(correct, unrecognised)``.
    """
    correct = 0
    unrecognised = 0
    for result, true_label in zip(results, true_labels):
        # Normalise casing so e.g. "positive" still matches the 'Positive' key.
        predicted = result['labels'][0].capitalize()
        if predicted not in label_map:
            unrecognised += 1
            continue
        if label_map[predicted] == true_label:
            correct += 1
    return correct, unrecognised


def run_zero_shot_baseline(model_name,
                           csv_path='7-garudaindonesia_news_cleaned_simple.csv',
                           batch_size=16):
    """Run the zero-shot sentiment baseline for one Hugging Face model.

    Loads the validation split, classifies every text with a
    ``zero-shot-classification`` pipeline against the labels
    ``Positive``/``Negative``, and prints the accuracy report.

    Args:
        model_name (str): Hugging Face model name
            (e.g. "facebook/bart-large-mnli").
        csv_path (str): CSV file with the ``konten_normalized`` and
            ``sentiment`` columns. Defaults to the project data file.
        batch_size (int): Number of texts handed to the pipeline per call.

    Returns:
        float | None: Accuracy in ``[0, 1]``, or ``None`` when the data could
        not be loaded (an error message is printed in that case).

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    # NOTE: this silences warnings for the whole process, not just this call.
    warnings.filterwarnings("ignore")

    print("\n" + "="*50)
    print(f"MULAI EKSPERIMEN: E. Baseline Zero-Shot ({model_name})")
    print("="*50)

    # --- 1. Load & prepare data (reproducible split) ---
    label_map = {'Positive': 1, 'Negative': 0}
    split = _load_validation_split(csv_path, label_map)
    if split is None:
        return None
    val_texts, val_labels = split

    print(f"Data validasi (untuk zero-shot) berhasil dimuat: {len(val_texts)} item")

    # --- 2. Initialise the pipeline ---
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Memuat pipeline zero-shot: {model_name} ke device {device}...")

    classifier = pipeline(
        "zero-shot-classification",
        model=model_name,
        device=0 if device == "cuda" else -1  # device index 0 for GPU, -1 for CPU
    )
    print("Pipeline berhasil dimuat.")

    # --- 3. Run the evaluation chunk by chunk ---
    # Derived from label_map so the candidate labels and the scoring keys
    # cannot drift apart: ["Positive", "Negative"].
    candidate_labels = list(label_map)
    total_predictions = len(val_texts)

    # The template depends only on the model, so it is chosen once up front.
    # Models whose name contains "mnli" keep the older Indonesian template.
    if "mnli" in model_name:
        template = "Sentimen teks ini adalah {}."
    else:
        template = "This text is {}."

    print(f"Mulai evaluasi zero-shot dengan batch_size={batch_size}...")

    # NOTE(review): batch_size only sets how many texts are handed to the
    # pipeline per call (and the progress-bar step); it is not forwarded as the
    # pipeline's own batch_size argument -- confirm whether that is intended.
    results = []
    # The context manager closes the bar even if inference raises (e.g. OOM).
    with tqdm(total=total_predictions, desc=f"Evaluasi {model_name}") as pbar:
        for start in range(0, total_predictions, batch_size):
            batch_texts = val_texts[start:start + batch_size]
            batch_results = classifier(
                batch_texts,
                candidate_labels,
                hypothesis_template=template,
                multi_label=False
            )
            # Defensive: if a single result comes back as a bare dict,
            # extending with it would append its keys instead of the result.
            if isinstance(batch_results, dict):
                batch_results = [batch_results]
            results.extend(batch_results)
            pbar.update(len(batch_texts))

    # --- 4. Compute accuracy ---
    correct_predictions, unrecognised = _count_correct_predictions(
        results, val_labels, label_map
    )
    if unrecognised:
        # Report instead of skipping silently; these still count as wrong
        # because the denominator is the full validation size.
        print(f"PERINGATAN: {unrecognised} prediksi berlabel tidak dikenal (dihitung salah).")

    accuracy = (correct_predictions / total_predictions) if total_predictions > 0 else 0.0

    print("\n" + "-"*50)
    print(f"HASIL EKSPERIMEN E: ZERO-SHOT ({model_name})")
    print(f"Model: {model_name}")
    print(f"Total Sampel Validasi: {total_predictions}")
    print(f"Prediksi Benar: {correct_predictions}")
    print(f"Akurasi Zero-Shot: {accuracy * 100:.2f}%")
    print("-"*50)

    return accuracy

# --- Run Experiment Part E (multi-model) ---
if __name__ == '__main__':

    # Models to benchmark, in run order:
    # BART fine-tuned on MNLI first, then XLM-R fine-tuned on XNLI.
    zero_shot_models = (
        "facebook/bart-large-mnli",
        "joeddav/xlm-roberta-large-xnli",
    )

    # Call the baseline once per model.
    for hf_model_name in zero_shot_models:
        run_zero_shot_baseline(model_name=hf_model_name)


MULAI EKSPERIMEN: E. Baseline Zero-Shot (facebook/bart-large-mnli)
Data validasi (untuk zero-shot) berhasil dimuat: 77 item
Memuat pipeline zero-shot: facebook/bart-large-mnli ke device cuda...


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0


Pipeline berhasil dimuat.
Mulai evaluasi zero-shot dengan batch_size=16...


Evaluasi facebook/bart-large-mnli:   0%|          | 0/77 [00:00<?, ?it/s]


--------------------------------------------------
HASIL EKSPERIMEN E: ZERO-SHOT (facebook/bart-large-mnli)
Model: facebook/bart-large-mnli
Total Sampel Validasi: 77
Prediksi Benar: 42
Akurasi Zero-Shot: 54.55%
--------------------------------------------------

MULAI EKSPERIMEN: E. Baseline Zero-Shot (joeddav/xlm-roberta-large-xnli)
Data validasi (untuk zero-shot) berhasil dimuat: 77 item
Memuat pipeline zero-shot: joeddav/xlm-roberta-large-xnli ke device cuda...


config.json:   0%|          | 0.00/734 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

Some weights of the model checkpoint at joeddav/xlm-roberta-large-xnli were not used when initializing XLMRobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

Device set to use cuda:0


Pipeline berhasil dimuat.
Mulai evaluasi zero-shot dengan batch_size=16...


Evaluasi joeddav/xlm-roberta-large-xnli:   0%|          | 0/77 [00:00<?, ?it/s]


--------------------------------------------------
HASIL EKSPERIMEN E: ZERO-SHOT (joeddav/xlm-roberta-large-xnli)
Model: joeddav/xlm-roberta-large-xnli
Total Sampel Validasi: 77
Prediksi Benar: 45
Akurasi Zero-Shot: 58.44%
--------------------------------------------------
