In [None]:
# Cell 1

# Step 1: Clean up any potentially conflicting versions
print("--> Uninstalling existing versions...")
!pip uninstall -y transformers accelerate datasets torch torchvision

# Step 2: Reinstall a stable, compatible set of the core libraries
print("\n--> Reinstalling core libraries...")
!pip install transformers accelerate datasets torch torchvision

# Step 3: Install the remaining libraries
print("\n--> Installing other required libraries...")
!pip install scikit-learn pandas imbalanced-learn -q

print("\n✅ All libraries have been reinstalled.")

--> Uninstalling existing versions...
Found existing installation: transformers 4.55.1
Uninstalling transformers-4.55.1:
  Successfully uninstalled transformers-4.55.1
Found existing installation: accelerate 1.10.0
Uninstalling accelerate-1.10.0:
  Successfully uninstalled accelerate-1.10.0
Found existing installation: datasets 4.0.0
Uninstalling datasets-4.0.0:
  Successfully uninstalled datasets-4.0.0
Found existing installation: torch 2.6.0+cu124
Uninstalling torch-2.6.0+cu124:
  Successfully uninstalled torch-2.6.0+cu124
Found existing installation: torchvision 0.21.0+cu124
Uninstalling torchvision-0.21.0+cu124:
  Successfully uninstalled torchvision-0.21.0+cu124

--> Reinstalling core libraries...
Collecting transformers
  Downloading transformers-4.55.2-py3-none-any.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.0/42.0 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate
  Downloading accelerate-1.10.0-py3-none-any.whl

In [None]:
# Check library versions for the paper's reproducibility section
!pip freeze | grep -E "transformers|torch|scikit-learn|imbalanced-learn|datasets"

datasets==4.0.0
imbalanced-learn==0.13.0
scikit-learn==1.6.1
sentence-transformers==5.1.0
tensorflow-datasets==4.9.9
torch==2.8.0
torchao==0.10.0
torchaudio @ https://download.pytorch.org/whl/cu124/torchaudio-2.6.0%2Bcu124-cp311-cp311-linux_x86_64.whl
torchdata==0.11.0
torchsummary==1.5.1
torchtune==0.6.1
torchvision==0.23.0
transformers==4.55.2
vega-datasets==0.9.0


In [None]:
from google.colab import drive
import pandas as pd
import numpy as np
import pickle
import os
import json
import torch
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report
from sklearn.utils.class_weight import compute_class_weight
from imblearn.over_sampling import RandomOverSampler
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset

# Make the Drive contents available under /content/drive
drive.mount('/content/drive')

# --- KEY SETTINGS ---
SEED = 42
GDRIVE_PATH = '/content/drive/MyDrive/eecsi_revise/'
MODEL_NAME = "indolem/indobertweet-base-uncased"

# --- Dedicated output folder for ASC checkpoints and result files ---
ASC_RESULTS_PATH = os.path.join(GDRIVE_PATH, 'indobertweet_asc_results/')
os.makedirs(ASC_RESULTS_PATH, exist_ok=True)  # no error when the folder already exists

# Seed NumPy, PyTorch (CPU) and all CUDA devices with the same value
for _seed_fn in (np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    _seed_fn(SEED)

print(
    f"✅ Setup complete. Working inside folder: {GDRIVE_PATH}",
    f"✅ ASC checkpoints and results will be saved to: {ASC_RESULTS_PATH}",
    sep="\n",
)

Mounted at /content/drive
✅ Setup complete. Working inside folder: /content/drive/MyDrive/eecsi_revise/
✅ ASC checkpoints and results will be saved to: /content/drive/MyDrive/eecsi_revise/indobertweet_asc_results/


In [None]:
# Define file path for the full dataset
file_path_csv = os.path.join(GDRIVE_PATH, 'final_golden_dataset_eecsi.csv')

try:
    df = pd.read_csv(file_path_csv)
except FileNotFoundError:
    print(f"❌ ERROR: File not found at '{file_path_csv}'.")
    # Re-raise instead of continuing: every later cell needs `relevant_df`, so
    # swallowing the error would only resurface as a confusing NameError.
    raise

print(f"Successfully loaded full dataset with {len(df)} rows.")

# --- CRITICAL CHANGE: Filter for relevant data only ---
# Rows whose aspect is 'Irrelevant' are excluded; .copy() detaches the result from `df`.
relevant_df = df[df['aspect'] != 'Irrelevant'].copy()
print(f"Filtered to {len(relevant_df)} relevant rows for sentiment analysis.")

Successfully loaded full dataset with 3030 rows.
Filtered to 2037 relevant rows for sentiment analysis.


In [None]:
# Cell 5 (REVISED): Prepare Helper Functions & Classes

# 1. Custom PyTorch Dataset Class
class SentimentDataset(torch.utils.data.Dataset):
    """Map-style dataset that pairs tokenizer encodings with labels.

    Args:
        encodings: mapping from field name to an indexable collection holding
            one entry per example.
        labels: indexable collection with one label per example.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        # The number of examples is defined by the label collection.
        return len(self.labels)

    def __getitem__(self, idx):
        # Convert every encoding field of example `idx` to a tensor,
        # then attach the label under the 'labels' key.
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

# 2. Function to compute metrics
def compute_metrics(p):
    """Return the macro-averaged F1 score for one evaluation pass.

    Args:
        p: object exposing `predictions` (per-class scores; the arg-max over
            axis 1 is taken as the predicted class) and `label_ids`.

    Returns:
        dict with the single key "macro_f1".
    """
    predicted_ids = np.argmax(p.predictions, axis=1)
    macro_avg = classification_report(
        p.label_ids,
        predicted_ids,
        output_dict=True,
        zero_division=0,
    )["macro avg"]
    return {"macro_f1": macro_avg["f1-score"]}

# 3. Custom Trainer for Weighted Loss
class WeightedLossTrainer(Trainer):
    """`Trainer` whose training loss is cross-entropy with optional class weights.

    Args:
        class_weights: tensor passed as `weight` to `torch.nn.CrossEntropyLoss`,
            or None (the default) for an unweighted cross-entropy loss.
        *args, **kwargs: forwarded unchanged to `Trainer.__init__`.
    """

    def __init__(self, *args, class_weights=None, **kwargs):
        super().__init__(*args, **kwargs)
        self.class_weights = class_weights

    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        """Compute the (optionally class-weighted) cross-entropy loss for a batch.

        Returns `(loss, outputs)` when `return_outputs` is true, else `loss`.
        Extra keyword arguments supplied by the caller are accepted and ignored.
        """
        labels = inputs.get("labels")
        outputs = model(**inputs)
        logits = outputs.get("logits")
        # The constructor default is None; without this guard the default
        # configuration would fail on `None.to(...)`.
        weights = None
        if self.class_weights is not None:
            # Weights must live on the same device as the logits.
            weights = self.class_weights.to(logits.device)
        loss_fct = torch.nn.CrossEntropyLoss(weight=weights)
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss

print("✅ Helper functions and classes are ready.")

✅ Helper functions and classes are ready.


In [None]:
# List to store the evaluation results from each fold
fold_results = []
# --- CRITICAL CHANGE: Target is now 'sentiment' ---
X = relevant_df['cleaned_text']
y = relevant_df['sentiment']

# Create label mappings for the sentiment classes (ids follow the sorted label order)
labels = np.array(sorted(y.unique()))
label2id = {label: i for i, label in enumerate(labels)}
id2label = {i: label for i, label in enumerate(labels)}

# Define K-Fold splits based on the relevant data
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)
n_splits = skf.get_n_splits()

# The tokenizer does not depend on the fold, so it is loaded once
# instead of once per fold.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

for i, (train_index, test_index) in enumerate(skf.split(X, y)):
    print(f"--- Running Fold {i+1}/{n_splits} ---")

    # 1. Split data for the current fold
    train_df = relevant_df.iloc[train_index]
    test_df = relevant_df.iloc[test_index]

    # 2. Apply Random Oversampling on the training data only
    ros = RandomOverSampler(random_state=SEED)
    X_train_resampled, y_train_resampled = ros.fit_resample(train_df[['cleaned_text']], train_df['sentiment'])
    train_df_resampled = pd.concat([X_train_resampled, y_train_resampled], axis=1)

    # 3. Compute Class Weights from the ORIGINAL imbalanced training data
    # NOTE(review): the loss is class-weighted AND the training set is oversampled,
    # so minority classes are up-weighted twice - confirm this is intended.
    class_weights = compute_class_weight('balanced', classes=labels, y=train_df['sentiment'])
    class_weights_tensor = torch.tensor(class_weights, dtype=torch.float)

    # 4. Tokenize data
    train_encodings = tokenizer(list(train_df_resampled['cleaned_text']), truncation=True, padding=True, max_length=128)
    test_encodings = tokenizer(list(test_df['cleaned_text']), truncation=True, padding=True, max_length=128)

    train_labels = [label2id[label] for label in train_df_resampled['sentiment']]
    test_labels = [label2id[label] for label in test_df['sentiment']]

    train_dataset = SentimentDataset(train_encodings, train_labels)
    test_dataset = SentimentDataset(test_encodings, test_labels)

    # 5. Initialize a fresh model for this fold, one output per sentiment class
    model = AutoModelForSequenceClassification.from_pretrained(
        MODEL_NAME,
        num_labels=len(labels),
        label2id=label2id,
        id2label=id2label
    )

    # 6. Define Training Arguments
    training_args = TrainingArguments(
        output_dir=os.path.join(ASC_RESULTS_PATH, f'fold_{i+1}'),
        num_train_epochs=5,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        learning_rate=3e-5, # Common to use a slightly higher LR for the second task
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        metric_for_best_model="macro_f1",
        weight_decay=0.01,
        logging_dir='./logs',
        logging_steps=10,
        seed=SEED,
    )

    # 7. Use the custom WeightedLossTrainer
    # NOTE(review): the held-out fold serves as `eval_dataset`, and
    # `load_best_model_at_end` selects the epoch with the best macro F1 on it,
    # so the reported fold score is chosen on the same data it is measured on.
    trainer = WeightedLossTrainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        compute_metrics=compute_metrics,
        class_weights=class_weights_tensor,
    )

    # 8. Train the model
    trainer.train()

    # Write trainer_state.json into the fold's output_dir. The re-evaluation
    # cell of this notebook looks for that file (and its 'best_model_checkpoint'
    # entry) directly inside the fold folder.
    trainer.save_state()

    # 9. Evaluate and store the results
    eval_results = trainer.evaluate()
    fold_results.append(eval_results)
    print(f"Fold {i+1} complete. Evaluation results: {eval_results}")

print(f"\n✅ {n_splits}-fold cross-validation process for IndoBERT (ASC) finished.")

--- Running Fold 1/5 ---


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/445M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indolem/indobertweet-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


model.safetensors:   0%|          | 0.00/445M [00:00<?, ?B/s]

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mitaeyeong2532[0m ([33mitaeyeong2532-telkom-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss,Macro F1
1,0.5307,0.561564,0.764444
2,0.1336,0.69686,0.787252
3,0.0451,0.821677,0.795514
4,0.0289,0.976326,0.79446
5,0.001,1.126428,0.767363


Fold 1 complete. Evaluation results: {'eval_loss': 0.8216770887374878, 'eval_macro_f1': 0.7955137599739457, 'eval_runtime': 1.7284, 'eval_samples_per_second': 236.058, 'eval_steps_per_second': 15.043, 'epoch': 5.0}
--- Running Fold 2/5 ---


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indolem/indobertweet-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Macro F1
1,0.5134,0.700393,0.686369
2,0.1646,0.585599,0.796481
3,0.1141,0.79349,0.791274
4,0.0253,0.913224,0.788619
5,0.0326,0.97846,0.808295


Fold 2 complete. Evaluation results: {'eval_loss': 0.978459894657135, 'eval_macro_f1': 0.8082954099981251, 'eval_runtime': 1.7705, 'eval_samples_per_second': 230.442, 'eval_steps_per_second': 14.685, 'epoch': 5.0}
--- Running Fold 3/5 ---


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indolem/indobertweet-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Macro F1
1,0.5659,0.653834,0.721294
2,0.2552,0.647284,0.806654
3,0.1022,0.807077,0.795235
4,0.0108,0.913668,0.803402
5,0.0017,0.972222,0.796532


Fold 3 complete. Evaluation results: {'eval_loss': 0.647283673286438, 'eval_macro_f1': 0.8066535552489701, 'eval_runtime': 1.6524, 'eval_samples_per_second': 246.301, 'eval_steps_per_second': 15.734, 'epoch': 5.0}
--- Running Fold 4/5 ---


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indolem/indobertweet-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Macro F1
1,0.5444,0.579105,0.766891
2,0.1761,0.706403,0.780071
3,0.1397,0.920103,0.806355
4,0.0024,1.051226,0.810142
5,0.0507,1.024879,0.820565


Fold 4 complete. Evaluation results: {'eval_loss': 1.0248794555664062, 'eval_macro_f1': 0.8205652625826879, 'eval_runtime': 1.5385, 'eval_samples_per_second': 264.55, 'eval_steps_per_second': 16.9, 'epoch': 5.0}
--- Running Fold 5/5 ---


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indolem/indobertweet-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Macro F1
1,0.4347,0.470561,0.824414
2,0.2027,0.605362,0.782978
3,0.1088,0.78088,0.790144
4,0.0275,0.801671,0.817964
5,0.0012,0.822401,0.824953


Fold 5 complete. Evaluation results: {'eval_loss': 0.8224005699157715, 'eval_macro_f1': 0.8249532940374542, 'eval_runtime': 1.452, 'eval_samples_per_second': 280.31, 'eval_steps_per_second': 17.907, 'epoch': 5.0}

✅ 5-fold cross-validation process for IndoBERT (ASC) finished.


In [None]:
# Gather the macro F1 reported by each fold
macro_f1_scores = []
for fold_metrics in fold_results:
    macro_f1_scores.append(fold_metrics['eval_macro_f1'])

# Mean and standard deviation across folds
# (np.std with default arguments, i.e. the population standard deviation)
mean_macro_f1, std_macro_f1 = np.mean(macro_f1_scores), np.std(macro_f1_scores)

print(
    "--- Final Aggregated Results (5-Fold CV) for IndoBERTweet (ASC) ---",
    f"Macro F1-Score = {mean_macro_f1:.4f} ± {std_macro_f1:.4f}",
    sep="\n",
)

--- Final Aggregated Results (5-Fold CV) for IndoBERTweet (ASC) ---
Macro F1-Score = 0.8112 ± 0.0105


In [None]:
# Destination of the summary file inside the ASC results folder
results_file_path = os.path.join(ASC_RESULTS_PATH, 'results_indobertweet_asc.json')

# Summary payload: aggregate statistics plus the raw per-fold metrics
final_results = dict(
    model='IndoBERTweet (ASC)',
    mean_macro_f1=mean_macro_f1,
    std_dev_macro_f1=std_macro_f1,
    results_per_fold=fold_results,
)

# Write the payload as indented JSON
with open(results_file_path, 'w') as out_file:
    json.dump(final_results, out_file, indent=4)

print(f"\n✅ Final results for IndoBERTweet (ASC) have been saved to: '{results_file_path}'")


✅ Final results for IndoBERTweet (ASC) have been saved to: '/content/drive/MyDrive/eecsi_revise/indobertweet_asc_results/results_indobertweet_asc.json'


In [None]:
import subprocess
import sys
import os

# Stage 1: Install Required Libraries
def install(packages):
    """Install each package in `packages` with pip, one invocation per package.

    Uses the interpreter running this notebook (`sys.executable -m pip`).
    A failed installation is reported and the `CalledProcessError` is
    re-raised, so the remaining packages are not attempted.
    """
    pip_command = [sys.executable, "-m", "pip", "install", "-q"]
    for package in packages:
        print(f"📦 Installing {package}...")
        try:
            subprocess.check_call(pip_command + [package])
        except subprocess.CalledProcessError as e:
            print(f"❌ Failed to install {package}: {e}")
            raise

print("--- Starting Environment Setup ---")
# Packages this cell relies on; each one is installed through install()
required_packages = [
    "transformers",
    "accelerate",
    "datasets",
    "scikit-learn",
    "pandas",
    "torch",
    "imbalanced-learn",
]
install(required_packages)
print("✅ All libraries installed successfully.\n")


# Stage 2: Imports and Setup
import json
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
import torch
from google.colab import drive

print("--- Starting Main Process ---")
print("⚙️ Performing initial setup...")
# Mount Google Drive (force_remount=True remounts even if it is already mounted)
try:
    drive.mount('/content/drive', force_remount=True)
except Exception as e:
    print(f"Error mounting drive: {e}")
    raise

# --- DEFINE PATHS AND CONSTANTS ---
# !!! IMPORTANT: Adjust this path to your IndoBERTweet results folder !!!
GDRIVE_PATH = '/content/drive/MyDrive/eecsi_revise/'
# Folder holding the per-fold IndoBERTweet checkpoints and result files
ASC_RESULTS_PATH = os.path.join(GDRIVE_PATH, 'indobertweet_asc_results/')
# Hub model whose tokenizer is used for re-evaluation
MODEL_NAME = "indolem/indobertweet-base-uncased"

# Load the dataset and keep only rows whose aspect is not 'Irrelevant'
file_path_csv = os.path.join(GDRIVE_PATH, 'final_golden_dataset_eecsi.csv')
try:
    df = pd.read_csv(file_path_csv)
except FileNotFoundError:
    print(f"❌ ERROR: Dataset file not found at '{file_path_csv}'. Please check the path.")
    raise
relevant_df = df[df['aspect'] != 'Irrelevant'].copy()
print("✅ Setup complete. Dataset loaded successfully.")

# --- REDEFINE IMPORTANT CLASSES AND FUNCTIONS ---

class SentimentDataset(torch.utils.data.Dataset):
    """Map-style dataset that pairs tokenizer encodings with labels.

    Args:
        encodings: mapping from field name to an indexable collection holding
            one entry per example.
        labels: indexable collection with one label per example.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        # The number of examples is defined by the label collection.
        return len(self.labels)

    def __getitem__(self, idx):
        # Convert every encoding field of example `idx` to a tensor,
        # then attach the label under the 'labels' key.
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

# Class ids follow the sorted order of the distinct sentiment values
y = relevant_df['sentiment']
labels_list = np.array(sorted(y.unique()))
id2label = dict(enumerate(labels_list))
label2id = {label: idx for idx, label in id2label.items()}

def compute_metrics_final(p):
    """Return macro F1 together with the full per-class classification report.

    Args:
        p: object exposing `predictions` (per-class scores; the arg-max over
            axis 1 is taken as the predicted class) and `label_ids`.

    Returns:
        dict with "macro_f1" and "detailed_classification_report" (the
        `classification_report` output as a dict, with class names taken
        from the module-level `id2label` in ascending id order).
    """
    predicted_ids = np.argmax(p.predictions, axis=1)

    # Class names ordered by their integer id
    ordered_ids = sorted(id2label.keys())
    class_labels = [id2label[class_id] for class_id in ordered_ids]

    report_kwargs = dict(
        y_true=p.label_ids,
        y_pred=predicted_ids,
        labels=list(range(len(class_labels))),
        target_names=class_labels,
        output_dict=True,
        zero_division=0,
    )
    detailed_report = classification_report(**report_kwargs)

    macro_f1 = detailed_report["macro avg"]["f1-score"]
    return {
        "macro_f1": macro_f1,
        "detailed_classification_report": detailed_report,
    }

# --- RE-EVALUATION AND SAVING PROCESS ---

print("\n🚀 Starting re-evaluation process for detailed reports...")


def _best_checkpoint_from_state(state_path, fold_dir):
    """Return the best-checkpoint directory recorded in a trainer_state.json, or None.

    Reads the 'best_model_checkpoint' entry of the JSON file at `state_path`.
    The recorded path may stem from a different mount point, so a directory of
    the same name inside `fold_dir` is preferred; the recorded path itself is
    used only if that directory does not exist. Returns None when the file is
    missing or unreadable, the entry is absent/empty, or no such directory exists.
    """
    try:
        with open(state_path, 'r') as f:
            recorded = json.load(f).get('best_model_checkpoint')
    except (FileNotFoundError, json.JSONDecodeError):
        return None
    if not recorded:
        return None
    local_copy = os.path.join(fold_dir, os.path.basename(os.path.normpath(recorded)))
    for candidate in (local_copy, recorded):
        if os.path.isdir(candidate):
            return candidate
    return None


def _resolve_best_checkpoint(fold_dir):
    """Locate the checkpoint to evaluate for one fold.

    Returns a tuple `(path, is_best)`:
      * `(path, True)`  - taken from a 'best_model_checkpoint' record, looked up
        first in `fold_dir/trainer_state.json`, then in the trainer_state.json
        stored inside the checkpoint folders (newest first);
      * `(path, False)` - no record found; `path` is the highest-numbered
        `checkpoint-N` folder, which is the latest one, not necessarily the best;
      * `(None, False)` - `fold_dir` is missing or holds no checkpoint folder.
    """
    if not os.path.isdir(fold_dir):
        return None, False

    best = _best_checkpoint_from_state(os.path.join(fold_dir, 'trainer_state.json'), fold_dir)
    if best:
        return best, True

    checkpoints = sorted(
        (
            d for d in os.listdir(fold_dir)
            if d.startswith('checkpoint-')
            and d.split('-')[-1].isdigit()
            and os.path.isdir(os.path.join(fold_dir, d))
        ),
        key=lambda d: int(d.split('-')[-1]),
    )
    # Picking the highest-numbered checkpoint directly would evaluate the LAST
    # epoch rather than the best one, so consult the state files saved inside
    # the checkpoint folders first.
    # NOTE(review): relies on the Trainer writing a trainer_state.json with
    # 'best_model_checkpoint' into each checkpoint folder - confirm for the
    # installed transformers version.
    for name in reversed(checkpoints):
        best = _best_checkpoint_from_state(os.path.join(fold_dir, name, 'trainer_state.json'), fold_dir)
        if best:
            return best, True

    if checkpoints:
        return os.path.join(fold_dir, checkpoints[-1]), False
    return None, False


recovered_fold_results = []
X_relevant = relevant_df['cleaned_text']
y_relevant = relevant_df['sentiment']

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42) # Use the same SEED as in training
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

for i, (train_index, test_index) in enumerate(skf.split(X_relevant, y_relevant)):
    fold_num = i + 1
    print(f"--- Re-evaluating Fold {fold_num}/5 ---")

    fold_dir = os.path.join(ASC_RESULTS_PATH, f'fold_{fold_num}')
    best_checkpoint_path, is_best = _resolve_best_checkpoint(fold_dir)

    if best_checkpoint_path is None:
        print(f"  ❌ Failed to find a checkpoint directory in Fold {fold_num}. Skipping this fold.")
        continue
    checkpoint_name = os.path.basename(os.path.normpath(best_checkpoint_path))
    if is_best:
        print(f"  ✅ Found best checkpoint at: {checkpoint_name}")
    else:
        print(f"  ⚠️ No 'best_model_checkpoint' record found in Fold {fold_num}. "
              f"Falling back to the latest checkpoint ({checkpoint_name}), which may not be the best epoch.")

    # Rebuild the held-out split of this fold exactly as during training
    test_df = relevant_df.iloc[test_index]
    test_encodings = tokenizer(list(test_df['cleaned_text']), truncation=True, padding=True, max_length=128)
    test_labels = [label2id[label] for label in test_df['sentiment']]
    test_dataset = SentimentDataset(test_encodings, test_labels)

    model = AutoModelForSequenceClassification.from_pretrained(best_checkpoint_path)
    trainer = Trainer(model=model, compute_metrics=compute_metrics_final)

    eval_results = trainer.evaluate(test_dataset)
    recovered_fold_results.append(eval_results)
    print(f"  👍 Evaluation of Fold {fold_num} complete.")

# --- Aggregate and Save Final Results ---
if recovered_fold_results:
    # Keep only the fields worth persisting from each fold's metrics dict
    final_results_per_fold = []
    for res in recovered_fold_results:
        clean_res = {
            'eval_loss': res.get('eval_loss'),
            'eval_macro_f1': res.get('eval_macro_f1'),
            'classification_report': res.get('eval_detailed_classification_report'),
            'eval_runtime_seconds': res.get('eval_runtime'),
        }
        final_results_per_fold.append(clean_res)

    # Compare against None explicitly: a legitimate score of 0.0 is falsy and
    # would be silently dropped from the average by a truthiness check.
    macro_f1_scores = [
        result['eval_macro_f1']
        for result in final_results_per_fold
        if result.get('eval_macro_f1') is not None
    ]
    # np.std with default arguments is the population standard deviation.
    # float() stores plain Python floats in the JSON payload.
    mean_macro_f1 = float(np.mean(macro_f1_scores)) if macro_f1_scores else 0
    std_macro_f1 = float(np.std(macro_f1_scores)) if macro_f1_scores else 0

    print("\n--- Final Aggregated Results (5-Fold CV) for IndoBERTweet ---")
    print(f"Macro F1-Score = {mean_macro_f1:.4f} ± {std_macro_f1:.4f}")

    final_results_to_save = {
        'model': 'IndoBERTweet (ASC)',
        'mean_macro_f1': mean_macro_f1,
        'std_dev_macro_f1': std_macro_f1,
        'results_per_fold': final_results_per_fold
    }

    results_file_path = os.path.join(ASC_RESULTS_PATH, 'results_indobertweet_asc_detailed.json')
    with open(results_file_path, 'w') as f:
        json.dump(final_results_to_save, f, indent=4)

    print(f"\n✅ SUCCESS! Final results with per-class details have been saved to: '{results_file_path}'")
else:
    # Reached when every fold was skipped (no checkpoint could be located)
    print("\n❌ No results could be processed. Please ensure the checkpoint paths are correct.")

--- Starting Environment Setup ---
📦 Installing transformers...
📦 Installing accelerate...
📦 Installing datasets...
📦 Installing scikit-learn...
📦 Installing pandas...
📦 Installing torch...
📦 Installing imbalanced-learn...
✅ All libraries installed successfully.

--- Starting Main Process ---
⚙️ Performing initial setup...
Mounted at /content/drive
✅ Setup complete. Dataset loaded successfully.

🚀 Starting re-evaluation process for detailed reports...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

--- Re-evaluating Fold 1/5 ---
  ⚠️ 'trainer_state.json' not found in Fold 1. Searching manually...
  ✅ Using found checkpoint: checkpoint-685


Trainer is attempting to log a value of "{np.str_('Negative'): {'precision': 0.825, 'recall': 0.9065934065934066, 'f1-score': 0.8638743455497382, 'support': 182.0}, np.str_('Neutral'): {'precision': 0.7636363636363637, 'recall': 0.5, 'f1-score': 0.60431654676259, 'support': 84.0}, np.str_('Positive'): {'precision': 0.803921568627451, 'recall': 0.8661971830985915, 'f1-score': 0.8338983050847457, 'support': 142.0}, 'accuracy': 0.8088235294117647, 'macro avg': {'precision': 0.7975193107546049, 'recall': 0.757596863230666, 'f1-score': 0.7673630657990246, 'support': 408.0}, 'weighted avg': {'precision': 0.8050301894376289, 'recall': 0.8088235294117647, 'f1-score': 0.8000031375983917, 'support': 408.0}}" of type <class 'dict'> for key "eval/detailed_classification_report" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mitaeyeong2532[0m ([33mitaeyeong2532-telkom-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


  👍 Evaluation of Fold 1 complete.
--- Re-evaluating Fold 2/5 ---
  ⚠️ 'trainer_state.json' not found in Fold 2. Searching manually...
  ✅ Using found checkpoint: checkpoint-685


Trainer is attempting to log a value of "{np.str_('Negative'): {'precision': 0.8877005347593583, 'recall': 0.9120879120879121, 'f1-score': 0.8997289972899729, 'support': 182.0}, np.str_('Neutral'): {'precision': 0.7333333333333333, 'recall': 0.6547619047619048, 'f1-score': 0.6918238993710691, 'support': 84.0}, np.str_('Positive'): {'precision': 0.821917808219178, 'recall': 0.8450704225352113, 'f1-score': 0.8333333333333334, 'support': 142.0}, 'accuracy': 0.8357843137254902, 'macro avg': {'precision': 0.8143172254372898, 'recall': 0.8039734131283427, 'f1-score': 0.8082954099981251, 'support': 408.0}, 'weighted avg': {'precision': 0.8330240835620748, 'recall': 0.8357843137254902, 'f1-score': 0.833816711733525, 'support': 408.0}}" of type <class 'dict'> for key "eval/detailed_classification_report" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


  👍 Evaluation of Fold 2 complete.
--- Re-evaluating Fold 3/5 ---
  ⚠️ 'trainer_state.json' not found in Fold 3. Searching manually...
  ✅ Using found checkpoint: checkpoint-685


Trainer is attempting to log a value of "{np.str_('Negative'): {'precision': 0.8695652173913043, 'recall': 0.8791208791208791, 'f1-score': 0.8743169398907104, 'support': 182.0}, np.str_('Neutral'): {'precision': 0.7012987012987013, 'recall': 0.6506024096385542, 'f1-score': 0.675, 'support': 83.0}, np.str_('Positive'): {'precision': 0.8287671232876712, 'recall': 0.852112676056338, 'f1-score': 0.8402777777777778, 'support': 142.0}, 'accuracy': 0.8230958230958231, 'macro avg': {'precision': 0.799877013992559, 'recall': 0.7939453216052571, 'f1-score': 0.7965315725561627, 'support': 407.0}, 'weighted avg': {'precision': 0.8210161997048131, 'recall': 0.8230958230958231, 'f1-score': 0.8217939250726135, 'support': 407.0}}" of type <class 'dict'> for key "eval/detailed_classification_report" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


  👍 Evaluation of Fold 3 complete.
--- Re-evaluating Fold 4/5 ---
  ⚠️ 'trainer_state.json' not found in Fold 4. Searching manually...
  ✅ Using found checkpoint: checkpoint-685


Trainer is attempting to log a value of "{np.str_('Negative'): {'precision': 0.882051282051282, 'recall': 0.945054945054945, 'f1-score': 0.9124668435013262, 'support': 182.0}, np.str_('Neutral'): {'precision': 0.7222222222222222, 'recall': 0.6190476190476191, 'f1-score': 0.6666666666666666, 'support': 84.0}, np.str_('Positive'): {'precision': 0.8857142857142857, 'recall': 0.8794326241134752, 'f1-score': 0.8825622775800712, 'support': 141.0}, 'accuracy': 0.855036855036855, 'macro avg': {'precision': 0.82999592999593, 'recall': 0.8145117294053464, 'f1-score': 0.8205652625826879, 'support': 407.0}, 'weighted avg': {'precision': 0.8503334503334503, 'recall': 0.855036855036855, 'f1-score': 0.8513765274103966, 'support': 407.0}}" of type <class 'dict'> for key "eval/detailed_classification_report" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


  👍 Evaluation of Fold 4 complete.
--- Re-evaluating Fold 5/5 ---
  ⚠️ 'trainer_state.json' not found in Fold 5. Searching manually...
  ✅ Using found checkpoint: checkpoint-685


Trainer is attempting to log a value of "{np.str_('Negative'): {'precision': 0.9269662921348315, 'recall': 0.9116022099447514, 'f1-score': 0.9192200557103064, 'support': 181.0}, np.str_('Neutral'): {'precision': 0.7435897435897436, 'recall': 0.6904761904761905, 'f1-score': 0.7160493827160493, 'support': 84.0}, np.str_('Positive'): {'precision': 0.8145695364238411, 'recall': 0.8661971830985915, 'f1-score': 0.8395904436860068, 'support': 142.0}, 'accuracy': 0.8501228501228502, 'macro avg': {'precision': 0.8283751907161387, 'recall': 0.8227585278398445, 'f1-score': 0.8249532940374542, 'support': 407.0}, 'weighted avg': {'precision': 0.8499049422853279, 'recall': 0.8501228501228502, 'f1-score': 0.8495057032804093, 'support': 407.0}}" of type <class 'dict'> for key "eval/detailed_classification_report" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


  👍 Evaluation of Fold 5 complete.

--- Final Aggregated Results (5-Fold CV) for IndoBERTweet ---
Macro F1-Score = 0.8035 ± 0.0206

✅ SUCCESS! Final results with per-class details have been saved to: '/content/drive/MyDrive/eecsi_revise/indobertweet_asc_results/results_indobertweet_asc_detailed.json'


In [None]:
import subprocess
import sys
import os

# Stage 1: Install Required Libraries
def install(packages):
    """Install each package in `packages` with pip, one invocation per package.

    Uses the interpreter running this notebook (`sys.executable -m pip`).
    A failed installation is reported and the `CalledProcessError` is
    re-raised, so the remaining packages are not attempted.
    """
    pip_command = [sys.executable, "-m", "pip", "install", "-q"]
    for package in packages:
        print(f"📦 Installing {package}...")
        try:
            subprocess.check_call(pip_command + [package])
        except subprocess.CalledProcessError as e:
            print(f"❌ Failed to install {package}: {e}")
            raise

print("--- Starting Environment Setup ---")
# Packages this cell relies on; each one is installed through install()
required_packages = [
    "transformers",
    "accelerate",
    "datasets",
    "scikit-learn",
    "pandas",
    "torch",
    "imbalanced-learn",
]
install(required_packages)
print("✅ All libraries installed successfully.\n")


# Stage 2: Imports and Setup
import json
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
import torch
from google.colab import drive

print("--- Starting Main Process ---")
print("⚙️ Performing initial setup...")
# Mount Google Drive (force_remount=True remounts even if it is already mounted)
try:
    drive.mount('/content/drive', force_remount=True)
except Exception as e:
    print(f"Error mounting drive: {e}")
    raise

# --- DEFINE PATHS AND CONSTANTS ---
GDRIVE_PATH = '/content/drive/MyDrive/eecsi_revise/'
ASC_RESULTS_PATH = os.path.join(GDRIVE_PATH, 'indobertweet_asc_results/')
# --- FIX: use a model name consistent with the training notebook ---
MODEL_NAME = "indolem/indobertweet-base-uncased"
# --- FIX: define SEED for reproducibility ---
SEED = 42

# Load the dataset and keep only rows whose aspect is not 'Irrelevant'
file_path_csv = os.path.join(GDRIVE_PATH, 'final_golden_dataset_eecsi.csv')
try:
    df = pd.read_csv(file_path_csv)
except FileNotFoundError:
    print(f"❌ ERROR: Dataset file not found at '{file_path_csv}'. Please check the path.")
    raise
relevant_df = df[df['aspect'] != 'Irrelevant'].copy()
print("✅ Setup complete. Dataset loaded successfully.")

# --- REDEFINE IMPORTANT CLASSES AND FUNCTIONS ---

class SentimentDataset(torch.utils.data.Dataset):
    """Torch dataset pairing per-example encodings with their labels.

    ``encodings`` is a mapping from field name to an indexable collection of
    per-example values; ``labels`` is an indexable collection with one label
    per example. Both are stored as given and converted to tensors on access.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        # The number of examples is defined by the labels.
        return len(self.labels)

    def __getitem__(self, idx):
        """Return one example as a dict of tensors, with the label under 'labels'."""
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

y = relevant_df['sentiment']
# Sorted unique sentiment values; a label's position in this array is its class id.
labels_list = np.array(sorted(y.unique()))
id2label = dict(enumerate(labels_list))
label2id = {label: idx for idx, label in id2label.items()}

def compute_metrics_final(p):
    """Compute macro F1 and a full per-class report for one evaluation pass.

    Args:
        p: Object exposing ``predictions`` (per-class scores; the argmax over
            axis 1 is taken as the predicted class id) and ``label_ids``
            (the true class ids).

    Returns:
        dict with "macro_f1" (the macro-average F1 score) and
        "detailed_classification_report" (the complete report as a dict).
    """
    predicted_ids = np.argmax(p.predictions, axis=1)
    # Class names ordered by class id so they line up with `labels` below.
    class_names = [id2label[class_id] for class_id in sorted(id2label)]
    report = classification_report(
        y_true=p.label_ids,
        y_pred=predicted_ids,
        labels=list(range(len(class_names))),
        target_names=class_names,
        output_dict=True,
        zero_division=0,
    )
    macro_f1 = report["macro avg"]["f1-score"]
    return {"macro_f1": macro_f1, "detailed_classification_report": report}

# --- RE-EVALUATION AND SAVING PROCESS ---

def _checkpoint_step(dirname):
    """Return the step number of a 'checkpoint-<step>' name, or None if it is not numeric."""
    suffix = dirname.split('-')[-1]
    return int(suffix) if suffix.isdigit() else None


def _recorded_best_checkpoint(state_path, fold_dir):
    """Return the existing best-checkpoint directory named in a trainer state file.

    Returns None when the file is missing or not valid JSON, when it records
    no 'best_model_checkpoint' (absent or null), or when the recorded
    directory cannot be found.
    """
    try:
        with open(state_path, 'r') as f:
            state = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        return None

    recorded = state.get('best_model_checkpoint') if isinstance(state, dict) else None
    if not recorded:
        return None

    # The recorded path may not be valid from the current working directory,
    # so also look for a directory of the same name inside this fold.
    relocated = os.path.join(fold_dir, os.path.basename(os.path.normpath(recorded)))
    for candidate in (recorded, relocated):
        if os.path.isdir(candidate):
            return candidate
    return None


def _find_fold_checkpoint(fold_dir):
    """Choose the checkpoint directory to evaluate for one fold.

    Lookup order: the state file in the fold directory, then the state file
    inside the highest-step 'checkpoint-<step>' directory, then that
    highest-step directory itself.

    Returns:
        (path, is_best): ``path`` is None when the fold holds no checkpoint
        directory; ``is_best`` is True only when the path came from a
        recorded 'best_model_checkpoint'.

    Raises:
        FileNotFoundError: If ``fold_dir`` does not exist.
    """
    best = _recorded_best_checkpoint(os.path.join(fold_dir, 'trainer_state.json'), fold_dir)
    if best is not None:
        return best, True

    names_by_step = {}
    for name in os.listdir(fold_dir):
        if name.startswith('checkpoint-') and os.path.isdir(os.path.join(fold_dir, name)):
            step = _checkpoint_step(name)
            if step is not None:
                names_by_step[step] = name
    if not names_by_step:
        return None, False

    latest = os.path.join(fold_dir, names_by_step[max(names_by_step)])
    best = _recorded_best_checkpoint(os.path.join(latest, 'trainer_state.json'), fold_dir)
    if best is not None:
        return best, True
    return latest, False


print("\n🚀 Starting re-evaluation process for detailed reports...")

recovered_fold_results = []
X_relevant = relevant_df['cleaned_text']
y_relevant = relevant_df['sentiment']

# FIX: use the SEED variable for random_state so the split is reproducible.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)
n_folds = skf.get_n_splits()
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Evaluation-only arguments. report_to="none" disables the logging
# integrations, which otherwise prompt for a wandb API key and warn that the
# nested report dict cannot be logged as a scalar. "tmp_trainer" is the
# output_dir Trainer picks on its own when no arguments are passed.
eval_args = TrainingArguments(output_dir="tmp_trainer", report_to="none")

for i, (train_index, test_index) in enumerate(skf.split(X_relevant, y_relevant)):
    fold_num = i + 1
    print(f"--- Re-evaluating Fold {fold_num}/{n_folds} ---")

    fold_dir = os.path.join(ASC_RESULTS_PATH, f'fold_{fold_num}')
    try:
        best_checkpoint_path, is_best = _find_fold_checkpoint(fold_dir)
    except FileNotFoundError:
        print(f"  ❌ Directory for Fold {fold_num} not found. Skipping.")
        continue

    if best_checkpoint_path is None:
        print(f"  ❌ Failed to find a checkpoint directory in Fold {fold_num}. Skipping this fold.")
        continue

    checkpoint_name = os.path.basename(os.path.normpath(best_checkpoint_path))
    if is_best:
        print(f"  ✅ Found best checkpoint at: {checkpoint_name}")
    else:
        # Make it explicit that this is the latest checkpoint, not a recorded best one.
        print(f"  ⚠️ No best checkpoint recorded for Fold {fold_num}. Using latest checkpoint: {checkpoint_name}")

    # Only the held-out part of the split is needed for re-evaluation.
    test_df = relevant_df.iloc[test_index]
    test_encodings = tokenizer(list(test_df['cleaned_text']), truncation=True, padding=True, max_length=128)
    test_labels = [label2id[label] for label in test_df['sentiment']]
    test_dataset = SentimentDataset(test_encodings, test_labels)

    model = AutoModelForSequenceClassification.from_pretrained(best_checkpoint_path)
    trainer = Trainer(model=model, args=eval_args, compute_metrics=compute_metrics_final)

    eval_results = trainer.evaluate(test_dataset)
    recovered_fold_results.append(eval_results)
    print(f"  👍 Evaluation of Fold {fold_num} complete.")

# --- Aggregate and Save Final Results ---
if recovered_fold_results:
    # Keep only the fields of interest from each fold's evaluation output.
    final_results_per_fold = [
        {
            'eval_loss': res.get('eval_loss'),
            'eval_macro_f1': res.get('eval_macro_f1'),
            'classification_report': res.get('eval_detailed_classification_report'),
            'eval_runtime_seconds': res.get('eval_runtime'),
        }
        for res in recovered_fold_results
    ]

    # Compare against None instead of relying on truthiness: a macro F1 of
    # exactly 0.0 is a valid score and must count towards the mean and std.
    macro_f1_scores = [
        result['eval_macro_f1']
        for result in final_results_per_fold
        if result['eval_macro_f1'] is not None
    ]
    # Cast to plain floats so the saved JSON holds built-in numbers.
    mean_macro_f1 = float(np.mean(macro_f1_scores)) if macro_f1_scores else 0
    std_macro_f1 = float(np.std(macro_f1_scores)) if macro_f1_scores else 0

    print("\n--- Final Aggregated Results (5-Fold CV) for IndoBERTweet ---")
    print(f"Macro F1-Score = {mean_macro_f1:.4f} ± {std_macro_f1:.4f}")

    final_results_to_save = {
        'model': 'IndoBERTweet (ASC)',
        'mean_macro_f1': mean_macro_f1,
        'std_dev_macro_f1': std_macro_f1,
        'results_per_fold': final_results_per_fold
    }

    results_file_path = os.path.join(ASC_RESULTS_PATH, 'results_indobertweet_asc_detailed.json')
    with open(results_file_path, 'w') as f:
        json.dump(final_results_to_save, f, indent=4)

    print(f"\n✅ SUCCESS! Final results with per-class details have been saved to: '{results_file_path}'")
else:
    print("\n❌ No results could be processed. Please ensure the checkpoint paths are correct.")

--- Starting Environment Setup ---
📦 Installing transformers...
📦 Installing accelerate...
📦 Installing datasets...
📦 Installing scikit-learn...
📦 Installing pandas...
📦 Installing torch...
📦 Installing imbalanced-learn...
✅ All libraries installed successfully.

--- Starting Main Process ---
⚙️ Performing initial setup...
Mounted at /content/drive
✅ Setup complete. Dataset loaded successfully.

🚀 Starting re-evaluation process for detailed reports...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

--- Re-evaluating Fold 1/5 ---
  ⚠️ 'trainer_state.json' not found in Fold 1. Searching manually...
  ✅ Using found checkpoint: checkpoint-685


Trainer is attempting to log a value of "{np.str_('Negative'): {'precision': 0.825, 'recall': 0.9065934065934066, 'f1-score': 0.8638743455497382, 'support': 182.0}, np.str_('Neutral'): {'precision': 0.7636363636363637, 'recall': 0.5, 'f1-score': 0.60431654676259, 'support': 84.0}, np.str_('Positive'): {'precision': 0.803921568627451, 'recall': 0.8661971830985915, 'f1-score': 0.8338983050847457, 'support': 142.0}, 'accuracy': 0.8088235294117647, 'macro avg': {'precision': 0.7975193107546049, 'recall': 0.757596863230666, 'f1-score': 0.7673630657990246, 'support': 408.0}, 'weighted avg': {'precision': 0.8050301894376289, 'recall': 0.8088235294117647, 'f1-score': 0.8000031375983917, 'support': 408.0}}" of type <class 'dict'> for key "eval/detailed_classification_report" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mitaeyeong2532[0m ([33mitaeyeong2532-telkom-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


  👍 Evaluation of Fold 1 complete.
--- Re-evaluating Fold 2/5 ---
  ⚠️ 'trainer_state.json' not found in Fold 2. Searching manually...
  ✅ Using found checkpoint: checkpoint-685


Trainer is attempting to log a value of "{np.str_('Negative'): {'precision': 0.8877005347593583, 'recall': 0.9120879120879121, 'f1-score': 0.8997289972899729, 'support': 182.0}, np.str_('Neutral'): {'precision': 0.7333333333333333, 'recall': 0.6547619047619048, 'f1-score': 0.6918238993710691, 'support': 84.0}, np.str_('Positive'): {'precision': 0.821917808219178, 'recall': 0.8450704225352113, 'f1-score': 0.8333333333333334, 'support': 142.0}, 'accuracy': 0.8357843137254902, 'macro avg': {'precision': 0.8143172254372898, 'recall': 0.8039734131283427, 'f1-score': 0.8082954099981251, 'support': 408.0}, 'weighted avg': {'precision': 0.8330240835620748, 'recall': 0.8357843137254902, 'f1-score': 0.833816711733525, 'support': 408.0}}" of type <class 'dict'> for key "eval/detailed_classification_report" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


  👍 Evaluation of Fold 2 complete.
--- Re-evaluating Fold 3/5 ---
  ⚠️ 'trainer_state.json' not found in Fold 3. Searching manually...
  ✅ Using found checkpoint: checkpoint-685


Trainer is attempting to log a value of "{np.str_('Negative'): {'precision': 0.8695652173913043, 'recall': 0.8791208791208791, 'f1-score': 0.8743169398907104, 'support': 182.0}, np.str_('Neutral'): {'precision': 0.7012987012987013, 'recall': 0.6506024096385542, 'f1-score': 0.675, 'support': 83.0}, np.str_('Positive'): {'precision': 0.8287671232876712, 'recall': 0.852112676056338, 'f1-score': 0.8402777777777778, 'support': 142.0}, 'accuracy': 0.8230958230958231, 'macro avg': {'precision': 0.799877013992559, 'recall': 0.7939453216052571, 'f1-score': 0.7965315725561627, 'support': 407.0}, 'weighted avg': {'precision': 0.8210161997048131, 'recall': 0.8230958230958231, 'f1-score': 0.8217939250726135, 'support': 407.0}}" of type <class 'dict'> for key "eval/detailed_classification_report" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


  👍 Evaluation of Fold 3 complete.
--- Re-evaluating Fold 4/5 ---
  ⚠️ 'trainer_state.json' not found in Fold 4. Searching manually...
  ✅ Using found checkpoint: checkpoint-685


Trainer is attempting to log a value of "{np.str_('Negative'): {'precision': 0.882051282051282, 'recall': 0.945054945054945, 'f1-score': 0.9124668435013262, 'support': 182.0}, np.str_('Neutral'): {'precision': 0.7222222222222222, 'recall': 0.6190476190476191, 'f1-score': 0.6666666666666666, 'support': 84.0}, np.str_('Positive'): {'precision': 0.8857142857142857, 'recall': 0.8794326241134752, 'f1-score': 0.8825622775800712, 'support': 141.0}, 'accuracy': 0.855036855036855, 'macro avg': {'precision': 0.82999592999593, 'recall': 0.8145117294053464, 'f1-score': 0.8205652625826879, 'support': 407.0}, 'weighted avg': {'precision': 0.8503334503334503, 'recall': 0.855036855036855, 'f1-score': 0.8513765274103966, 'support': 407.0}}" of type <class 'dict'> for key "eval/detailed_classification_report" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


  👍 Evaluation of Fold 4 complete.
--- Re-evaluating Fold 5/5 ---
  ⚠️ 'trainer_state.json' not found in Fold 5. Searching manually...
  ✅ Using found checkpoint: checkpoint-685


Trainer is attempting to log a value of "{np.str_('Negative'): {'precision': 0.9269662921348315, 'recall': 0.9116022099447514, 'f1-score': 0.9192200557103064, 'support': 181.0}, np.str_('Neutral'): {'precision': 0.7435897435897436, 'recall': 0.6904761904761905, 'f1-score': 0.7160493827160493, 'support': 84.0}, np.str_('Positive'): {'precision': 0.8145695364238411, 'recall': 0.8661971830985915, 'f1-score': 0.8395904436860068, 'support': 142.0}, 'accuracy': 0.8501228501228502, 'macro avg': {'precision': 0.8283751907161387, 'recall': 0.8227585278398445, 'f1-score': 0.8249532940374542, 'support': 407.0}, 'weighted avg': {'precision': 0.8499049422853279, 'recall': 0.8501228501228502, 'f1-score': 0.8495057032804093, 'support': 407.0}}" of type <class 'dict'> for key "eval/detailed_classification_report" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


  👍 Evaluation of Fold 5 complete.

--- Final Aggregated Results (5-Fold CV) for IndoBERTweet ---
Macro F1-Score = 0.8035 ± 0.0206

✅ SUCCESS! Final results with per-class details have been saved to: '/content/drive/MyDrive/eecsi_revise/indobertweet_asc_results/results_indobertweet_asc_detailed.json'
