In [None]:
# Cell 1

# Step 1: Clean up any potentially conflicting versions
print("--> Uninstalling existing versions...")
!pip uninstall -y transformers accelerate datasets torch torchvision

# Step 2: Reinstall a stable, compatible set of the core libraries
print("\n--> Reinstalling core libraries...")
!pip install transformers accelerate datasets torch torchvision

# Step 3: Install the remaining libraries
print("\n--> Installing other required libraries...")
!pip install scikit-learn pandas imbalanced-learn -q

print("\n✅ All libraries have been reinstalled.")

--> Uninstalling existing versions...
Found existing installation: transformers 4.55.1
Uninstalling transformers-4.55.1:
  Successfully uninstalled transformers-4.55.1
Found existing installation: accelerate 1.10.0
Uninstalling accelerate-1.10.0:
  Successfully uninstalled accelerate-1.10.0
Found existing installation: datasets 4.0.0
Uninstalling datasets-4.0.0:
  Successfully uninstalled datasets-4.0.0
Found existing installation: torch 2.6.0+cu124
Uninstalling torch-2.6.0+cu124:
  Successfully uninstalled torch-2.6.0+cu124
Found existing installation: torchvision 0.21.0+cu124
Uninstalling torchvision-0.21.0+cu124:
  Successfully uninstalled torchvision-0.21.0+cu124

--> Reinstalling core libraries...
Collecting transformers
  Downloading transformers-4.55.2-py3-none-any.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.0/42.0 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate
  Downloading accelerate-1.10.0-py3-none-any.whl

In [None]:
# Check library versions for the paper's reproducibility section.
# `grep -E` matches substrings of each `pip freeze` line, so any installed
# package whose name merely contains one of these terms is listed as well,
# not only the five libraries named in the pattern.
!pip freeze | grep -E "transformers|torch|scikit-learn|imbalanced-learn|datasets"

datasets==4.0.0
imbalanced-learn==0.13.0
scikit-learn==1.6.1
sentence-transformers==5.1.0
tensorflow-datasets==4.9.9
torch==2.8.0
torchao==0.10.0
torchaudio @ https://download.pytorch.org/whl/cu124/torchaudio-2.6.0%2Bcu124-cp311-cp311-linux_x86_64.whl
torchdata==0.11.0
torchsummary==1.5.1
torchtune==0.6.1
torchvision==0.23.0
transformers==4.55.2
vega-datasets==0.9.0


In [None]:
from google.colab import drive
import pandas as pd
import numpy as np
import pickle
import os
import json
import torch
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report
from sklearn.utils.class_weight import compute_class_weight
from imblearn.over_sampling import RandomOverSampler
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset

# Expose Google Drive under /content/drive so the dataset and the result
# folders below can be reached as ordinary paths.
drive.mount('/content/drive')

# ---- Settings shared by the rest of the notebook ----
SEED = 42
GDRIVE_PATH = '/content/drive/MyDrive/eecsi_revise/'
MODEL_NAME = "indobenchmark/indobert-base-p1"

# Checkpoints and result files of the ASC task get their own sub-folder,
# which is created on first use.
ASC_RESULTS_PATH = os.path.join(GDRIVE_PATH, 'indobert_asc_results/')
os.makedirs(ASC_RESULTS_PATH, exist_ok=True)

# Seed PyTorch (CPU and every CUDA device) and NumPy's global generator.
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
np.random.seed(SEED)

print(f"✅ Setup complete. Working inside folder: {GDRIVE_PATH}")
print(f"✅ ASC checkpoints and results will be saved to: {ASC_RESULTS_PATH}")

Mounted at /content/drive
✅ Setup complete. Working inside folder: /content/drive/MyDrive/eecsi_revise/
✅ ASC checkpoints and results will be saved to: /content/drive/MyDrive/eecsi_revise/indobert_asc_results/


In [None]:
# Define file path for the full dataset
file_path_csv = os.path.join(GDRIVE_PATH, 'final_golden_dataset_eecsi.csv')

# Load the CSV. A missing file is reported and then re-raised: every later cell
# needs `relevant_df`, so continuing would only postpone the failure to a less
# obvious NameError (or silently reuse a stale frame from an earlier run).
try:
    df = pd.read_csv(file_path_csv)
except FileNotFoundError:
    print(f"❌ ERROR: File not found at '{file_path_csv}'.")
    raise

print(f"Successfully loaded full dataset with {len(df)} rows.")

# --- CRITICAL CHANGE: Filter for relevant data only ---
# Rows whose aspect is 'Irrelevant' are excluded from sentiment analysis;
# `.copy()` detaches the result from `df`.
relevant_df = df[df['aspect'] != 'Irrelevant'].copy()
print(f"Filtered to {len(relevant_df)} relevant rows for sentiment analysis.")

Successfully loaded full dataset with 3030 rows.
Filtered to 2037 relevant rows for sentiment analysis.


In [None]:
# Cell 5 (REVISED): Prepare Helper Functions & Classes

# 1. Custom PyTorch Dataset Class (No changes here)
class SentimentDataset(torch.utils.data.Dataset):
    """Map-style dataset that pairs tokenizer encodings with their labels.

    Args:
        encodings: Mapping from feature name to a per-example sequence;
            ``encodings[name][idx]`` is turned into a tensor for example ``idx``.
        labels: Sequence holding one label per example; its length defines
            the size of the dataset.
    """

    def __init__(self, encodings, labels):
        self.labels = labels
        self.encodings = encodings

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        # Build one example: every encoded feature plus the label under 'labels'.
        sample = {}
        for feature_name, feature_values in self.encodings.items():
            sample[feature_name] = torch.tensor(feature_values[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

# 2. Function to compute metrics (No changes here)
def compute_metrics(p):
    """Return the macro-averaged F1 score of a set of predictions.

    Args:
        p: Object exposing ``predictions`` (per-class scores; the argmax
            along axis 1 is taken as the predicted class) and ``label_ids``
            (the reference labels).

    Returns:
        dict: ``{"macro_f1": <macro avg f1-score>}``.
    """
    predicted_ids = np.argmax(p.predictions, axis=1)
    # zero_division=0 scores an undefined precision/recall as 0 instead of warning.
    per_class_report = classification_report(
        p.label_ids,
        predicted_ids,
        output_dict=True,
        zero_division=0,
    )
    macro_average = per_class_report["macro avg"]
    return {"macro_f1": macro_average["f1-score"]}

# 3. Custom Trainer for Weighted Loss
class WeightedLossTrainer(Trainer):
    """Trainer whose loss is a (optionally class-weighted) cross-entropy.

    Args:
        *args, **kwargs: Forwarded unchanged to ``Trainer.__init__``.
        class_weights: Optional tensor passed as ``weight`` to
            ``torch.nn.CrossEntropyLoss``. When ``None`` (the default) the
            loss is an ordinary, unweighted cross-entropy.
    """

    def __init__(self, *args, class_weights=None, **kwargs):
        super().__init__(*args, **kwargs)
        self.class_weights = class_weights

    # **kwargs absorbs any extra arguments the calling Trainer passes in;
    # they are accepted and not used here.
    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        """Compute cross-entropy between the model's logits and ``inputs["labels"]``.

        Returns:
            The loss, or ``(loss, outputs)`` when ``return_outputs`` is true.
        """
        labels = inputs.get("labels")
        outputs = model(**inputs)
        logits = outputs.get("logits")

        # Only move the weights when some were supplied: calling `.to()` on the
        # default `None` would raise AttributeError.
        weights = None
        if self.class_weights is not None:
            weights = self.class_weights.to(logits.device)

        loss_fct = torch.nn.CrossEntropyLoss(weight=weights)
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss

print("✅ Helper functions and classes are ready.")

✅ Helper functions and classes are ready.


In [None]:
# Stratified k-fold cross-validation of IndoBERT on the sentiment (ASC) task.
# For every fold: oversample the training split, compute class weights, tokenize,
# fine-tune a fresh model, evaluate on the held-out split and keep the metrics.
#
# NOTE(review): the held-out split is also the `eval_dataset` used by
# `load_best_model_at_end` to pick the best epoch, so the reported score is
# selected on the same data it is measured on. Consider carving a validation
# split out of the training fold if an unbiased estimate is required.
# NOTE(review): class imbalance is compensated twice (random oversampling AND a
# class-weighted loss) — confirm this is intended.

# List to store the evaluation results from each fold
fold_results = []
# --- CRITICAL CHANGE: Target is now 'sentiment' ---
X = relevant_df['cleaned_text']
y = relevant_df['sentiment']

# Create label mappings for the sentiment classes (sorted for a stable id order)
labels = np.array(sorted(y.unique()))
label2id = {label: i for i, label in enumerate(labels)}
id2label = {i: label for i, label in enumerate(labels)}

# Define K-Fold splits based on the relevant data
n_splits = 5
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=SEED)

# The tokenizer is identical for every fold, so it is loaded once up front.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

for i, (train_index, test_index) in enumerate(skf.split(X, y)):
    print(f"--- Running Fold {i+1}/{n_splits} ---")

    # 1. Split data for the current fold
    train_df = relevant_df.iloc[train_index]
    test_df = relevant_df.iloc[test_index]

    # 2. Apply Random Oversampling on the training data
    ros = RandomOverSampler(random_state=SEED)
    X_train_resampled, y_train_resampled = ros.fit_resample(train_df[['cleaned_text']], train_df['sentiment'])
    train_df_resampled = pd.concat([X_train_resampled, y_train_resampled], axis=1)

    # 3. Compute Class Weights from the ORIGINAL imbalanced training data
    class_weights = compute_class_weight('balanced', classes=labels, y=train_df['sentiment'])
    class_weights_tensor = torch.tensor(class_weights, dtype=torch.float)

    # 4. Tokenize data
    train_encodings = tokenizer(list(train_df_resampled['cleaned_text']), truncation=True, padding=True, max_length=128)
    test_encodings = tokenizer(list(test_df['cleaned_text']), truncation=True, padding=True, max_length=128)

    train_labels = [label2id[label] for label in train_df_resampled['sentiment']]
    test_labels = [label2id[label] for label in test_df['sentiment']]

    train_dataset = SentimentDataset(train_encodings, train_labels)
    test_dataset = SentimentDataset(test_encodings, test_labels)

    # 5. Initialize a fresh model for this fold, one output per sentiment class
    model = AutoModelForSequenceClassification.from_pretrained(
        MODEL_NAME,
        num_labels=len(labels),
        label2id=label2id,
        id2label=id2label
    )

    # 6. Define Training Arguments
    training_args = TrainingArguments(
        output_dir=os.path.join(ASC_RESULTS_PATH, f'fold_{i+1}'),
        num_train_epochs=5,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        learning_rate=3e-5, # Common to use a slightly higher LR for the second task
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        metric_for_best_model="macro_f1",
        weight_decay=0.01,
        logging_dir='./logs',
        logging_steps=10,
        seed=SEED,
    )

    # 7. Use the custom WeightedLossTrainer
    trainer = WeightedLossTrainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        compute_metrics=compute_metrics,
        class_weights=class_weights_tensor,
    )

    # 8. Train the model
    trainer.train()

    # Persist trainer_state.json (which records `best_model_checkpoint`) in the
    # fold's output directory, so the best checkpoint can be located again
    # after the session ends instead of guessing from the checkpoint folders.
    trainer.save_state()

    # 9. Evaluate and store the results
    eval_results = trainer.evaluate()
    fold_results.append(eval_results)
    print(f"Fold {i+1} complete. Evaluation results: {eval_results}")

print(f"\n✅ {n_splits}-fold cross-validation process for IndoBERT (ASC) finished.")

--- Running Fold 1/5 ---


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/498M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


model.safetensors:   0%|          | 0.00/498M [00:00<?, ?B/s]

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mitaeyeong2532[0m ([33mitaeyeong2532-telkom-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss,Macro F1
1,0.4787,0.602476,0.767692
2,0.1123,0.926587,0.782329
3,0.0764,1.234073,0.777515
4,0.0028,1.236024,0.776509
5,0.0005,1.297682,0.790033


Fold 1 complete. Evaluation results: {'eval_loss': 1.2976824045181274, 'eval_macro_f1': 0.7900329482636314, 'eval_runtime': 1.8897, 'eval_samples_per_second': 215.913, 'eval_steps_per_second': 13.759, 'epoch': 5.0}
--- Running Fold 2/5 ---


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Macro F1
1,0.417,0.813425,0.706337
2,0.1012,0.962631,0.741655
3,0.0525,1.148405,0.76114
4,0.0495,1.242624,0.760751
5,0.0005,1.399604,0.771923


Fold 2 complete. Evaluation results: {'eval_loss': 1.3996038436889648, 'eval_macro_f1': 0.7719231672720044, 'eval_runtime': 1.7819, 'eval_samples_per_second': 228.971, 'eval_steps_per_second': 14.591, 'epoch': 5.0}
--- Running Fold 3/5 ---


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Macro F1
1,0.4232,0.694711,0.748564
2,0.1446,0.82495,0.80958
3,0.0583,0.860994,0.834245
4,0.0136,0.954426,0.829747
5,0.001,0.98454,0.841127


Fold 3 complete. Evaluation results: {'eval_loss': 0.984539806842804, 'eval_macro_f1': 0.8411266844312918, 'eval_runtime': 1.7188, 'eval_samples_per_second': 236.793, 'eval_steps_per_second': 15.127, 'epoch': 5.0}
--- Running Fold 4/5 ---


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Macro F1
1,0.483,0.671126,0.717768
2,0.1584,1.006389,0.735179
3,0.0625,1.254168,0.775121
4,0.0092,1.292184,0.787556
5,0.0006,1.354998,0.768308


Fold 4 complete. Evaluation results: {'eval_loss': 1.2921838760375977, 'eval_macro_f1': 0.7875560711982952, 'eval_runtime': 1.8125, 'eval_samples_per_second': 224.551, 'eval_steps_per_second': 14.345, 'epoch': 5.0}
--- Running Fold 5/5 ---


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Macro F1
1,0.3647,0.46557,0.832151
2,0.1749,0.577392,0.8197
3,0.07,0.911437,0.826312
4,0.0397,1.016857,0.817026
5,0.0007,0.966936,0.815827


Fold 5 complete. Evaluation results: {'eval_loss': 0.465570330619812, 'eval_macro_f1': 0.8321511455555631, 'eval_runtime': 1.4705, 'eval_samples_per_second': 276.774, 'eval_steps_per_second': 17.681, 'epoch': 5.0}

✅ 5-fold cross-validation process for IndoBERT (ASC) finished.


In [None]:
# Gather the macro F1 reported by every fold, in fold order.
macro_f1_scores = [fold_metrics['eval_macro_f1'] for fold_metrics in fold_results]

# Summarise across folds. np.std is called with its defaults here.
std_macro_f1 = np.std(macro_f1_scores)
mean_macro_f1 = np.mean(macro_f1_scores)

summary_header = "--- Final Aggregated Results (5-Fold CV) for IndoBERT (ASC) ---"
print(summary_header)
print(f"Macro F1-Score = {mean_macro_f1:.4f} ± {std_macro_f1:.4f}")

--- Final Aggregated Results (5-Fold CV) for IndoBERT (ASC) ---
Macro F1-Score = 0.8046 ± 0.0271


In [None]:
# Destination of the summary file inside the ASC results folder.
results_file_path = os.path.join(ASC_RESULTS_PATH, 'results_indobert_asc.json')

# Everything that goes into the file: the aggregate scores plus the raw
# per-fold evaluation dictionaries.
final_results = dict(
    model='IndoBERT (ASC)',
    mean_macro_f1=mean_macro_f1,
    std_dev_macro_f1=std_macro_f1,
    results_per_fold=fold_results,
)

# Write the summary as indented JSON (overwrites an existing file).
with open(results_file_path, 'w') as f:
    json.dump(final_results, f, indent=4)

print(f"\n✅ Final results for IndoBERT (ASC) have been saved to: '{results_file_path}'")


✅ Final results for IndoBERT (ASC) have been saved to: '/content/drive/MyDrive/eecsi_revise/indobert_asc_results/results_indobert_asc.json'


In [None]:
import os
import json
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
import torch

# Make sure all variables from the previous cells are loaded.
# If any of them is missing, the configuration, the dataset and the label
# mappings are rebuilt here so this cell can run on its own.
print("--- Ensuring variables and data are ready ---")

try:
    # Try to access variables that should already be in the notebook's memory;
    # the first missing name raises NameError and triggers the reload below.
    _ = relevant_df
    _ = ASC_RESULTS_PATH
    _ = SEED
    _ = MODEL_NAME
    _ = label2id
    _ = id2label
    print("✅ Variables from the active session were found successfully.")
except NameError:
    # If they don't exist (e.g., runtime was restarted), reload from scratch
    print("⚠️ Variables not found, attempting to reload data and setup...")
    GDRIVE_PATH = '/content/drive/MyDrive/eecsi_revise/'
    ASC_RESULTS_PATH = os.path.join(GDRIVE_PATH, 'indobert_asc_results/')
    SEED = 42
    MODEL_NAME = "indobenchmark/indobert-base-p1"

    # The dataset lives on Google Drive. On a fresh machine nothing is mounted
    # yet, so mount it before reading; an existing mount is left alone.
    if not os.path.isdir('/content/drive/MyDrive'):
        from google.colab import drive
        drive.mount('/content/drive')

    file_path_csv = os.path.join(GDRIVE_PATH, 'final_golden_dataset_eecsi.csv')
    df = pd.read_csv(file_path_csv)
    relevant_df = df[df['aspect'] != 'Irrelevant'].copy()

    # Rebuild the label <-> id mappings from the sorted unique sentiment values.
    y = relevant_df['sentiment']
    labels_list = np.array(sorted(y.unique()))
    label2id = {label: i for i, label in enumerate(labels_list)}
    id2label = {i: label for i, label in enumerate(labels_list)}
    print("✅ Data and setup were reloaded successfully.")


# --- REDEFINE IMPORTANT CLASSES AND FUNCTIONS ---

# 1. Custom PyTorch Dataset Class
class SentimentDataset(torch.utils.data.Dataset):
    """Map-style dataset that pairs tokenizer encodings with their labels.

    Args:
        encodings: Mapping from feature name to a per-example sequence;
            ``encodings[name][idx]`` is turned into a tensor for example ``idx``.
        labels: Sequence holding one label per example; its length defines
            the size of the dataset.
    """

    def __init__(self, encodings, labels):
        self.labels = labels
        self.encodings = encodings

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        # Build one example: every encoded feature plus the label under 'labels'.
        sample = {}
        for feature_name, feature_values in self.encodings.items():
            sample[feature_name] = torch.tensor(feature_values[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

# 2. New, more detailed metrics function
def compute_metrics_final(p):
    """Return the macro F1 together with the full per-class report.

    Class names are taken from the module-level ``id2label`` mapping, ordered
    by id; the report is computed over label ids ``0 .. len(id2label) - 1``.

    Args:
        p: Object exposing ``predictions`` (per-class scores; the argmax
            along axis 1 is taken as the predicted class) and ``label_ids``.

    Returns:
        dict: ``"macro_f1"`` (macro avg f1-score) and
        ``"detailed_classification_report"`` (the report as a nested dict).
    """
    predicted_ids = np.argmax(p.predictions, axis=1)

    ordered_ids = sorted(id2label.keys())
    class_names = [id2label[class_id] for class_id in ordered_ids]

    report = classification_report(
        y_true=p.label_ids,
        y_pred=predicted_ids,
        labels=list(range(len(class_names))),
        target_names=class_names,
        output_dict=True,
        zero_division=0,
    )

    macro_f1 = report["macro avg"]["f1-score"]
    return {
        "macro_f1": macro_f1,
        "detailed_classification_report": report,
    }

# --- RE-EVALUATION AND SAVING PROCESS ---
#
# Each fold's best checkpoint is reloaded and evaluated on that fold's held-out
# split (the split is reproduced with the same StratifiedKFold settings).
#
# The Trainer writes `trainer_state.json` inside every `checkpoint-*` folder;
# it only exists at the fold root if `save_state()` was called explicitly.
# Looking at the fold root alone therefore fell back to the LAST checkpoint,
# which is not necessarily the best one. Both locations are consulted now.

def _list_checkpoint_dirs(fold_dir):
    """Return the names of the fold's `checkpoint-<step>` folders, lowest step first."""
    names = [
        entry for entry in os.listdir(fold_dir)
        if entry.startswith('checkpoint-') and os.path.isdir(os.path.join(fold_dir, entry))
    ]
    return sorted(names, key=lambda name: int(name.split('-')[-1]))


def _find_best_checkpoint(fold_dir):
    """Locate the checkpoint to evaluate for one fold.

    Returns:
        (path, source): `source` is 'state' when the path comes from a
        `best_model_checkpoint` entry in a trainer_state.json, 'latest' when
        no such entry was usable and the highest-step checkpoint is returned,
        and (None, None) when the fold contains no checkpoint at all.

    Raises:
        FileNotFoundError: if `fold_dir` does not exist.
    """
    checkpoint_names = _list_checkpoint_dirs(fold_dir)

    # The fold root is checked first, then the most recent checkpoint, whose
    # state file is the last one written during training.
    state_candidates = [os.path.join(fold_dir, 'trainer_state.json')]
    if checkpoint_names:
        state_candidates.append(os.path.join(fold_dir, checkpoint_names[-1], 'trainer_state.json'))

    for state_path in state_candidates:
        try:
            with open(state_path, 'r') as f:
                state = json.load(f)
        except FileNotFoundError:
            continue

        recorded = state.get('best_model_checkpoint')
        if not recorded:
            # Key missing or null: this state file cannot help.
            continue
        if os.path.isdir(recorded):
            return recorded, 'state'
        # The recorded path may be stale (e.g. the results folder was moved);
        # retry with the same folder name underneath the current fold directory.
        relocated = os.path.join(fold_dir, os.path.basename(os.path.normpath(recorded)))
        if os.path.isdir(relocated):
            return relocated, 'state'

    if checkpoint_names:
        return os.path.join(fold_dir, checkpoint_names[-1]), 'latest'
    return None, None


print("\n🚀 Starting re-evaluation process to get detailed reports...")

recovered_fold_results = []
X_relevant = relevant_df['cleaned_text']
y_relevant = relevant_df['sentiment']

# Same splitter settings as during training, so the held-out splits match.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

for i, (train_index, test_index) in enumerate(skf.split(X_relevant, y_relevant)):
    fold_num = i + 1
    print(f"--- Re-evaluating Fold {fold_num}/5 ---")

    fold_dir = os.path.join(ASC_RESULTS_PATH, f'fold_{fold_num}')

    try:
        best_checkpoint_path, checkpoint_source = _find_best_checkpoint(fold_dir)
    except FileNotFoundError:
        print(f"  ❌ Directory for Fold {fold_num} not found. Skipping.")
        continue

    if best_checkpoint_path is None:
        print(f"  ❌ Failed to find a checkpoint directory in Fold {fold_num}. Skipping this fold.")
        continue

    if checkpoint_source == 'state':
        print(f"  ✅ Found best checkpoint at: {os.path.basename(best_checkpoint_path)}")
    else:
        # Make it obvious that this is a guess and not the recorded best model.
        print(f"  ⚠️ No usable 'best_model_checkpoint' record in Fold {fold_num}. "
              f"Falling back to the latest checkpoint: {os.path.basename(best_checkpoint_path)}")

    # Rebuild the held-out split of this fold.
    test_df = relevant_df.iloc[test_index]
    test_encodings = tokenizer(list(test_df['cleaned_text']), truncation=True, padding=True, max_length=128)
    test_labels = [label2id[label] for label in test_df['sentiment']]
    test_dataset = SentimentDataset(test_encodings, test_labels)

    # Evaluation only: a plain Trainer with the detailed metrics function.
    model = AutoModelForSequenceClassification.from_pretrained(best_checkpoint_path)
    trainer = Trainer(model=model, compute_metrics=compute_metrics_final)

    eval_results = trainer.evaluate(test_dataset)
    recovered_fold_results.append(eval_results)
    print(f"  👍 Evaluation of Fold {fold_num} complete.")

# --- Aggregate and Save Final Results ---
# Keeps a trimmed copy of every fold's evaluation output, summarises the macro
# F1 across the folds that were evaluated, and writes everything to JSON.
if recovered_fold_results:
    # Keep only the fields that belong in the saved report.
    final_results_per_fold = [
        {
            'eval_loss': res.get('eval_loss'),
            'eval_macro_f1': res.get('eval_macro_f1'),
            'classification_report': res.get('eval_detailed_classification_report'),
            'eval_runtime_seconds': res.get('eval_runtime'),
        }
        for res in recovered_fold_results
    ]

    # Compare against None explicitly: a truthiness test would also discard a
    # legitimate score of 0.0 and silently inflate the mean.
    macro_f1_scores = [
        result['eval_macro_f1']
        for result in final_results_per_fold
        if result.get('eval_macro_f1') is not None
    ]
    mean_macro_f1 = np.mean(macro_f1_scores) if macro_f1_scores else 0
    std_macro_f1 = np.std(macro_f1_scores) if macro_f1_scores else 0

    print("\n--- Final Aggregated Results (5-Fold CV) ---")
    print(f"Macro F1-Score = {mean_macro_f1:.4f} ± {std_macro_f1:.4f}")

    final_results_to_save = {
        'model': 'IndoBERT (ASC)',
        'mean_macro_f1': mean_macro_f1,
        'std_dev_macro_f1': std_macro_f1,
        'results_per_fold': final_results_per_fold
    }

    results_file_path = os.path.join(ASC_RESULTS_PATH, 'results_indobert_asc_detailed.json')
    with open(results_file_path, 'w') as f:
        json.dump(final_results_to_save, f, indent=4)

    print(f"\n✅ SUCCESS! Final results with per-class details have been saved to: '{results_file_path}'")
else:
    print("\n❌ No results could be processed. Please ensure the checkpoint paths are correct.")

--- Ensuring variables and data are ready ---
✅ Variables from the active session were found successfully.

🚀 Starting re-evaluation process to get detailed reports...
--- Re-evaluating Fold 1/5 ---
  ⚠️ 'trainer_state.json' not found in Fold 1. Searching manually...
  ✅ Using found checkpoint: checkpoint-685


Trainer is attempting to log a value of "{np.str_('Negative'): {'precision': 0.8232323232323232, 'recall': 0.8956043956043956, 'f1-score': 0.8578947368421053, 'support': 182.0}, np.str_('Neutral'): {'precision': 0.7428571428571429, 'recall': 0.6190476190476191, 'f1-score': 0.6753246753246753, 'support': 84.0}, np.str_('Positive'): {'precision': 0.8428571428571429, 'recall': 0.8309859154929577, 'f1-score': 0.8368794326241135, 'support': 142.0}, 'accuracy': 0.8161764705882353, 'macro avg': {'precision': 0.802982202982203, 'recall': 0.7818793100483242, 'f1-score': 0.7900329482636314, 'support': 408.0}, 'weighted avg': {'precision': 0.8135146988088163, 'recall': 0.8161764705882353, 'f1-score': 0.8129926330028431, 'support': 408.0}}" of type <class 'dict'> for key "eval/detailed_classification_report" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


  👍 Evaluation of Fold 1 complete.
--- Re-evaluating Fold 2/5 ---
  ⚠️ 'trainer_state.json' not found in Fold 2. Searching manually...
  ✅ Using found checkpoint: checkpoint-685


Trainer is attempting to log a value of "{np.str_('Negative'): {'precision': 0.8195121951219512, 'recall': 0.9230769230769231, 'f1-score': 0.8682170542635659, 'support': 182.0}, np.str_('Neutral'): {'precision': 0.7796610169491526, 'recall': 0.5476190476190477, 'f1-score': 0.6433566433566433, 'support': 84.0}, np.str_('Positive'): {'precision': 0.7986111111111112, 'recall': 0.8098591549295775, 'f1-score': 0.8041958041958042, 'support': 142.0}, 'accuracy': 0.8063725490196079, 'macro avg': {'precision': 0.7992614410607383, 'recall': 0.7601850418751828, 'f1-score': 0.7719231672720044, 'support': 408.0}, 'weighted avg': {'precision': 0.8040331439061316, 'recall': 0.8063725490196079, 'f1-score': 0.7996403581218903, 'support': 408.0}}" of type <class 'dict'> for key "eval/detailed_classification_report" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


  👍 Evaluation of Fold 2 complete.
--- Re-evaluating Fold 3/5 ---
  ⚠️ 'trainer_state.json' not found in Fold 3. Searching manually...
  ✅ Using found checkpoint: checkpoint-685


Trainer is attempting to log a value of "{np.str_('Negative'): {'precision': 0.8789473684210526, 'recall': 0.9175824175824175, 'f1-score': 0.8978494623655914, 'support': 182.0}, np.str_('Neutral'): {'precision': 0.7763157894736842, 'recall': 0.7108433734939759, 'f1-score': 0.7421383647798742, 'support': 83.0}, np.str_('Positive'): {'precision': 0.8865248226950354, 'recall': 0.8802816901408451, 'f1-score': 0.8833922261484098, 'support': 142.0}, 'accuracy': 0.8624078624078624, 'macro avg': {'precision': 0.8472626601965908, 'recall': 0.8362358270724127, 'f1-score': 0.8411266844312918, 'support': 407.0}, 'weighted avg': {'precision': 0.8606613179401533, 'recall': 0.8624078624078624, 'f1-score': 0.8610510627526815, 'support': 407.0}}" of type <class 'dict'> for key "eval/detailed_classification_report" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


  👍 Evaluation of Fold 3 complete.
--- Re-evaluating Fold 4/5 ---
  ⚠️ 'trainer_state.json' not found in Fold 4. Searching manually...
  ✅ Using found checkpoint: checkpoint-685


Trainer is attempting to log a value of "{np.str_('Negative'): {'precision': 0.8702702702702703, 'recall': 0.8846153846153846, 'f1-score': 0.8773841961852861, 'support': 182.0}, np.str_('Neutral'): {'precision': 0.7049180327868853, 'recall': 0.5119047619047619, 'f1-score': 0.593103448275862, 'support': 84.0}, np.str_('Positive'): {'precision': 0.782608695652174, 'recall': 0.8936170212765957, 'f1-score': 0.8344370860927153, 'support': 141.0}, 'accuracy': 0.8108108108108109, 'macro avg': {'precision': 0.7859323329031098, 'recall': 0.7633790559322474, 'f1-score': 0.7683082435179545, 'support': 407.0}, 'weighted avg': {'precision': 0.8057742752585849, 'recall': 0.8108108108108109, 'f1-score': 0.8038335196559394, 'support': 407.0}}" of type <class 'dict'> for key "eval/detailed_classification_report" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


  👍 Evaluation of Fold 4 complete.
--- Re-evaluating Fold 5/5 ---
  ⚠️ 'trainer_state.json' not found in Fold 5. Searching manually...
  ✅ Using found checkpoint: checkpoint-685


Trainer is attempting to log a value of "{np.str_('Negative'): {'precision': 0.9021739130434783, 'recall': 0.9171270718232044, 'f1-score': 0.9095890410958904, 'support': 181.0}, np.str_('Neutral'): {'precision': 0.7638888888888888, 'recall': 0.6547619047619048, 'f1-score': 0.7051282051282052, 'support': 84.0}, np.str_('Positive'): {'precision': 0.8079470198675497, 'recall': 0.8591549295774648, 'f1-score': 0.8327645051194539, 'support': 142.0}, 'accuracy': 0.8427518427518428, 'macro avg': {'precision': 0.8246699405999722, 'recall': 0.810347968720858, 'f1-score': 0.8158272504478498, 'support': 407.0}, 'weighted avg': {'precision': 0.8407582843949097, 'recall': 0.8427518427518428, 'f1-score': 0.8405870894252773, 'support': 407.0}}" of type <class 'dict'> for key "eval/detailed_classification_report" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


  👍 Evaluation of Fold 5 complete.

--- Final Aggregated Results (5-Fold CV) ---
Macro F1-Score = 0.7974 ± 0.0276

✅ SUCCESS! Final results with per-class details have been saved to: '/content/drive/MyDrive/eecsi_revise/indobert_asc_results/results_indobert_asc_detailed.json'
