In [None]:
# Cell 1 (REVISED): Uninstall and Reinstall Key Libraries

# Step 1: Clean up any potentially conflicting versions
print("--> Uninstalling existing versions...")
!pip uninstall -y transformers accelerate datasets

# Step 2: Reinstall a stable, compatible set of the core libraries
print("\n--> Reinstalling core libraries...")
!pip install transformers accelerate datasets

# Step 3: Install the remaining libraries
print("\n--> Installing other required libraries...")
!pip install scikit-learn pandas imbalanced-learn -q

print("\n✅ All libraries have been reinstalled.")

--> Uninstalling existing versions...
Found existing installation: transformers 4.55.1
Uninstalling transformers-4.55.1:
  Successfully uninstalled transformers-4.55.1
Found existing installation: accelerate 1.10.0
Uninstalling accelerate-1.10.0:
  Successfully uninstalled accelerate-1.10.0
Found existing installation: datasets 4.0.0
Uninstalling datasets-4.0.0:
  Successfully uninstalled datasets-4.0.0

--> Reinstalling core libraries...
Collecting transformers
  Downloading transformers-4.55.2-py3-none-any.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.0/42.0 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate
  Downloading accelerate-1.10.0-py3-none-any.whl.metadata (19 kB)
Collecting datasets
  Downloading datasets-4.0.0-py3-none-any.whl.metadata (19 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.

In [None]:
# <-- NEW: Check library versions
# This output should be noted for the 'reproducibility' section of the paper
!pip freeze | grep -E "transformers|torch|scikit-learn|imbalanced-learn|datasets"

datasets==4.0.0
imbalanced-learn==0.13.0
scikit-learn==1.6.1
sentence-transformers==5.1.0
tensorflow-datasets==4.9.9
torch @ https://download.pytorch.org/whl/cu124/torch-2.6.0%2Bcu124-cp311-cp311-linux_x86_64.whl
torchao==0.10.0
torchaudio @ https://download.pytorch.org/whl/cu124/torchaudio-2.6.0%2Bcu124-cp311-cp311-linux_x86_64.whl
torchdata==0.11.0
torchsummary==1.5.1
torchtune==0.6.1
torchvision @ https://download.pytorch.org/whl/cu124/torchvision-0.21.0%2Bcu124-cp311-cp311-linux_x86_64.whl
transformers==4.55.2
vega-datasets==0.9.0


In [None]:
import json
import os
import pickle
import random

import numpy as np
import pandas as pd
import torch
from datasets import Dataset
from google.colab import drive
from imblearn.over_sampling import RandomOverSampler
from sklearn.metrics import classification_report
from sklearn.model_selection import StratifiedKFold
from sklearn.utils.class_weight import compute_class_weight
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments

# Mount Google Drive so the dataset, split file and checkpoints are reachable
drive.mount('/content/drive')

# --- KEY SETTINGS ---
SEED = 42                                              # shared seed for every RNG below
GDRIVE_PATH = '/content/drive/MyDrive/eecsi_revise/'   # working folder on Drive
MODEL_NAME = "indobenchmark/indobert-base-p1"          # Hugging Face checkpoint id

# Set seed for reproducibility across all libraries.
# Python's built-in RNG is seeded too, so that any code relying on `random`
# is covered as well as NumPy and PyTorch (CPU and all CUDA devices).
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

print(f"✅ Setup complete. Working inside folder: {GDRIVE_PATH}")

Mounted at /content/drive
✅ Setup complete. Working inside folder: /content/drive/MyDrive/eecsi_revise/


In [None]:
# Define file paths
file_path_csv = os.path.join(GDRIVE_PATH, 'final_golden_dataset_eecsi.csv')
file_path_split = os.path.join(GDRIVE_PATH, 'kfold_splits.pkl')

# Load the dataset and the pre-computed fold split file
try:
    df = pd.read_csv(file_path_csv)
    # NOTE(review): pickle.load can execute arbitrary code while unpickling;
    # only load a split file that you created yourself.
    with open(file_path_split, 'rb') as f:
        kfold_splits = pickle.load(f)
except FileNotFoundError as e:
    print(f"❌ ERROR: File not found. Please ensure '{e.filename}' is in the 'eecsi_revise' folder in your Google Drive.")
    # Re-raise so execution stops here; otherwise later cells would fail with
    # a confusing NameError because `df` / `kfold_splits` were never defined.
    raise
else:
    # Report the real number of folds found in the split file.
    print(f"✅ Successfully loaded dataset and {len(kfold_splits)}-fold splits.")

✅ Successfully loaded dataset and 5-fold splits.


In [None]:
# Cell 5: Prepare Helper Functions & Classes

# 1. Custom PyTorch Dataset Class
class AspectDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenizer encodings with integer labels.

    `encodings` is a mapping from field name to a per-example sequence, and
    `labels` is a sequence with one entry per example. Each item is returned
    as a dict of tensors with an additional 'labels' entry.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        # The number of examples is defined by the label sequence.
        return len(self.labels)

    def __getitem__(self, idx):
        sample = {}
        # Convert every encoded field of the requested example to a tensor.
        for field_name, column in self.encodings.items():
            sample[field_name] = torch.tensor(column[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

# 2. Function to compute metrics
def compute_metrics(p):
    """Compute macro F1 and a per-class report for a Trainer evaluation.

    Args:
        p: prediction object exposing `predictions` (class scores, arg-maxed
           over axis 1) and `label_ids` (true integer labels).

    Returns:
        dict with:
          - "macro_f1": macro-averaged F1 score (float)
          - "detailed_report": the full `classification_report` dict

    Notes:
        Reads the module-level `id2label` mapping, so that mapping must be
        defined before the first evaluation runs.
        The nested "detailed_report" dict is not a scalar, so the Trainer
        prints a warning that it cannot log it; it is still returned in the
        evaluation results.
    """
    preds = np.argmax(p.predictions, axis=1)
    # Plain `str` names keep NumPy string types out of the report keys.
    class_labels = [str(id2label[i]) for i in range(len(id2label))]
    # The labels parameter is needed for a consistent report structure:
    # every class gets a row even if it is absent from this evaluation set.
    detailed_report = classification_report(
        p.label_ids,
        preds,
        labels=list(range(len(class_labels))),
        target_names=class_labels,
        output_dict=True,
        zero_division=0,
    )
    return {"macro_f1": detailed_report["macro avg"]["f1-score"], "detailed_report": detailed_report}

# 3. Custom Trainer for Weighted Loss
class WeightedLossTrainer(Trainer):
    """Trainer whose loss is a (optionally class-weighted) cross-entropy.

    Args:
        class_weights: optional 1-D tensor with one weight per class. When it
            is None, an unweighted cross-entropy loss is used.
        *args, **kwargs: forwarded unchanged to `Trainer.__init__`.
    """

    def __init__(self, *args, class_weights=None, **kwargs):
        super().__init__(*args, **kwargs)
        self.class_weights = class_weights

    # **kwargs absorbs any extra keyword arguments the calling Trainer passes
    # (presumably e.g. `num_items_in_batch` in recent transformers releases).
    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        """Return the weighted cross-entropy loss for one batch.

        Returns `(loss, outputs)` when `return_outputs` is True, otherwise
        just `loss`.
        """
        labels = inputs.get("labels")
        outputs = model(**inputs)
        logits = outputs.get("logits")
        # Move class_weights to the same device as logits; tolerate the
        # default `class_weights=None` instead of failing on `None.to(...)`.
        weights = None
        if self.class_weights is not None:
            weights = self.class_weights.to(logits.device)
        loss_fct = torch.nn.CrossEntropyLoss(weight=weights)
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss

print("✅ Helper functions and classes are ready.")

✅ Helper functions and classes are ready.


In [None]:
# List to store the evaluation results from each fold
fold_results = []

# Create label mappings for consistency.
# `labels` stays a NumPy array because compute_class_weight expects one;
# the mappings use plain `str` so NumPy string types do not leak into the
# model config or the printed reports.
labels = np.array(sorted(df['aspect'].unique()))
label2id = {str(label): idx for idx, label in enumerate(labels)}
id2label = {idx: str(label) for idx, label in enumerate(labels)}

# The tokenizer does not depend on the fold, so it is loaded once.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

n_folds = len(kfold_splits)

for i, fold in enumerate(kfold_splits):
    print(f"--- Running Fold {i+1}/{n_folds} ---")

    # 1. Split data for the current fold
    train_index, test_index = fold['train'], fold['test']
    train_df = df.iloc[train_index]
    test_df = df.iloc[test_index]

    # 2. Apply Random Oversampling ONLY on the training data
    ros = RandomOverSampler(random_state=SEED)
    X_train_resampled, y_train_resampled = ros.fit_resample(train_df[['cleaned_text']], train_df['aspect'])
    train_df_resampled = pd.concat([X_train_resampled, y_train_resampled], axis=1)

    # 3. Compute Class Weights from the ORIGINAL imbalanced training data for this fold
    # NOTE(review): the training set is already balanced by oversampling, so
    # the weighted loss compensates for imbalance a second time — confirm
    # that this combination is intended.
    class_weights = compute_class_weight('balanced', classes=labels, y=train_df['aspect'])
    class_weights_tensor = torch.tensor(class_weights, dtype=torch.float)

    # 4. Tokenize data
    train_encodings = tokenizer(list(train_df_resampled['cleaned_text']), truncation=True, padding=True, max_length=128)
    test_encodings = tokenizer(list(test_df['cleaned_text']), truncation=True, padding=True, max_length=128)

    # Convert labels to integers
    train_labels = [label2id[label] for label in train_df_resampled['aspect']]
    test_labels = [label2id[label] for label in test_df['aspect']]

    # Create PyTorch Datasets
    train_dataset = AspectDataset(train_encodings, train_labels)
    test_dataset = AspectDataset(test_encodings, test_labels)

    # 5. Initialize the model for each fold to prevent weight leakage
    model = AutoModelForSequenceClassification.from_pretrained(
        MODEL_NAME,
        num_labels=len(labels),
        label2id=label2id,
        id2label=id2label
    )

    # <-- NEW: Print model parameter count (only for the first fold)
    if i == 0:
        print(f"Model Parameters: {model.num_parameters()/1e6:.2f}M")

    # 6. Define Training Arguments
    training_args = TrainingArguments(
        output_dir=os.path.join(GDRIVE_PATH, f'results_fold_{i+1}'),
        num_train_epochs=5,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        learning_rate=2e-5,
        eval_strategy="epoch",
        save_strategy="epoch",
        # Keep only the most recent checkpoint plus the best one (the best is
        # always retained when load_best_model_at_end=True); otherwise every
        # epoch of every fold leaves a full checkpoint on Google Drive.
        save_total_limit=1,
        load_best_model_at_end=True,
        metric_for_best_model="macro_f1",
        weight_decay=0.01,
        # Separate log folder per fold so the runs do not get mixed together.
        logging_dir=os.path.join('./logs', f'fold_{i+1}'),
        logging_steps=10,
        seed=SEED,
    )

    # 7. Use the custom WeightedLossTrainer
    # NOTE(review): the held-out fold is used as eval_dataset, so the
    # checkpoint restored by load_best_model_at_end is selected on the same
    # data that is reported below; the reported scores are therefore
    # optimistic. Consider carving a validation split out of the training
    # fold for model selection.
    trainer = WeightedLossTrainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        compute_metrics=compute_metrics,
        class_weights=class_weights_tensor,
    )

    # 8. Train the model
    trainer.train()

    # 9. Evaluate and store the results
    eval_results = trainer.evaluate()
    fold_results.append(eval_results)
    print(f"Fold {i+1} complete. Evaluation results: {eval_results}")

print(f"\n✅ {n_folds}-fold cross-validation process for IndoBERT finished.")

--- Running Fold 1/5 ---


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model Parameters: 124.45M


Epoch,Training Loss,Validation Loss,Macro F1,Detailed Report
1,0.2416,0.898412,0.702406,"{np.str_('Irrelevant'): {'precision': 0.8768115942028986, 'recall': 0.6080402010050251, 'f1-score': 0.7181008902077152, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.5656565656565656, 'recall': 0.9180327868852459, 'f1-score': 0.7, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.8, 'recall': 0.8301886792452831, 'f1-score': 0.8148148148148148, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7246376811594203, 'recall': 0.6944444444444444, 'f1-score': 0.7092198581560284, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.4823529411764706, 'recall': 0.7068965517241379, 'f1-score': 0.5734265734265734, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8421052631578947, 'recall': 0.9230769230769231, 'f1-score': 0.8807339449541285, 'support': 52.0}, np.str_('Smart People'): {'precision': 0.5588235294117647, 'recall': 0.48717948717948717, 'f1-score': 0.5205479452054794, 'support': 39.0}, 'accuracy': 0.7079207920792079, 'macro avg': {'precision': 0.6929125106807162, 'recall': 0.738265581937221, 'f1-score': 0.70240628953782, 'support': 606.0}, 'weighted avg': {'precision': 0.741416577158232, 'recall': 0.7079207920792079, 'f1-score': 0.710021779159007, 'support': 606.0}}"
2,0.0526,1.062671,0.733793,"{np.str_('Irrelevant'): {'precision': 0.8535031847133758, 'recall': 0.6733668341708543, 'f1-score': 0.7528089887640449, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.6956521739130435, 'recall': 0.7868852459016393, 'f1-score': 0.7384615384615385, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.8269230769230769, 'recall': 0.8113207547169812, 'f1-score': 0.819047619047619, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.65, 'recall': 0.8125, 'f1-score': 0.7222222222222222, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.6727272727272727, 'recall': 0.6379310344827587, 'f1-score': 0.6548672566371682, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8103448275862069, 'recall': 0.9038461538461539, 'f1-score': 0.8545454545454545, 'support': 52.0}, np.str_('Smart People'): {'precision': 0.6285714285714286, 'recall': 0.5641025641025641, 'f1-score': 0.5945945945945946, 'support': 39.0}, 'accuracy': 0.7392739273927392, 'macro avg': {'precision': 0.7339602806334862, 'recall': 0.7414217981744217, 'f1-score': 0.7337925248960916, 'support': 606.0}, 'weighted avg': {'precision': 0.7514508878061564, 'recall': 0.7392739273927392, 'f1-score': 0.7390635645713386, 'support': 606.0}}"
3,0.004,1.106318,0.734518,"{np.str_('Irrelevant'): {'precision': 0.7783018867924528, 'recall': 0.8291457286432161, 'f1-score': 0.8029197080291971, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.7142857142857143, 'recall': 0.7377049180327869, 'f1-score': 0.7258064516129032, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.8148148148148148, 'recall': 0.8301886792452831, 'f1-score': 0.822429906542056, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7222222222222222, 'recall': 0.7222222222222222, 'f1-score': 0.7222222222222222, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.6976744186046512, 'recall': 0.5172413793103449, 'f1-score': 0.594059405940594, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8867924528301887, 'recall': 0.9038461538461539, 'f1-score': 0.8952380952380953, 'support': 52.0}, np.str_('Smart People'): {'precision': 0.5945945945945946, 'recall': 0.5641025641025641, 'f1-score': 0.5789473684210527, 'support': 39.0}, 'accuracy': 0.7541254125412541, 'macro avg': {'precision': 0.7440980148778056, 'recall': 0.729207377914653, 'f1-score': 0.7345175940008745, 'support': 606.0}, 'weighted avg': {'precision': 0.7514953832404959, 'recall': 0.7541254125412541, 'f1-score': 0.7512058982809946, 'support': 606.0}}"
4,0.0171,1.271081,0.741087,"{np.str_('Irrelevant'): {'precision': 0.8457142857142858, 'recall': 0.7437185929648241, 'f1-score': 0.7914438502673797, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.71875, 'recall': 0.7540983606557377, 'f1-score': 0.736, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.8113207547169812, 'recall': 0.8113207547169812, 'f1-score': 0.8113207547169812, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.6557377049180327, 'recall': 0.8333333333333334, 'f1-score': 0.7339449541284404, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.6666666666666666, 'recall': 0.5862068965517241, 'f1-score': 0.6238532110091743, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.9, 'recall': 0.8653846153846154, 'f1-score': 0.8823529411764706, 'support': 52.0}, np.str_('Smart People'): {'precision': 0.7, 'recall': 0.5384615384615384, 'f1-score': 0.6086956521739131, 'support': 39.0}, 'accuracy': 0.7541254125412541, 'macro avg': {'precision': 0.7568842017165666, 'recall': 0.7332177274383935, 'f1-score': 0.7410873376389084, 'support': 606.0}, 'weighted avg': {'precision': 0.7629270446072711, 'recall': 0.7541254125412541, 'f1-score': 0.7539379029904211, 'support': 606.0}}"
5,0.0015,1.258981,0.738437,"{np.str_('Irrelevant'): {'precision': 0.8142076502732241, 'recall': 0.7487437185929648, 'f1-score': 0.7801047120418848, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.71875, 'recall': 0.7540983606557377, 'f1-score': 0.736, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.8181818181818182, 'recall': 0.8490566037735849, 'f1-score': 0.8333333333333334, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.6686390532544378, 'recall': 0.7847222222222222, 'f1-score': 0.7220447284345048, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.68, 'recall': 0.5862068965517241, 'f1-score': 0.6296296296296297, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8846153846153846, 'recall': 0.8846153846153846, 'f1-score': 0.8846153846153846, 'support': 52.0}, np.str_('Smart People'): {'precision': 0.6363636363636364, 'recall': 0.5384615384615384, 'f1-score': 0.5833333333333334, 'support': 39.0}, 'accuracy': 0.7491749174917491, 'macro avg': {'precision': 0.7458225060983573, 'recall': 0.735129246410451, 'f1-score': 0.7384373030554388, 'support': 606.0}, 'weighted avg': {'precision': 0.7521071192323907, 'recall': 0.7491749174917491, 'f1-score': 0.748426507881335, 'support': 606.0}}"


Trainer is attempting to log a value of "{np.str_('Irrelevant'): {'precision': 0.8768115942028986, 'recall': 0.6080402010050251, 'f1-score': 0.7181008902077152, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.5656565656565656, 'recall': 0.9180327868852459, 'f1-score': 0.7, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.8, 'recall': 0.8301886792452831, 'f1-score': 0.8148148148148148, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7246376811594203, 'recall': 0.6944444444444444, 'f1-score': 0.7092198581560284, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.4823529411764706, 'recall': 0.7068965517241379, 'f1-score': 0.5734265734265734, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8421052631578947, 'recall': 0.9230769230769231, 'f1-score': 0.8807339449541285, 'support': 52.0}, np.str_('Smart People'): {'precision': 0.5588235294117647, 'recall': 0.48717948717948717, 'f1-score': 0.5205479452054794, 'support': 39.0}, 'a

Trainer is attempting to log a value of "{np.str_('Irrelevant'): {'precision': 0.8457142857142858, 'recall': 0.7437185929648241, 'f1-score': 0.7914438502673797, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.71875, 'recall': 0.7540983606557377, 'f1-score': 0.736, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.8113207547169812, 'recall': 0.8113207547169812, 'f1-score': 0.8113207547169812, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.6557377049180327, 'recall': 0.8333333333333334, 'f1-score': 0.7339449541284404, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.6666666666666666, 'recall': 0.5862068965517241, 'f1-score': 0.6238532110091743, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.9, 'recall': 0.8653846153846154, 'f1-score': 0.8823529411764706, 'support': 52.0}, np.str_('Smart People'): {'precision': 0.7, 'recall': 0.5384615384615384, 'f1-score': 0.6086956521739131, 'support': 39.0}, 'accuracy': 0.7541254125412

Fold 1 complete. Evaluation results: {'eval_loss': 1.271080732345581, 'eval_macro_f1': 0.7410873376389084, 'eval_detailed_report': {np.str_('Irrelevant'): {'precision': 0.8457142857142858, 'recall': 0.7437185929648241, 'f1-score': 0.7914438502673797, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.71875, 'recall': 0.7540983606557377, 'f1-score': 0.736, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.8113207547169812, 'recall': 0.8113207547169812, 'f1-score': 0.8113207547169812, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.6557377049180327, 'recall': 0.8333333333333334, 'f1-score': 0.7339449541284404, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.6666666666666666, 'recall': 0.5862068965517241, 'f1-score': 0.6238532110091743, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.9, 'recall': 0.8653846153846154, 'f1-score': 0.8823529411764706, 'support': 52.0}, np.str_('Smart People'): {'precision': 0.7, 'recall': 0.53846

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Macro F1,Detailed Report
1,0.2111,0.787389,0.724403,"{np.str_('Irrelevant'): {'precision': 0.8546511627906976, 'recall': 0.7386934673366834, 'f1-score': 0.7924528301886793, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.7164179104477612, 'recall': 0.7868852459016393, 'f1-score': 0.75, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7678571428571429, 'recall': 0.8113207547169812, 'f1-score': 0.7889908256880734, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7421875, 'recall': 0.6597222222222222, 'f1-score': 0.6985294117647058, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.48936170212765956, 'recall': 0.7931034482758621, 'f1-score': 0.6052631578947368, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8214285714285714, 'recall': 0.8679245283018868, 'f1-score': 0.8440366972477065, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.6363636363636364, 'recall': 0.5526315789473685, 'f1-score': 0.5915492957746479, 'support': 38.0}, 'accuracy': 0.735973597359736, 'macro avg': {'precision': 0.7183239465736385, 'recall': 0.7443258922432348, 'f1-score': 0.7244031740797928, 'support': 606.0}, 'weighted avg': {'precision': 0.7548663592327187, 'recall': 0.735973597359736, 'f1-score': 0.7395560125653341, 'support': 606.0}}"
2,0.0643,0.94638,0.733799,"{np.str_('Irrelevant'): {'precision': 0.8853503184713376, 'recall': 0.6984924623115578, 'f1-score': 0.7808988764044944, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.6708860759493671, 'recall': 0.8688524590163934, 'f1-score': 0.7571428571428571, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7014925373134329, 'recall': 0.8867924528301887, 'f1-score': 0.7833333333333333, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.6807228915662651, 'recall': 0.7847222222222222, 'f1-score': 0.7290322580645161, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.6792452830188679, 'recall': 0.6206896551724138, 'f1-score': 0.6486486486486487, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8545454545454545, 'recall': 0.8867924528301887, 'f1-score': 0.8703703703703703, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.6551724137931034, 'recall': 0.5, 'f1-score': 0.5671641791044776, 'support': 38.0}, 'accuracy': 0.7491749174917491, 'macro avg': {'precision': 0.7324878535225469, 'recall': 0.7494773863404235, 'f1-score': 0.7337986461526711, 'support': 606.0}, 'weighted avg': {'precision': 0.7622040463729424, 'recall': 0.7491749174917491, 'f1-score': 0.7481607138207707, 'support': 606.0}}"
3,0.0313,1.073018,0.735344,"{np.str_('Irrelevant'): {'precision': 0.8578947368421053, 'recall': 0.8190954773869347, 'f1-score': 0.8380462724935732, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.7419354838709677, 'recall': 0.7540983606557377, 'f1-score': 0.7479674796747967, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.75, 'recall': 0.7924528301886793, 'f1-score': 0.7706422018348624, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7018633540372671, 'recall': 0.7847222222222222, 'f1-score': 0.740983606557377, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.6785714285714286, 'recall': 0.6551724137931034, 'f1-score': 0.6666666666666666, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8363636363636363, 'recall': 0.8679245283018868, 'f1-score': 0.8518518518518519, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.6538461538461539, 'recall': 0.4473684210526316, 'f1-score': 0.53125, 'support': 38.0}, 'accuracy': 0.7673267326732673, 'macro avg': {'precision': 0.7457821133616512, 'recall': 0.7315477505144566, 'f1-score': 0.7353440112970182, 'support': 606.0}, 'weighted avg': {'precision': 0.7678680025736697, 'recall': 0.7673267326732673, 'f1-score': 0.7655861639318622, 'support': 606.0}}"
4,0.0054,1.139531,0.741411,"{np.str_('Irrelevant'): {'precision': 0.8195121951219512, 'recall': 0.8442211055276382, 'f1-score': 0.8316831683168316, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.7758620689655172, 'recall': 0.7377049180327869, 'f1-score': 0.7563025210084033, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7377049180327869, 'recall': 0.8490566037735849, 'f1-score': 0.7894736842105263, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.75, 'recall': 0.75, 'f1-score': 0.75, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.6349206349206349, 'recall': 0.6896551724137931, 'f1-score': 0.6611570247933884, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8679245283018868, 'recall': 0.8679245283018868, 'f1-score': 0.8679245283018868, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.7272727272727273, 'recall': 0.42105263157894735, 'f1-score': 0.5333333333333333, 'support': 38.0}, 'accuracy': 0.7722772277227723, 'macro avg': {'precision': 0.7590281532307863, 'recall': 0.7370878513755196, 'f1-score': 0.7414106085663386, 'support': 606.0}, 'weighted avg': {'precision': 0.7722287692304669, 'recall': 0.7722772277227723, 'f1-score': 0.7691341314264079, 'support': 606.0}}"
5,0.0016,1.153539,0.741105,"{np.str_('Irrelevant'): {'precision': 0.8195121951219512, 'recall': 0.8442211055276382, 'f1-score': 0.8316831683168316, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.7580645161290323, 'recall': 0.7704918032786885, 'f1-score': 0.7642276422764228, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7377049180327869, 'recall': 0.8490566037735849, 'f1-score': 0.7894736842105263, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7622377622377622, 'recall': 0.7569444444444444, 'f1-score': 0.759581881533101, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.6557377049180327, 'recall': 0.6896551724137931, 'f1-score': 0.6722689075630253, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.88, 'recall': 0.8301886792452831, 'f1-score': 0.8543689320388349, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.6666666666666666, 'recall': 0.42105263157894735, 'f1-score': 0.5161290322580645, 'support': 38.0}, 'accuracy': 0.7739273927392739, 'macro avg': {'precision': 0.7542748233008904, 'recall': 0.7373729200374827, 'f1-score': 0.7411047497424009, 'support': 606.0}, 'weighted avg': {'precision': 0.7725933679037854, 'recall': 0.7739273927392739, 'f1-score': 0.7710078979213794, 'support': 606.0}}"


Trainer is attempting to log a value of "{np.str_('Irrelevant'): {'precision': 0.8546511627906976, 'recall': 0.7386934673366834, 'f1-score': 0.7924528301886793, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.7164179104477612, 'recall': 0.7868852459016393, 'f1-score': 0.75, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7678571428571429, 'recall': 0.8113207547169812, 'f1-score': 0.7889908256880734, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7421875, 'recall': 0.6597222222222222, 'f1-score': 0.6985294117647058, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.48936170212765956, 'recall': 0.7931034482758621, 'f1-score': 0.6052631578947368, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8214285714285714, 'recall': 0.8679245283018868, 'f1-score': 0.8440366972477065, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.6363636363636364, 'recall': 0.5526315789473685, 'f1-score': 0.5915492957746479, 'support': 38

Trainer is attempting to log a value of "{np.str_('Irrelevant'): {'precision': 0.8195121951219512, 'recall': 0.8442211055276382, 'f1-score': 0.8316831683168316, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.7758620689655172, 'recall': 0.7377049180327869, 'f1-score': 0.7563025210084033, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7377049180327869, 'recall': 0.8490566037735849, 'f1-score': 0.7894736842105263, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.75, 'recall': 0.75, 'f1-score': 0.75, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.6349206349206349, 'recall': 0.6896551724137931, 'f1-score': 0.6611570247933884, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8679245283018868, 'recall': 0.8679245283018868, 'f1-score': 0.8679245283018868, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.7272727272727273, 'recall': 0.42105263157894735, 'f1-score': 0.5333333333333333, 'support': 38.0}, 'accuracy': 0.

Fold 2 complete. Evaluation results: {'eval_loss': 1.139531135559082, 'eval_macro_f1': 0.7414106085663386, 'eval_detailed_report': {np.str_('Irrelevant'): {'precision': 0.8195121951219512, 'recall': 0.8442211055276382, 'f1-score': 0.8316831683168316, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.7758620689655172, 'recall': 0.7377049180327869, 'f1-score': 0.7563025210084033, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7377049180327869, 'recall': 0.8490566037735849, 'f1-score': 0.7894736842105263, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.75, 'recall': 0.75, 'f1-score': 0.75, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.6349206349206349, 'recall': 0.6896551724137931, 'f1-score': 0.6611570247933884, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8679245283018868, 'recall': 0.8679245283018868, 'f1-score': 0.8679245283018868, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.7272727272727273, 'reca

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Macro F1,Detailed Report
1,0.2966,0.835847,0.744393,"{np.str_('Irrelevant'): {'precision': 0.8218390804597702, 'recall': 0.7185929648241206, 'f1-score': 0.7667560321715817, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.7538461538461538, 'recall': 0.8032786885245902, 'f1-score': 0.7777777777777778, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7666666666666667, 'recall': 0.8679245283018868, 'f1-score': 0.8141592920353983, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.8155339805825242, 'recall': 0.5793103448275863, 'f1-score': 0.6774193548387096, 'support': 145.0}, np.str_('Smart Living'): {'precision': 0.5684210526315789, 'recall': 0.9473684210526315, 'f1-score': 0.7105263157894737, 'support': 57.0}, np.str_('Smart Mobility'): {'precision': 0.765625, 'recall': 0.9245283018867925, 'f1-score': 0.8376068376068376, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.5777777777777777, 'recall': 0.6842105263157895, 'f1-score': 0.6265060240963856, 'support': 38.0}, 'accuracy': 0.7442244224422442, 'macro avg': {'precision': 0.724244244566353, 'recall': 0.7893162536761995, 'f1-score': 0.7443930906165948, 'support': 606.0}, 'weighted avg': {'precision': 0.7646040156261792, 'recall': 0.7442244224422442, 'f1-score': 0.7427484077308635, 'support': 606.0}}"
2,0.076,1.016939,0.739964,"{np.str_('Irrelevant'): {'precision': 0.8105263157894737, 'recall': 0.7738693467336684, 'f1-score': 0.7917737789203085, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.6626506024096386, 'recall': 0.9016393442622951, 'f1-score': 0.7638888888888888, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7666666666666667, 'recall': 0.8679245283018868, 'f1-score': 0.8141592920353983, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.8095238095238095, 'recall': 0.5862068965517241, 'f1-score': 0.68, 'support': 145.0}, np.str_('Smart Living'): {'precision': 0.5783132530120482, 'recall': 0.8421052631578947, 'f1-score': 0.6857142857142857, 'support': 57.0}, np.str_('Smart Mobility'): {'precision': 0.7931034482758621, 'recall': 0.8679245283018868, 'f1-score': 0.8288288288288288, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.7407407407407407, 'recall': 0.5263157894736842, 'f1-score': 0.6153846153846154, 'support': 38.0}, 'accuracy': 0.7491749174917491, 'macro avg': {'precision': 0.7373606909168914, 'recall': 0.7665693852547201, 'f1-score': 0.7399642413960466, 'support': 606.0}, 'weighted avg': {'precision': 0.7638237551680438, 'recall': 0.7491749174917491, 'f1-score': 0.746384330533824, 'support': 606.0}}"
3,0.0408,1.038042,0.731698,"{np.str_('Irrelevant'): {'precision': 0.8192090395480226, 'recall': 0.7286432160804021, 'f1-score': 0.7712765957446809, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.8163265306122449, 'recall': 0.6557377049180327, 'f1-score': 0.7272727272727273, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7704918032786885, 'recall': 0.8867924528301887, 'f1-score': 0.8245614035087719, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.6385542168674698, 'recall': 0.7310344827586207, 'f1-score': 0.6816720257234726, 'support': 145.0}, np.str_('Smart Living'): {'precision': 0.6865671641791045, 'recall': 0.8070175438596491, 'f1-score': 0.7419354838709677, 'support': 57.0}, np.str_('Smart Mobility'): {'precision': 0.7962962962962963, 'recall': 0.8113207547169812, 'f1-score': 0.8037383177570093, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.625, 'recall': 0.5263157894736842, 'f1-score': 0.5714285714285714, 'support': 38.0}, 'accuracy': 0.7376237623762376, 'macro avg': {'precision': 0.7360635786831181, 'recall': 0.7352659920910797, 'f1-score': 0.731697875043743, 'support': 606.0}, 'weighted avg': {'precision': 0.7447738883149665, 'recall': 0.7376237623762376, 'f1-score': 0.7376148781662519, 'support': 606.0}}"
4,0.0502,1.182932,0.738307,"{np.str_('Irrelevant'): {'precision': 0.8043478260869565, 'recall': 0.7437185929648241, 'f1-score': 0.7728459530026109, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.8695652173913043, 'recall': 0.6557377049180327, 'f1-score': 0.7476635514018691, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.8518518518518519, 'recall': 0.8679245283018868, 'f1-score': 0.8598130841121495, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.6120218579234973, 'recall': 0.7724137931034483, 'f1-score': 0.6829268292682927, 'support': 145.0}, np.str_('Smart Living'): {'precision': 0.6935483870967742, 'recall': 0.7543859649122807, 'f1-score': 0.7226890756302521, 'support': 57.0}, np.str_('Smart Mobility'): {'precision': 0.803921568627451, 'recall': 0.7735849056603774, 'f1-score': 0.7884615384615384, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.7307692307692307, 'recall': 0.5, 'f1-score': 0.59375, 'support': 38.0}, 'accuracy': 0.740924092409241, 'macro avg': {'precision': 0.7665751342495808, 'recall': 0.72396649855155, 'f1-score': 0.738307147410959, 'support': 606.0}, 'weighted avg': {'precision': 0.7539758171125923, 'recall': 0.740924092409241, 'f1-score': 0.7418193792644652, 'support': 606.0}}"
5,0.0013,1.12358,0.749622,"{np.str_('Irrelevant'): {'precision': 0.7777777777777778, 'recall': 0.8090452261306532, 'f1-score': 0.7931034482758621, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.84, 'recall': 0.6885245901639344, 'f1-score': 0.7567567567567568, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.8245614035087719, 'recall': 0.8867924528301887, 'f1-score': 0.8545454545454545, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7027027027027027, 'recall': 0.7172413793103448, 'f1-score': 0.7098976109215017, 'support': 145.0}, np.str_('Smart Living'): {'precision': 0.7333333333333333, 'recall': 0.7719298245614035, 'f1-score': 0.7521367521367521, 'support': 57.0}, np.str_('Smart Mobility'): {'precision': 0.8301886792452831, 'recall': 0.8301886792452831, 'f1-score': 0.8301886792452831, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.6129032258064516, 'recall': 0.5, 'f1-score': 0.5507246376811594, 'support': 38.0}, 'accuracy': 0.7607260726072608, 'macro avg': {'precision': 0.7602095889106172, 'recall': 0.7433888788916868, 'f1-score': 0.7496219056518242, 'support': 606.0}, 'weighted avg': {'precision': 0.7602339053403957, 'recall': 0.7607260726072608, 'f1-score': 0.7591008946324497, 'support': 606.0}}"


Trainer is attempting to log a value of "{np.str_('Irrelevant'): {'precision': 0.8218390804597702, 'recall': 0.7185929648241206, 'f1-score': 0.7667560321715817, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.7538461538461538, 'recall': 0.8032786885245902, 'f1-score': 0.7777777777777778, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7666666666666667, 'recall': 0.8679245283018868, 'f1-score': 0.8141592920353983, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.8155339805825242, 'recall': 0.5793103448275863, 'f1-score': 0.6774193548387096, 'support': 145.0}, np.str_('Smart Living'): {'precision': 0.5684210526315789, 'recall': 0.9473684210526315, 'f1-score': 0.7105263157894737, 'support': 57.0}, np.str_('Smart Mobility'): {'precision': 0.765625, 'recall': 0.9245283018867925, 'f1-score': 0.8376068376068376, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.5777777777777777, 'recall': 0.6842105263157895, 'f1-score': 0.6265060240963856, '

Trainer is attempting to log a value of "{np.str_('Irrelevant'): {'precision': 0.7777777777777778, 'recall': 0.8090452261306532, 'f1-score': 0.7931034482758621, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.84, 'recall': 0.6885245901639344, 'f1-score': 0.7567567567567568, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.8245614035087719, 'recall': 0.8867924528301887, 'f1-score': 0.8545454545454545, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7027027027027027, 'recall': 0.7172413793103448, 'f1-score': 0.7098976109215017, 'support': 145.0}, np.str_('Smart Living'): {'precision': 0.7333333333333333, 'recall': 0.7719298245614035, 'f1-score': 0.7521367521367521, 'support': 57.0}, np.str_('Smart Mobility'): {'precision': 0.8301886792452831, 'recall': 0.8301886792452831, 'f1-score': 0.8301886792452831, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.6129032258064516, 'recall': 0.5, 'f1-score': 0.5507246376811594, 'support': 38.0}, 'a

Fold 3 complete. Evaluation results: {'eval_loss': 1.123579502105713, 'eval_macro_f1': 0.7496219056518242, 'eval_detailed_report': {np.str_('Irrelevant'): {'precision': 0.7777777777777778, 'recall': 0.8090452261306532, 'f1-score': 0.7931034482758621, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.84, 'recall': 0.6885245901639344, 'f1-score': 0.7567567567567568, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.8245614035087719, 'recall': 0.8867924528301887, 'f1-score': 0.8545454545454545, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7027027027027027, 'recall': 0.7172413793103448, 'f1-score': 0.7098976109215017, 'support': 145.0}, np.str_('Smart Living'): {'precision': 0.7333333333333333, 'recall': 0.7719298245614035, 'f1-score': 0.7521367521367521, 'support': 57.0}, np.str_('Smart Mobility'): {'precision': 0.8301886792452831, 'recall': 0.8301886792452831, 'f1-score': 0.8301886792452831, 'support': 53.0}, np.str_('Smart People'): {'precision

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Macro F1,Detailed Report
1,0.2823,1.001855,0.690265,"{np.str_('Irrelevant'): {'precision': 0.7988505747126436, 'recall': 0.702020202020202, 'f1-score': 0.7473118279569892, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.5974025974025974, 'recall': 0.7419354838709677, 'f1-score': 0.6618705035971223, 'support': 62.0}, np.str_('Smart Environment'): {'precision': 0.8070175438596491, 'recall': 0.8679245283018868, 'f1-score': 0.8363636363636363, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.8372093023255814, 'recall': 0.5, 'f1-score': 0.6260869565217392, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.43373493975903615, 'recall': 0.631578947368421, 'f1-score': 0.5142857142857142, 'support': 57.0}, np.str_('Smart Mobility'): {'precision': 0.7246376811594203, 'recall': 0.9433962264150944, 'f1-score': 0.819672131147541, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.5166666666666667, 'recall': 0.7948717948717948, 'f1-score': 0.6262626262626263, 'support': 39.0}, 'accuracy': 0.693069306930693, 'macro avg': {'precision': 0.6736456151265136, 'recall': 0.7402467404069096, 'f1-score': 0.6902647708764812, 'support': 606.0}, 'weighted avg': {'precision': 0.7290761268304025, 'recall': 0.693069306930693, 'f1-score': 0.694172704148938, 'support': 606.0}}"
2,0.1326,0.949177,0.752696,"{np.str_('Irrelevant'): {'precision': 0.7375565610859729, 'recall': 0.8232323232323232, 'f1-score': 0.7780429594272077, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.75, 'recall': 0.6290322580645161, 'f1-score': 0.6842105263157895, 'support': 62.0}, np.str_('Smart Environment'): {'precision': 0.84, 'recall': 0.7924528301886793, 'f1-score': 0.8155339805825242, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7948717948717948, 'recall': 0.6458333333333334, 'f1-score': 0.7126436781609196, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.6285714285714286, 'recall': 0.7719298245614035, 'f1-score': 0.6929133858267716, 'support': 57.0}, np.str_('Smart Mobility'): {'precision': 0.8305084745762712, 'recall': 0.9245283018867925, 'f1-score': 0.875, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.7297297297297297, 'recall': 0.6923076923076923, 'f1-score': 0.7105263157894737, 'support': 39.0}, 'accuracy': 0.7541254125412541, 'macro avg': {'precision': 0.7587482841193138, 'recall': 0.7541880805106772, 'f1-score': 0.7526958351575265, 'support': 606.0}, 'weighted avg': {'precision': 0.7587833623715089, 'recall': 0.7541254125412541, 'f1-score': 0.7523088094589566, 'support': 606.0}}"
3,0.0157,1.084889,0.73722,"{np.str_('Irrelevant'): {'precision': 0.7755102040816326, 'recall': 0.7676767676767676, 'f1-score': 0.7715736040609137, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.7, 'recall': 0.6774193548387096, 'f1-score': 0.6885245901639344, 'support': 62.0}, np.str_('Smart Environment'): {'precision': 0.6811594202898551, 'recall': 0.8867924528301887, 'f1-score': 0.7704918032786885, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7786259541984732, 'recall': 0.7083333333333334, 'f1-score': 0.7418181818181818, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.6037735849056604, 'recall': 0.5614035087719298, 'f1-score': 0.5818181818181818, 'support': 57.0}, np.str_('Smart Mobility'): {'precision': 0.8032786885245902, 'recall': 0.9245283018867925, 'f1-score': 0.8596491228070176, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.7777777777777778, 'recall': 0.717948717948718, 'f1-score': 0.7466666666666667, 'support': 39.0}, 'accuracy': 0.7458745874587459, 'macro avg': {'precision': 0.7314465185397127, 'recall': 0.74915749104092, 'f1-score': 0.737220307230512, 'support': 606.0}, 'weighted avg': {'precision': 0.746694398107038, 'recall': 0.7458745874587459, 'f1-score': 0.7441634023469007, 'support': 606.0}}"
4,0.0024,1.181189,0.737738,"{np.str_('Irrelevant'): {'precision': 0.8165680473372781, 'recall': 0.696969696969697, 'f1-score': 0.7520435967302452, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.6774193548387096, 'recall': 0.6774193548387096, 'f1-score': 0.6774193548387096, 'support': 62.0}, np.str_('Smart Environment'): {'precision': 0.7592592592592593, 'recall': 0.7735849056603774, 'f1-score': 0.7663551401869159, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7414965986394558, 'recall': 0.7569444444444444, 'f1-score': 0.7491408934707904, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.5714285714285714, 'recall': 0.7017543859649122, 'f1-score': 0.6299212598425197, 'support': 57.0}, np.str_('Smart Mobility'): {'precision': 0.8305084745762712, 'recall': 0.9245283018867925, 'f1-score': 0.875, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.6666666666666666, 'recall': 0.7692307692307693, 'f1-score': 0.7142857142857143, 'support': 39.0}, 'accuracy': 0.740924092409241, 'macro avg': {'precision': 0.7233352818208875, 'recall': 0.7572045512851002, 'f1-score': 0.7377379941935563, 'support': 606.0}, 'weighted avg': {'precision': 0.7479952178903867, 'recall': 0.740924092409241, 'f1-score': 0.7418075873109825, 'support': 606.0}}"
5,0.0015,1.16816,0.735785,"{np.str_('Irrelevant'): {'precision': 0.7801047120418848, 'recall': 0.7525252525252525, 'f1-score': 0.7660668380462725, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.7169811320754716, 'recall': 0.6129032258064516, 'f1-score': 0.6608695652173913, 'support': 62.0}, np.str_('Smart Environment'): {'precision': 0.7547169811320755, 'recall': 0.7547169811320755, 'f1-score': 0.7547169811320755, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7397260273972602, 'recall': 0.75, 'f1-score': 0.7448275862068966, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.578125, 'recall': 0.6491228070175439, 'f1-score': 0.6115702479338843, 'support': 57.0}, np.str_('Smart Mobility'): {'precision': 0.8571428571428571, 'recall': 0.9056603773584906, 'f1-score': 0.8807339449541285, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.6976744186046512, 'recall': 0.7692307692307693, 'f1-score': 0.7317073170731707, 'support': 39.0}, 'accuracy': 0.7425742574257426, 'macro avg': {'precision': 0.7320673040563144, 'recall': 0.7420227732957976, 'f1-score': 0.7357846400805456, 'support': 606.0}, 'weighted avg': {'precision': 0.7442658578751332, 'recall': 0.7425742574257426, 'f1-score': 0.7425500131536092, 'support': 606.0}}"


Trainer is attempting to log a value of "{np.str_('Irrelevant'): {'precision': 0.7988505747126436, 'recall': 0.702020202020202, 'f1-score': 0.7473118279569892, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.5974025974025974, 'recall': 0.7419354838709677, 'f1-score': 0.6618705035971223, 'support': 62.0}, np.str_('Smart Environment'): {'precision': 0.8070175438596491, 'recall': 0.8679245283018868, 'f1-score': 0.8363636363636363, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.8372093023255814, 'recall': 0.5, 'f1-score': 0.6260869565217392, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.43373493975903615, 'recall': 0.631578947368421, 'f1-score': 0.5142857142857142, 'support': 57.0}, np.str_('Smart Mobility'): {'precision': 0.7246376811594203, 'recall': 0.9433962264150944, 'f1-score': 0.819672131147541, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.5166666666666667, 'recall': 0.7948717948717948, 'f1-score': 0.6262626262626263, 'support

Trainer is attempting to log a value of "{np.str_('Irrelevant'): {'precision': 0.7375565610859729, 'recall': 0.8232323232323232, 'f1-score': 0.7780429594272077, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.75, 'recall': 0.6290322580645161, 'f1-score': 0.6842105263157895, 'support': 62.0}, np.str_('Smart Environment'): {'precision': 0.84, 'recall': 0.7924528301886793, 'f1-score': 0.8155339805825242, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7948717948717948, 'recall': 0.6458333333333334, 'f1-score': 0.7126436781609196, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.6285714285714286, 'recall': 0.7719298245614035, 'f1-score': 0.6929133858267716, 'support': 57.0}, np.str_('Smart Mobility'): {'precision': 0.8305084745762712, 'recall': 0.9245283018867925, 'f1-score': 0.875, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.7297297297297297, 'recall': 0.6923076923076923, 'f1-score': 0.7105263157894737, 'support': 39.0}, 'accuracy': 0.

Fold 4 complete. Evaluation results: {'eval_loss': 0.9491768479347229, 'eval_macro_f1': 0.7526958351575265, 'eval_detailed_report': {np.str_('Irrelevant'): {'precision': 0.7375565610859729, 'recall': 0.8232323232323232, 'f1-score': 0.7780429594272077, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.75, 'recall': 0.6290322580645161, 'f1-score': 0.6842105263157895, 'support': 62.0}, np.str_('Smart Environment'): {'precision': 0.84, 'recall': 0.7924528301886793, 'f1-score': 0.8155339805825242, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7948717948717948, 'recall': 0.6458333333333334, 'f1-score': 0.7126436781609196, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.6285714285714286, 'recall': 0.7719298245614035, 'f1-score': 0.6929133858267716, 'support': 57.0}, np.str_('Smart Mobility'): {'precision': 0.8305084745762712, 'recall': 0.9245283018867925, 'f1-score': 0.875, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.7297297297297297, 're

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Macro F1,Detailed Report
1,0.2394,0.903596,0.7037,"{np.str_('Irrelevant'): {'precision': 0.7953216374269005, 'recall': 0.6868686868686869, 'f1-score': 0.7371273712737128, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.6363636363636364, 'recall': 0.8032786885245902, 'f1-score': 0.7101449275362319, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7796610169491526, 'recall': 0.8679245283018868, 'f1-score': 0.8214285714285714, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.83, 'recall': 0.5763888888888888, 'f1-score': 0.680327868852459, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.4927536231884058, 'recall': 0.5862068965517241, 'f1-score': 0.5354330708661418, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8103448275862069, 'recall': 0.8867924528301887, 'f1-score': 0.8468468468468469, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.4583333333333333, 'recall': 0.8461538461538461, 'f1-score': 0.5945945945945946, 'support': 39.0}, 'accuracy': 0.7062706270627063, 'macro avg': {'precision': 0.6861111535496621, 'recall': 0.7505162840171159, 'f1-score': 0.7037004644855083, 'support': 606.0}, 'weighted avg': {'precision': 0.7368595477458906, 'recall': 0.7062706270627063, 'f1-score': 0.7094062337866038, 'support': 606.0}}"
2,0.1264,1.049845,0.722179,"{np.str_('Irrelevant'): {'precision': 0.8414634146341463, 'recall': 0.696969696969697, 'f1-score': 0.7624309392265194, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.7241379310344828, 'recall': 0.6885245901639344, 'f1-score': 0.7058823529411765, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7619047619047619, 'recall': 0.9056603773584906, 'f1-score': 0.8275862068965517, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.6470588235294118, 'recall': 0.8402777777777778, 'f1-score': 0.7311178247734139, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.6666666666666666, 'recall': 0.5172413793103449, 'f1-score': 0.5825242718446602, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8545454545454545, 'recall': 0.8867924528301887, 'f1-score': 0.8703703703703703, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.6176470588235294, 'recall': 0.5384615384615384, 'f1-score': 0.5753424657534246, 'support': 39.0}, 'accuracy': 0.7376237623762376, 'macro avg': {'precision': 0.7304891587340647, 'recall': 0.7248468304102816, 'f1-score': 0.7221792045437309, 'support': 606.0}, 'weighted avg': {'precision': 0.7465105675108012, 'recall': 0.7376237623762376, 'f1-score': 0.7351775227560312, 'support': 606.0}}"
3,0.0142,1.119253,0.720429,"{np.str_('Irrelevant'): {'precision': 0.7892156862745098, 'recall': 0.8131313131313131, 'f1-score': 0.8009950248756219, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.6875, 'recall': 0.7213114754098361, 'f1-score': 0.704, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7586206896551724, 'recall': 0.8301886792452831, 'f1-score': 0.7927927927927928, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7769230769230769, 'recall': 0.7013888888888888, 'f1-score': 0.7372262773722628, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.5689655172413793, 'recall': 0.5689655172413793, 'f1-score': 0.5689655172413793, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8035714285714286, 'recall': 0.8490566037735849, 'f1-score': 0.8256880733944955, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.6388888888888888, 'recall': 0.5897435897435898, 'f1-score': 0.6133333333333333, 'support': 39.0}, 'accuracy': 0.7442244224422442, 'macro avg': {'precision': 0.7176693267934936, 'recall': 0.7248265810619821, 'f1-score': 0.7204287170014122, 'support': 606.0}, 'weighted avg': {'precision': 0.7438811516368854, 'recall': 0.7442244224422442, 'f1-score': 0.7432361134899427, 'support': 606.0}}"
4,0.004,1.182754,0.741785,"{np.str_('Irrelevant'): {'precision': 0.8279569892473119, 'recall': 0.7777777777777778, 'f1-score': 0.8020833333333334, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.7301587301587301, 'recall': 0.7540983606557377, 'f1-score': 0.7419354838709677, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7619047619047619, 'recall': 0.9056603773584906, 'f1-score': 0.8275862068965517, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7569444444444444, 'recall': 0.7569444444444444, 'f1-score': 0.7569444444444444, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.5932203389830508, 'recall': 0.603448275862069, 'f1-score': 0.5982905982905983, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8214285714285714, 'recall': 0.8679245283018868, 'f1-score': 0.8440366972477065, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.6571428571428571, 'recall': 0.5897435897435898, 'f1-score': 0.6216216216216216, 'support': 39.0}, 'accuracy': 0.7607260726072608, 'macro avg': {'precision': 0.7355366704728183, 'recall': 0.7507996220205708, 'f1-score': 0.7417854836721748, 'support': 606.0}, 'weighted avg': {'precision': 0.7614309969750913, 'recall': 0.7607260726072608, 'f1-score': 0.7600836243892287, 'support': 606.0}}"


Trainer is attempting to log a value of "{np.str_('Irrelevant'): {'precision': 0.7953216374269005, 'recall': 0.6868686868686869, 'f1-score': 0.7371273712737128, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.6363636363636364, 'recall': 0.8032786885245902, 'f1-score': 0.7101449275362319, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7796610169491526, 'recall': 0.8679245283018868, 'f1-score': 0.8214285714285714, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.83, 'recall': 0.5763888888888888, 'f1-score': 0.680327868852459, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.4927536231884058, 'recall': 0.5862068965517241, 'f1-score': 0.5354330708661418, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8103448275862069, 'recall': 0.8867924528301887, 'f1-score': 0.8468468468468469, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.4583333333333333, 'recall': 0.8461538461538461, 'f1-score': 0.5945945945945946, 'suppo

Epoch,Training Loss,Validation Loss,Macro F1,Detailed Report
1,0.2394,0.903596,0.7037,"{np.str_('Irrelevant'): {'precision': 0.7953216374269005, 'recall': 0.6868686868686869, 'f1-score': 0.7371273712737128, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.6363636363636364, 'recall': 0.8032786885245902, 'f1-score': 0.7101449275362319, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7796610169491526, 'recall': 0.8679245283018868, 'f1-score': 0.8214285714285714, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.83, 'recall': 0.5763888888888888, 'f1-score': 0.680327868852459, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.4927536231884058, 'recall': 0.5862068965517241, 'f1-score': 0.5354330708661418, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8103448275862069, 'recall': 0.8867924528301887, 'f1-score': 0.8468468468468469, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.4583333333333333, 'recall': 0.8461538461538461, 'f1-score': 0.5945945945945946, 'support': 39.0}, 'accuracy': 0.7062706270627063, 'macro avg': {'precision': 0.6861111535496621, 'recall': 0.7505162840171159, 'f1-score': 0.7037004644855083, 'support': 606.0}, 'weighted avg': {'precision': 0.7368595477458906, 'recall': 0.7062706270627063, 'f1-score': 0.7094062337866038, 'support': 606.0}}"
2,0.1264,1.049845,0.722179,"{np.str_('Irrelevant'): {'precision': 0.8414634146341463, 'recall': 0.696969696969697, 'f1-score': 0.7624309392265194, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.7241379310344828, 'recall': 0.6885245901639344, 'f1-score': 0.7058823529411765, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7619047619047619, 'recall': 0.9056603773584906, 'f1-score': 0.8275862068965517, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.6470588235294118, 'recall': 0.8402777777777778, 'f1-score': 0.7311178247734139, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.6666666666666666, 'recall': 0.5172413793103449, 'f1-score': 0.5825242718446602, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8545454545454545, 'recall': 0.8867924528301887, 'f1-score': 0.8703703703703703, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.6176470588235294, 'recall': 0.5384615384615384, 'f1-score': 0.5753424657534246, 'support': 39.0}, 'accuracy': 0.7376237623762376, 'macro avg': {'precision': 0.7304891587340647, 'recall': 0.7248468304102816, 'f1-score': 0.7221792045437309, 'support': 606.0}, 'weighted avg': {'precision': 0.7465105675108012, 'recall': 0.7376237623762376, 'f1-score': 0.7351775227560312, 'support': 606.0}}"
3,0.0142,1.119253,0.720429,"{np.str_('Irrelevant'): {'precision': 0.7892156862745098, 'recall': 0.8131313131313131, 'f1-score': 0.8009950248756219, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.6875, 'recall': 0.7213114754098361, 'f1-score': 0.704, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7586206896551724, 'recall': 0.8301886792452831, 'f1-score': 0.7927927927927928, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7769230769230769, 'recall': 0.7013888888888888, 'f1-score': 0.7372262773722628, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.5689655172413793, 'recall': 0.5689655172413793, 'f1-score': 0.5689655172413793, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8035714285714286, 'recall': 0.8490566037735849, 'f1-score': 0.8256880733944955, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.6388888888888888, 'recall': 0.5897435897435898, 'f1-score': 0.6133333333333333, 'support': 39.0}, 'accuracy': 0.7442244224422442, 'macro avg': {'precision': 0.7176693267934936, 'recall': 0.7248265810619821, 'f1-score': 0.7204287170014122, 'support': 606.0}, 'weighted avg': {'precision': 0.7438811516368854, 'recall': 0.7442244224422442, 'f1-score': 0.7432361134899427, 'support': 606.0}}"
4,0.004,1.182754,0.741785,"{np.str_('Irrelevant'): {'precision': 0.8279569892473119, 'recall': 0.7777777777777778, 'f1-score': 0.8020833333333334, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.7301587301587301, 'recall': 0.7540983606557377, 'f1-score': 0.7419354838709677, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7619047619047619, 'recall': 0.9056603773584906, 'f1-score': 0.8275862068965517, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7569444444444444, 'recall': 0.7569444444444444, 'f1-score': 0.7569444444444444, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.5932203389830508, 'recall': 0.603448275862069, 'f1-score': 0.5982905982905983, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8214285714285714, 'recall': 0.8679245283018868, 'f1-score': 0.8440366972477065, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.6571428571428571, 'recall': 0.5897435897435898, 'f1-score': 0.6216216216216216, 'support': 39.0}, 'accuracy': 0.7607260726072608, 'macro avg': {'precision': 0.7355366704728183, 'recall': 0.7507996220205708, 'f1-score': 0.7417854836721748, 'support': 606.0}, 'weighted avg': {'precision': 0.7614309969750913, 'recall': 0.7607260726072608, 'f1-score': 0.7600836243892287, 'support': 606.0}}"
5,0.0012,1.174307,0.735685,"{np.str_('Irrelevant'): {'precision': 0.8109452736318408, 'recall': 0.8232323232323232, 'f1-score': 0.8170426065162907, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.7192982456140351, 'recall': 0.6721311475409836, 'f1-score': 0.6949152542372882, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7586206896551724, 'recall': 0.8301886792452831, 'f1-score': 0.7927927927927928, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.782608695652174, 'recall': 0.75, 'f1-score': 0.7659574468085106, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.5862068965517241, 'recall': 0.5862068965517241, 'f1-score': 0.5862068965517241, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8363636363636363, 'recall': 0.8679245283018868, 'f1-score': 0.8518518518518519, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.6410256410256411, 'recall': 0.6410256410256411, 'f1-score': 0.6410256410256411, 'support': 39.0}, 'accuracy': 0.7607260726072608, 'macro avg': {'precision': 0.7335812969277463, 'recall': 0.7386727451282632, 'f1-score': 0.7356846413977286, 'support': 606.0}, 'weighted avg': {'precision': 0.7601884135552319, 'recall': 0.7607260726072608, 'f1-score': 0.760112714695201, 'support': 606.0}}"


Trainer is attempting to log a value of "{np.str_('Irrelevant'): {'precision': 0.8109452736318408, 'recall': 0.8232323232323232, 'f1-score': 0.8170426065162907, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.7192982456140351, 'recall': 0.6721311475409836, 'f1-score': 0.6949152542372882, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7586206896551724, 'recall': 0.8301886792452831, 'f1-score': 0.7927927927927928, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.782608695652174, 'recall': 0.75, 'f1-score': 0.7659574468085106, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.5862068965517241, 'recall': 0.5862068965517241, 'f1-score': 0.5862068965517241, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8363636363636363, 'recall': 0.8679245283018868, 'f1-score': 0.8518518518518519, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.6410256410256411, 'recall': 0.6410256410256411, 'f1-score': 0.6410256410256411, 'suppo

Trainer is attempting to log a value of "{np.str_('Irrelevant'): {'precision': 0.8279569892473119, 'recall': 0.7777777777777778, 'f1-score': 0.8020833333333334, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.7301587301587301, 'recall': 0.7540983606557377, 'f1-score': 0.7419354838709677, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7619047619047619, 'recall': 0.9056603773584906, 'f1-score': 0.8275862068965517, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7569444444444444, 'recall': 0.7569444444444444, 'f1-score': 0.7569444444444444, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.5932203389830508, 'recall': 0.603448275862069, 'f1-score': 0.5982905982905983, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8214285714285714, 'recall': 0.8679245283018868, 'f1-score': 0.8440366972477065, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.6571428571428571, 'recall': 0.5897435897435898, 'f1-score': 0.6216216216

Fold 5 complete. Evaluation results: {'eval_loss': 1.1827536821365356, 'eval_macro_f1': 0.7417854836721748, 'eval_detailed_report': {np.str_('Irrelevant'): {'precision': 0.8279569892473119, 'recall': 0.7777777777777778, 'f1-score': 0.8020833333333334, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.7301587301587301, 'recall': 0.7540983606557377, 'f1-score': 0.7419354838709677, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7619047619047619, 'recall': 0.9056603773584906, 'f1-score': 0.8275862068965517, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7569444444444444, 'recall': 0.7569444444444444, 'f1-score': 0.7569444444444444, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.5932203389830508, 'recall': 0.603448275862069, 'f1-score': 0.5982905982905983, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8214285714285714, 'recall': 0.8679245283018868, 'f1-score': 0.8440366972477065, 'support': 53.0}, np.str_('Smart People'

In [None]:
# Aggregate the cross-validation metric: collect the 'eval_macro_f1' value
# stored in each entry of fold_results.
macro_f1_scores = [result['eval_macro_f1'] for result in fold_results]

# Fail loudly instead of reporting "nan ± nan" when fold_results holds no
# completed folds; a NaN here would otherwise flow into the saved results.
if not macro_f1_scores:
    raise ValueError("fold_results is empty; run the cross-validation loop before aggregating.")

# Mean and spread across folds. np.std defaults to ddof=0 (population standard
# deviation); pass ddof=1 if the sample standard deviation is wanted instead.
mean_macro_f1 = np.mean(macro_f1_scores)
std_macro_f1 = np.std(macro_f1_scores)

# Derive the fold count from the data so the header cannot drift from the run.
n_folds = len(macro_f1_scores)
print(f"--- Final Aggregated Results ({n_folds}-Fold CV) for IndoBERT (Improved) ---")
print(f"Macro F1-Score = {mean_macro_f1:.4f} ± {std_macro_f1:.4f}")

--- Final Aggregated Results (5-Fold CV) for IndoBERT (Improved) ---
Macro F1-Score = 0.7453 ± 0.0049


In [None]:
def _json_default(value):
    """Convert NumPy objects that `json` cannot encode natively into built-ins.

    Used as the `default` hook of `json.dumps`; it is only called for objects
    the encoder does not already know how to serialize.

    Raises:
        TypeError: if `value` is neither a NumPy scalar nor a NumPy array.
    """
    if isinstance(value, np.generic):
        return value.item()
    if isinstance(value, np.ndarray):
        return value.tolist()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


# Bundle the aggregate scores with the raw per-fold results. The aggregates are
# cast to built-in floats so the payload does not depend on NumPy scalar types.
final_results = {
    'model': 'IndoBERT (Improved)',
    'mean_macro_f1': float(mean_macro_f1),
    'std_dev_macro_f1': float(std_macro_f1),
    'results_per_fold': fold_results
}

# Define the output file path
results_file_path = os.path.join(GDRIVE_PATH, 'results_indobert.json')

# Serialize fully in memory first: if encoding fails, the exception is raised
# before the file is opened, so an existing results file is never truncated.
serialized_results = json.dumps(final_results, indent=4, default=_json_default)
with open(results_file_path, 'w', encoding='utf-8') as f:
    f.write(serialized_results)

print(f"\n✅ Final results for IndoBERT have been saved to: '{results_file_path}'")


✅ Final results for IndoBERT have been saved to: '/content/drive/MyDrive/eecsi_revise/results_indobert.json'
