In [None]:
# Cell 1 (REVISED): Uninstall and Reinstall Key Libraries

# Step 1: Clean up any potentially conflicting versions
print("--> Uninstalling existing versions...")
!pip uninstall -y transformers accelerate datasets

# Step 2: Reinstall a stable, compatible set of the core libraries
print("\n--> Reinstalling core libraries...")
!pip install transformers accelerate datasets

# Step 3: Install the remaining libraries
print("\n--> Installing other required libraries...")
!pip install scikit-learn pandas imbalanced-learn -q

print("\n✅ All libraries have been reinstalled.")

--> Uninstalling existing versions...
Found existing installation: transformers 4.55.1
Uninstalling transformers-4.55.1:
  Successfully uninstalled transformers-4.55.1
Found existing installation: accelerate 1.10.0
Uninstalling accelerate-1.10.0:
  Successfully uninstalled accelerate-1.10.0
Found existing installation: datasets 4.0.0
Uninstalling datasets-4.0.0:
  Successfully uninstalled datasets-4.0.0

--> Reinstalling core libraries...
Collecting transformers
  Downloading transformers-4.55.2-py3-none-any.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.0/42.0 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate
  Downloading accelerate-1.10.0-py3-none-any.whl.metadata (19 kB)
Collecting datasets
  Downloading datasets-4.0.0-py3-none-any.whl.metadata (19 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.

In [None]:
# <-- NEW: Check library versions
# This output should be noted for the 'reproducibility' section of the paper
!pip freeze | grep -E "transformers|torch|scikit-learn|imbalanced-learn|datasets"

datasets==4.0.0
imbalanced-learn==0.13.0
scikit-learn==1.6.1
sentence-transformers==5.1.0
tensorflow-datasets==4.9.9
torch @ https://download.pytorch.org/whl/cu124/torch-2.6.0%2Bcu124-cp311-cp311-linux_x86_64.whl
torchao==0.10.0
torchaudio @ https://download.pytorch.org/whl/cu124/torchaudio-2.6.0%2Bcu124-cp311-cp311-linux_x86_64.whl
torchdata==0.11.0
torchsummary==1.5.1
torchtune==0.6.1
torchvision @ https://download.pytorch.org/whl/cu124/torchvision-0.21.0%2Bcu124-cp311-cp311-linux_x86_64.whl
transformers==4.55.2
vega-datasets==0.9.0


In [None]:
from google.colab import drive
import pandas as pd
import numpy as np
import pickle
import os
import json
import torch
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report
from sklearn.utils.class_weight import compute_class_weight
from imblearn.over_sampling import RandomOverSampler
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset

# Make Google Drive reachable under /content/drive
drive.mount('/content/drive')

# --- KEY SETTINGS ---
SEED = 42
GDRIVE_PATH = '/content/drive/MyDrive/eecsi_revise/'
MODEL_NAME = "indolem/indobertweet-base-uncased"

# Seed NumPy, PyTorch (CPU) and all CUDA devices with the same value,
# in that order, for reproducibility
for _seed_fn in (np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    _seed_fn(SEED)

print(f"✅ Setup complete. Working inside folder: {GDRIVE_PATH}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✅ Setup complete. Working inside folder: /content/drive/MyDrive/eecsi_revise/


In [None]:
# Define file paths
file_path_csv = os.path.join(GDRIVE_PATH, 'final_golden_dataset_eecsi.csv')
file_path_split = os.path.join(GDRIVE_PATH, 'kfold_splits.pkl')

# Load the dataset and the split file
try:
    df = pd.read_csv(file_path_csv)
    # NOTE(security): pickle.load can execute arbitrary code. Only load a
    # split file that you generated yourself.
    with open(file_path_split, 'rb') as f:
        kfold_splits = pickle.load(f)
except FileNotFoundError as e:
    print(f"❌ ERROR: File not found. Please ensure '{e.filename}' is in the 'eecsi_revise' folder in your Google Drive.")
    # Re-raise so that "Run All" stops here instead of failing later with a
    # confusing NameError on `df` / `kfold_splits`.
    raise
else:
    # Report the actual number of folds found in the split file.
    print(f"✅ Successfully loaded dataset and {len(kfold_splits)}-fold splits.")

✅ Successfully loaded dataset and 5-fold splits.


In [None]:
# Cell 5: Prepare Helper Functions & Classes

# 1. Custom PyTorch Dataset Class
class AspectDataset(torch.utils.data.Dataset):
    """Map-style dataset that pairs tokenizer encodings with labels.

    Args:
        encodings: Mapping from feature name to a per-example indexable
            sequence (anything exposing ``.items()``).
        labels: Indexable sequence holding one label per example.
    """

    def __init__(self, encodings, labels):
        self.encodings, self.labels = encodings, labels

    def __len__(self):
        # The label sequence defines how many examples the dataset holds.
        return len(self.labels)

    def __getitem__(self, idx):
        # Assemble one example: each encoding field first, then the label,
        # every value converted to a tensor.
        sample = {}
        for name, column in self.encodings.items():
            sample[name] = torch.tensor(column[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

# 2. Function to compute metrics
def compute_metrics(p):
    """Compute the macro F1 score and a per-class classification report.

    Relies on the module-level ``id2label`` mapping (class id -> class name),
    which must be defined before the Trainer calls this function.

    Args:
        p: Evaluation output exposing ``predictions`` (per-class scores, one
            row per example) and ``label_ids`` (true class ids).

    Returns:
        dict with two entries:
            ``macro_f1``: macro-averaged F1 score (float).
            ``detailed_report``: the full ``classification_report`` dict.
            This value is a nested dict, not a scalar, so the Trainer warns
            when it tries to log it; it is kept so per-class scores remain
            available in the evaluation results.
    """
    preds = np.argmax(p.predictions, axis=1)
    # Pass every known class id explicitly so the report has the same
    # structure in every fold, even if a class is absent from a split.
    class_ids = list(range(len(id2label)))
    # Cast names to plain `str` so the report keys are not NumPy string objects.
    class_labels = [str(id2label[class_id]) for class_id in class_ids]
    detailed_report = classification_report(
        p.label_ids,
        preds,
        labels=class_ids,
        target_names=class_labels,
        output_dict=True,
        zero_division=0,
    )
    return {"macro_f1": detailed_report["macro avg"]["f1-score"], "detailed_report": detailed_report}

# 3. Custom Trainer for Weighted Loss
class WeightedLossTrainer(Trainer):
    """Trainer that optimizes a class-weighted cross-entropy loss.

    Args:
        *args, **kwargs: Forwarded unchanged to ``Trainer``.
        class_weights: Optional per-class weights (tensor or array-like, one
            value per class). When ``None``, plain unweighted cross-entropy
            is used.
    """

    def __init__(self, *args, class_weights=None, **kwargs):
        super().__init__(*args, **kwargs)
        # Accept tensors as well as lists / NumPy arrays; keep None as
        # "no weighting" instead of failing later inside compute_loss.
        if class_weights is not None:
            class_weights = torch.as_tensor(class_weights, dtype=torch.float)
        self.class_weights = class_weights

    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        """Return the (optionally class-weighted) cross-entropy loss.

        ``**kwargs`` absorbs additional keyword arguments that the Trainer
        may pass to ``compute_loss`` (e.g. ``num_items_in_batch``).

        Returns:
            ``loss`` or, when ``return_outputs`` is true, ``(loss, outputs)``.
        """
        labels = inputs.get("labels")
        outputs = model(**inputs)
        logits = outputs.get("logits")
        # Move class_weights to the same device as logits (if weights are set)
        weights = None if self.class_weights is None else self.class_weights.to(logits.device)
        loss_fct = torch.nn.CrossEntropyLoss(weight=weights)
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss

print("✅ Helper functions and classes are ready.")

✅ Helper functions and classes are ready.


In [None]:
# List to store the evaluation results (dict returned by trainer.evaluate()) from each fold
fold_results = []

# Create label mappings for consistency across folds.
# `labels` stays a NumPy array because compute_class_weight expects one; the
# mappings are built from the plain Python values so that class names in the
# model config and in the reports are not NumPy string objects.
class_names = sorted(df['aspect'].unique())
labels = np.array(class_names)
label2id = {name: idx for idx, name in enumerate(class_names)}
id2label = {idx: name for idx, name in enumerate(class_names)}

# Number of folds comes from the split file rather than being hard-coded.
n_folds = len(kfold_splits)

# The tokenizer is identical for every fold, so load it once.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

for i, fold in enumerate(kfold_splits):
    print(f"--- Running Fold {i+1}/{n_folds} ---")

    # 1. Split data for the current fold (positional indices into df)
    train_index, test_index = fold['train'], fold['test']
    train_df = df.iloc[train_index]
    test_df = df.iloc[test_index]

    # 2. Apply Random Oversampling ONLY on the training data
    ros = RandomOverSampler(random_state=SEED)
    X_train_resampled, y_train_resampled = ros.fit_resample(train_df[['cleaned_text']], train_df['aspect'])
    train_df_resampled = pd.concat([X_train_resampled, y_train_resampled], axis=1)

    # 3. Compute Class Weights from the ORIGINAL imbalanced training data for this fold
    # NOTE(review): the model is trained on the oversampled (balanced) data AND
    # with these class weights, so minority classes are compensated for twice.
    # Confirm this is intended.
    class_weights = compute_class_weight('balanced', classes=labels, y=train_df['aspect'])
    class_weights_tensor = torch.tensor(class_weights, dtype=torch.float)

    # 4. Tokenize data
    train_encodings = tokenizer(list(train_df_resampled['cleaned_text']), truncation=True, padding=True, max_length=128)
    test_encodings = tokenizer(list(test_df['cleaned_text']), truncation=True, padding=True, max_length=128)

    # Convert labels to integers
    train_labels = [label2id[label] for label in train_df_resampled['aspect']]
    test_labels = [label2id[label] for label in test_df['aspect']]

    # Create PyTorch Datasets
    train_dataset = AspectDataset(train_encodings, train_labels)
    test_dataset = AspectDataset(test_encodings, test_labels)

    # 5. Initialize the model for each fold to prevent weight leakage
    model = AutoModelForSequenceClassification.from_pretrained(
        MODEL_NAME,
        num_labels=len(labels),
        label2id=label2id,
        id2label=id2label
    )

    # <-- NEW: Print model parameter count (only for the first fold)
    if i == 0:
        print(f"Model Parameters: {model.num_parameters()/1e6:.2f}M")

    # 6. Define Training Arguments
    # NOTE(review): a checkpoint is written to Google Drive after every epoch
    # of every fold and no save_total_limit is set; consider limiting it if
    # Drive space is a concern.
    training_args = TrainingArguments(
        output_dir=os.path.join(GDRIVE_PATH, f'results_fold_{i+1}'),
        num_train_epochs=5,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        learning_rate=2e-5,
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        metric_for_best_model="macro_f1",
        weight_decay=0.01,
        logging_dir='./logs',
        logging_steps=10,
        seed=SEED,
        # Disable third-party experiment trackers: otherwise an installed
        # wandb triggers an interactive login prompt that blocks "Run All".
        # Set this to "wandb" explicitly if tracking is wanted.
        report_to="none",
    )

    # 7. Use the custom WeightedLossTrainer
    # NOTE(review): the fold's test split is used as eval_dataset, and
    # load_best_model_at_end selects the checkpoint with the best macro_f1 on
    # it, so the reported fold score is chosen on the test data and is likely
    # optimistic. Consider a separate validation split for checkpoint selection.
    trainer = WeightedLossTrainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        compute_metrics=compute_metrics,
        class_weights=class_weights_tensor,
    )

    # 8. Train the model
    trainer.train()

    # 9. Evaluate (with the best checkpoint reloaded) and store the results
    eval_results = trainer.evaluate()
    fold_results.append(eval_results)
    print(f"Fold {i+1} complete. Evaluation results: {eval_results}")

print(f"\n✅ {n_folds}-fold cross-validation process for IndoBERT finished.")

--- Running Fold 1/5 ---


tokenizer_config.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/445M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indolem/indobertweet-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model Parameters: 110.56M


model.safetensors:   0%|          | 0.00/445M [00:00<?, ?B/s]

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mitaeyeong2532[0m ([33mitaeyeong2532-telkom-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss,Macro F1,Detailed Report
1,0.2697,1.06022,0.660523,"{np.str_('Irrelevant'): {'precision': 0.9387755102040817, 'recall': 0.4623115577889447, 'f1-score': 0.6195286195286195, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.5934065934065934, 'recall': 0.8852459016393442, 'f1-score': 0.7105263157894737, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7586206896551724, 'recall': 0.8301886792452831, 'f1-score': 0.7927927927927928, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.75, 'recall': 0.5625, 'f1-score': 0.6428571428571429, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.34328358208955223, 'recall': 0.7931034482758621, 'f1-score': 0.4791666666666667, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.7352941176470589, 'recall': 0.9615384615384616, 'f1-score': 0.8333333333333334, 'support': 52.0}, np.str_('Smart People'): {'precision': 0.4897959183673469, 'recall': 0.6153846153846154, 'f1-score': 0.5454545454545454, 'support': 39.0}, 'accuracy': 0.6452145214521452, 'macro avg': {'precision': 0.6584537730528293, 'recall': 0.7300389519817873, 'f1-score': 0.6605227737746535, 'support': 606.0}, 'weighted avg': {'precision': 0.7400475379130464, 'recall': 0.6452145214521452, 'f1-score': 0.6495304858276023, 'support': 606.0}}"
2,0.1009,0.932051,0.745176,"{np.str_('Irrelevant'): {'precision': 0.9084507042253521, 'recall': 0.6482412060301508, 'f1-score': 0.7565982404692082, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.6901408450704225, 'recall': 0.8032786885245902, 'f1-score': 0.7424242424242424, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.8, 'recall': 0.8301886792452831, 'f1-score': 0.8148148148148148, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.6927710843373494, 'recall': 0.7986111111111112, 'f1-score': 0.7419354838709677, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.6612903225806451, 'recall': 0.7068965517241379, 'f1-score': 0.6833333333333333, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8333333333333334, 'recall': 0.9615384615384616, 'f1-score': 0.8928571428571429, 'support': 52.0}, np.str_('Smart People'): {'precision': 0.52, 'recall': 0.6666666666666666, 'f1-score': 0.5842696629213483, 'support': 39.0}, 'accuracy': 0.7491749174917491, 'macro avg': {'precision': 0.7294266127924433, 'recall': 0.7736316235486288, 'f1-score': 0.745176131527294, 'support': 606.0}, 'weighted avg': {'precision': 0.7706394222404785, 'recall': 0.7491749174917491, 'f1-score': 0.7503683912866222, 'support': 606.0}}"
3,0.0169,0.964995,0.762684,"{np.str_('Irrelevant'): {'precision': 0.8855421686746988, 'recall': 0.7386934673366834, 'f1-score': 0.8054794520547945, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.6911764705882353, 'recall': 0.7704918032786885, 'f1-score': 0.7286821705426356, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7857142857142857, 'recall': 0.8301886792452831, 'f1-score': 0.8073394495412844, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7261146496815286, 'recall': 0.7916666666666666, 'f1-score': 0.7574750830564784, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.7090909090909091, 'recall': 0.6724137931034483, 'f1-score': 0.6902654867256637, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8305084745762712, 'recall': 0.9423076923076923, 'f1-score': 0.8828828828828829, 'support': 52.0}, np.str_('Smart People'): {'precision': 0.6222222222222222, 'recall': 0.717948717948718, 'f1-score': 0.6666666666666666, 'support': 39.0}, 'accuracy': 0.7722772277227723, 'macro avg': {'precision': 0.7500527400783071, 'recall': 0.78053011712674, 'f1-score': 0.7626844559243438, 'support': 606.0}, 'weighted avg': {'precision': 0.7808059456122941, 'recall': 0.7722772277227723, 'f1-score': 0.7731860301779278, 'support': 606.0}}"
4,0.0294,1.020185,0.772127,"{np.str_('Irrelevant'): {'precision': 0.8722222222222222, 'recall': 0.7889447236180904, 'f1-score': 0.8284960422163589, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.7014925373134329, 'recall': 0.7704918032786885, 'f1-score': 0.734375, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.8113207547169812, 'recall': 0.8113207547169812, 'f1-score': 0.8113207547169812, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7295597484276729, 'recall': 0.8055555555555556, 'f1-score': 0.7656765676567657, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.7090909090909091, 'recall': 0.6724137931034483, 'f1-score': 0.6902654867256637, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8596491228070176, 'recall': 0.9423076923076923, 'f1-score': 0.8990825688073395, 'support': 52.0}, np.str_('Smart People'): {'precision': 0.7142857142857143, 'recall': 0.6410256410256411, 'f1-score': 0.6756756756756757, 'support': 39.0}, 'accuracy': 0.7854785478547854, 'macro avg': {'precision': 0.7710887155519928, 'recall': 0.7760085662294426, 'f1-score': 0.7721274422569692, 'support': 606.0}, 'weighted avg': {'precision': 0.7889538626110677, 'recall': 0.7854785478547854, 'f1-score': 0.7855842513251671, 'support': 606.0}}"
5,0.0033,1.067838,0.761899,"{np.str_('Irrelevant'): {'precision': 0.893491124260355, 'recall': 0.7587939698492462, 'f1-score': 0.8206521739130435, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.7121212121212122, 'recall': 0.7704918032786885, 'f1-score': 0.7401574803149606, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.8269230769230769, 'recall': 0.8113207547169812, 'f1-score': 0.819047619047619, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.725, 'recall': 0.8055555555555556, 'f1-score': 0.7631578947368421, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.6724137931034483, 'recall': 0.6724137931034483, 'f1-score': 0.6724137931034483, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.819672131147541, 'recall': 0.9615384615384616, 'f1-score': 0.8849557522123894, 'support': 52.0}, np.str_('Smart People'): {'precision': 0.625, 'recall': 0.6410256410256411, 'f1-score': 0.6329113924050633, 'support': 39.0}, 'accuracy': 0.7772277227722773, 'macro avg': {'precision': 0.7535173339365191, 'recall': 0.774448568438289, 'f1-score': 0.7618994436761951, 'support': 606.0}, 'weighted avg': {'precision': 0.7846023128115508, 'recall': 0.7772277227722773, 'f1-score': 0.7779948729016156, 'support': 606.0}}"


Trainer is attempting to log a value of "{np.str_('Irrelevant'): {'precision': 0.9387755102040817, 'recall': 0.4623115577889447, 'f1-score': 0.6195286195286195, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.5934065934065934, 'recall': 0.8852459016393442, 'f1-score': 0.7105263157894737, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7586206896551724, 'recall': 0.8301886792452831, 'f1-score': 0.7927927927927928, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.75, 'recall': 0.5625, 'f1-score': 0.6428571428571429, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.34328358208955223, 'recall': 0.7931034482758621, 'f1-score': 0.4791666666666667, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.7352941176470589, 'recall': 0.9615384615384616, 'f1-score': 0.8333333333333334, 'support': 52.0}, np.str_('Smart People'): {'precision': 0.4897959183673469, 'recall': 0.6153846153846154, 'f1-score': 0.5454545454545454, 'support': 39.0}

Trainer is attempting to log a value of "{np.str_('Irrelevant'): {'precision': 0.8722222222222222, 'recall': 0.7889447236180904, 'f1-score': 0.8284960422163589, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.7014925373134329, 'recall': 0.7704918032786885, 'f1-score': 0.734375, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.8113207547169812, 'recall': 0.8113207547169812, 'f1-score': 0.8113207547169812, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7295597484276729, 'recall': 0.8055555555555556, 'f1-score': 0.7656765676567657, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.7090909090909091, 'recall': 0.6724137931034483, 'f1-score': 0.6902654867256637, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8596491228070176, 'recall': 0.9423076923076923, 'f1-score': 0.8990825688073395, 'support': 52.0}, np.str_('Smart People'): {'precision': 0.7142857142857143, 'recall': 0.6410256410256411, 'f1-score': 0.6756756756756757, '

Fold 1 complete. Evaluation results: {'eval_loss': 1.0201854705810547, 'eval_macro_f1': 0.7721274422569692, 'eval_detailed_report': {np.str_('Irrelevant'): {'precision': 0.8722222222222222, 'recall': 0.7889447236180904, 'f1-score': 0.8284960422163589, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.7014925373134329, 'recall': 0.7704918032786885, 'f1-score': 0.734375, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.8113207547169812, 'recall': 0.8113207547169812, 'f1-score': 0.8113207547169812, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7295597484276729, 'recall': 0.8055555555555556, 'f1-score': 0.7656765676567657, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.7090909090909091, 'recall': 0.6724137931034483, 'f1-score': 0.6902654867256637, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8596491228070176, 'recall': 0.9423076923076923, 'f1-score': 0.8990825688073395, 'support': 52.0}, np.str_('Smart People'): {'prec

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indolem/indobertweet-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Macro F1,Detailed Report
1,0.1952,0.869223,0.708238,"{np.str_('Irrelevant'): {'precision': 0.8811188811188811, 'recall': 0.6331658291457286, 'f1-score': 0.7368421052631579, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.6043956043956044, 'recall': 0.9016393442622951, 'f1-score': 0.7236842105263158, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7818181818181819, 'recall': 0.8113207547169812, 'f1-score': 0.7962962962962963, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.8130841121495327, 'recall': 0.6041666666666666, 'f1-score': 0.6932270916334662, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.4368932038834951, 'recall': 0.7758620689655172, 'f1-score': 0.5590062111801242, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.9166666666666666, 'recall': 0.8301886792452831, 'f1-score': 0.8712871287128713, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.4745762711864407, 'recall': 0.7368421052631579, 'f1-score': 0.5773195876288659, 'support': 38.0}, 'accuracy': 0.7062706270627063, 'macro avg': {'precision': 0.7012218458884003, 'recall': 0.7561693497522327, 'f1-score': 0.7082375187487282, 'support': 606.0}, 'weighted avg': {'precision': 0.7635120502645979, 'recall': 0.7062706270627063, 'f1-score': 0.7150881899150542, 'support': 606.0}}"
2,0.0719,0.973601,0.706129,"{np.str_('Irrelevant'): {'precision': 0.9230769230769231, 'recall': 0.6633165829145728, 'f1-score': 0.7719298245614035, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.6125, 'recall': 0.8032786885245902, 'f1-score': 0.6950354609929078, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7192982456140351, 'recall': 0.7735849056603774, 'f1-score': 0.7454545454545455, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.6182795698924731, 'recall': 0.7986111111111112, 'f1-score': 0.696969696969697, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.5862068965517241, 'recall': 0.5862068965517241, 'f1-score': 0.5862068965517241, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8135593220338984, 'recall': 0.9056603773584906, 'f1-score': 0.8571428571428571, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.782608695652174, 'recall': 0.47368421052631576, 'f1-score': 0.5901639344262295, 'support': 38.0}, 'accuracy': 0.7211221122112211, 'macro avg': {'precision': 0.7222185218316041, 'recall': 0.7149061103781689, 'f1-score': 0.7061290308713378, 'support': 606.0}, 'weighted avg': {'precision': 0.7509367116781303, 'recall': 0.7211221122112211, 'f1-score': 0.7223411327026108, 'support': 606.0}}"
3,0.0321,1.044998,0.742724,"{np.str_('Irrelevant'): {'precision': 0.888268156424581, 'recall': 0.7989949748743719, 'f1-score': 0.8412698412698413, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.5777777777777777, 'recall': 0.8524590163934426, 'f1-score': 0.6887417218543046, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.75, 'recall': 0.7924528301886793, 'f1-score': 0.7706422018348624, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7350993377483444, 'recall': 0.7708333333333334, 'f1-score': 0.752542372881356, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.6545454545454545, 'recall': 0.6206896551724138, 'f1-score': 0.6371681415929203, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.9, 'recall': 0.8490566037735849, 'f1-score': 0.8737864077669902, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.8, 'recall': 0.5263157894736842, 'f1-score': 0.6349206349206349, 'support': 38.0}, 'accuracy': 0.7673267326732673, 'macro avg': {'precision': 0.7579558180708796, 'recall': 0.7444003147442156, 'f1-score': 0.7427244745887014, 'support': 606.0}, 'weighted avg': {'precision': 0.78164644978933, 'recall': 0.7673267326732673, 'f1-score': 0.7690255739092374, 'support': 606.0}}"
4,0.0107,1.088552,0.753183,"{np.str_('Irrelevant'): {'precision': 0.8723404255319149, 'recall': 0.8241206030150754, 'f1-score': 0.8475452196382429, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.6911764705882353, 'recall': 0.7704918032786885, 'f1-score': 0.7286821705426356, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.711864406779661, 'recall': 0.7924528301886793, 'f1-score': 0.75, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.71875, 'recall': 0.7986111111111112, 'f1-score': 0.756578947368421, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.6730769230769231, 'recall': 0.603448275862069, 'f1-score': 0.6363636363636364, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8846153846153846, 'recall': 0.8679245283018868, 'f1-score': 0.8761904761904762, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.8148148148148148, 'recall': 0.5789473684210527, 'f1-score': 0.676923076923077, 'support': 38.0}, 'accuracy': 0.7772277227722773, 'macro avg': {'precision': 0.7666626322009906, 'recall': 0.7479995028826519, 'f1-score': 0.7531833610037841, 'support': 606.0}, 'weighted avg': {'precision': 0.7819675954325005, 'recall': 0.7772277227722773, 'f1-score': 0.7770276280568098, 'support': 606.0}}"
5,0.0152,1.093274,0.747553,"{np.str_('Irrelevant'): {'precision': 0.868421052631579, 'recall': 0.8291457286432161, 'f1-score': 0.8483290488431876, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.7014925373134329, 'recall': 0.7704918032786885, 'f1-score': 0.734375, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.711864406779661, 'recall': 0.7924528301886793, 'f1-score': 0.75, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7354838709677419, 'recall': 0.7916666666666666, 'f1-score': 0.7625418060200669, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.631578947368421, 'recall': 0.6206896551724138, 'f1-score': 0.6260869565217392, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8823529411764706, 'recall': 0.8490566037735849, 'f1-score': 0.8653846153846154, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.7777777777777778, 'recall': 0.5526315789473685, 'f1-score': 0.6461538461538462, 'support': 38.0}, 'accuracy': 0.7755775577557755, 'macro avg': {'precision': 0.7584245048592976, 'recall': 0.7437335523815166, 'f1-score': 0.7475530389890651, 'support': 606.0}, 'weighted avg': {'precision': 0.7792032435870585, 'recall': 0.7755775577557755, 'f1-score': 0.7754169142478146, 'support': 606.0}}"


Trainer is attempting to log a value of "{np.str_('Irrelevant'): {'precision': 0.8811188811188811, 'recall': 0.6331658291457286, 'f1-score': 0.7368421052631579, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.6043956043956044, 'recall': 0.9016393442622951, 'f1-score': 0.7236842105263158, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7818181818181819, 'recall': 0.8113207547169812, 'f1-score': 0.7962962962962963, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.8130841121495327, 'recall': 0.6041666666666666, 'f1-score': 0.6932270916334662, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.4368932038834951, 'recall': 0.7758620689655172, 'f1-score': 0.5590062111801242, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.9166666666666666, 'recall': 0.8301886792452831, 'f1-score': 0.8712871287128713, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.4745762711864407, 'recall': 0.7368421052631579, 'f1-score': 0.577319587

Trainer is attempting to log a value of "{np.str_('Irrelevant'): {'precision': 0.8723404255319149, 'recall': 0.8241206030150754, 'f1-score': 0.8475452196382429, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.6911764705882353, 'recall': 0.7704918032786885, 'f1-score': 0.7286821705426356, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.711864406779661, 'recall': 0.7924528301886793, 'f1-score': 0.75, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.71875, 'recall': 0.7986111111111112, 'f1-score': 0.756578947368421, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.6730769230769231, 'recall': 0.603448275862069, 'f1-score': 0.6363636363636364, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8846153846153846, 'recall': 0.8679245283018868, 'f1-score': 0.8761904761904762, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.8148148148148148, 'recall': 0.5789473684210527, 'f1-score': 0.676923076923077, 'support': 38.0}, 'a

Fold 2 complete. Evaluation results: {'eval_loss': 1.0885519981384277, 'eval_macro_f1': 0.7531833610037841, 'eval_detailed_report': {np.str_('Irrelevant'): {'precision': 0.8723404255319149, 'recall': 0.8241206030150754, 'f1-score': 0.8475452196382429, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.6911764705882353, 'recall': 0.7704918032786885, 'f1-score': 0.7286821705426356, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.711864406779661, 'recall': 0.7924528301886793, 'f1-score': 0.75, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.71875, 'recall': 0.7986111111111112, 'f1-score': 0.756578947368421, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.6730769230769231, 'recall': 0.603448275862069, 'f1-score': 0.6363636363636364, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8846153846153846, 'recall': 0.8679245283018868, 'f1-score': 0.8761904761904762, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.81481481

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indolem/indobertweet-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Macro F1,Detailed Report
1,0.3066,0.996695,0.681337,"{np.str_('Irrelevant'): {'precision': 0.918918918918919, 'recall': 0.5125628140703518, 'f1-score': 0.6580645161290323, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.6582278481012658, 'recall': 0.8524590163934426, 'f1-score': 0.7428571428571429, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.6923076923076923, 'recall': 0.8490566037735849, 'f1-score': 0.7627118644067796, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7203389830508474, 'recall': 0.5862068965517241, 'f1-score': 0.6463878326996197, 'support': 145.0}, np.str_('Smart Living'): {'precision': 0.3951612903225806, 'recall': 0.8596491228070176, 'f1-score': 0.5414364640883977, 'support': 57.0}, np.str_('Smart Mobility'): {'precision': 0.7258064516129032, 'recall': 0.8490566037735849, 'f1-score': 0.782608695652174, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.574468085106383, 'recall': 0.7105263157894737, 'f1-score': 0.6352941176470588, 'support': 38.0}, 'accuracy': 0.6683168316831684, 'macro avg': {'precision': 0.6693184670600844, 'recall': 0.7456453390227399, 'f1-score': 0.6813372333543148, 'support': 606.0}, 'weighted avg': {'precision': 0.7375906708772872, 'recall': 0.6683168316831684, 'f1-score': 0.6714528131553787, 'support': 606.0}}"
2,0.1038,0.949072,0.744035,"{np.str_('Irrelevant'): {'precision': 0.8881987577639752, 'recall': 0.7185929648241206, 'f1-score': 0.7944444444444444, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.6794871794871795, 'recall': 0.8688524590163934, 'f1-score': 0.762589928057554, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.746031746031746, 'recall': 0.8867924528301887, 'f1-score': 0.8103448275862069, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.746268656716418, 'recall': 0.6896551724137931, 'f1-score': 0.7168458781362007, 'support': 145.0}, np.str_('Smart Living'): {'precision': 0.5844155844155844, 'recall': 0.7894736842105263, 'f1-score': 0.6716417910447762, 'support': 57.0}, np.str_('Smart Mobility'): {'precision': 0.7457627118644068, 'recall': 0.8301886792452831, 'f1-score': 0.7857142857142857, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.7058823529411765, 'recall': 0.631578947368421, 'f1-score': 0.6666666666666666, 'support': 38.0}, 'accuracy': 0.7524752475247525, 'macro avg': {'precision': 0.7280067127457838, 'recall': 0.7735906228441037, 'f1-score': 0.7440354030928764, 'support': 606.0}, 'weighted avg': {'precision': 0.7683325906923741, 'recall': 0.7524752475247525, 'f1-score': 0.7537345392962309, 'support': 606.0}}"
3,0.02,1.102652,0.715186,"{np.str_('Irrelevant'): {'precision': 0.8596491228070176, 'recall': 0.7386934673366834, 'f1-score': 0.7945945945945946, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.7954545454545454, 'recall': 0.5737704918032787, 'f1-score': 0.6666666666666666, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.75, 'recall': 0.8490566037735849, 'f1-score': 0.7964601769911505, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.6166666666666667, 'recall': 0.7655172413793103, 'f1-score': 0.683076923076923, 'support': 145.0}, np.str_('Smart Living'): {'precision': 0.6166666666666667, 'recall': 0.6491228070175439, 'f1-score': 0.6324786324786325, 'support': 57.0}, np.str_('Smart Mobility'): {'precision': 0.7592592592592593, 'recall': 0.7735849056603774, 'f1-score': 0.7663551401869159, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.6756756756756757, 'recall': 0.6578947368421053, 'f1-score': 0.6666666666666666, 'support': 38.0}, 'accuracy': 0.7277227722772277, 'macro avg': {'precision': 0.7247674195042616, 'recall': 0.7153771791161263, 'f1-score': 0.71518554295165, 'support': 606.0}, 'weighted avg': {'precision': 0.7422871052712984, 'recall': 0.7277227722772277, 'f1-score': 0.729457049558082, 'support': 606.0}}"
4,0.0521,1.190307,0.735491,"{np.str_('Irrelevant'): {'precision': 0.8324324324324325, 'recall': 0.7738693467336684, 'f1-score': 0.8020833333333334, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.8076923076923077, 'recall': 0.6885245901639344, 'f1-score': 0.7433628318584071, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.8035714285714286, 'recall': 0.8490566037735849, 'f1-score': 0.8256880733944955, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.6529411764705882, 'recall': 0.7655172413793103, 'f1-score': 0.7047619047619048, 'support': 145.0}, np.str_('Smart Living'): {'precision': 0.6666666666666666, 'recall': 0.631578947368421, 'f1-score': 0.6486486486486487, 'support': 57.0}, np.str_('Smart Mobility'): {'precision': 0.7592592592592593, 'recall': 0.7735849056603774, 'f1-score': 0.7663551401869159, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.6857142857142857, 'recall': 0.631578947368421, 'f1-score': 0.6575342465753424, 'support': 38.0}, 'accuracy': 0.7475247524752475, 'macro avg': {'precision': 0.7440396509724242, 'recall': 0.7305300832068168, 'f1-score': 0.7354905969655782, 'support': 606.0}, 'weighted avg': {'precision': 0.7532787536694545, 'recall': 0.7475247524752475, 'f1-score': 0.748329631897398, 'support': 606.0}}"
5,0.0184,1.247926,0.730803,"{np.str_('Irrelevant'): {'precision': 0.847457627118644, 'recall': 0.7537688442211056, 'f1-score': 0.7978723404255319, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.7636363636363637, 'recall': 0.6885245901639344, 'f1-score': 0.7241379310344828, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7894736842105263, 'recall': 0.8490566037735849, 'f1-score': 0.8181818181818182, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.6491228070175439, 'recall': 0.7655172413793103, 'f1-score': 0.7025316455696202, 'support': 145.0}, np.str_('Smart Living'): {'precision': 0.6666666666666666, 'recall': 0.631578947368421, 'f1-score': 0.6486486486486487, 'support': 57.0}, np.str_('Smart Mobility'): {'precision': 0.7592592592592593, 'recall': 0.7735849056603774, 'f1-score': 0.7663551401869159, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.6578947368421053, 'recall': 0.6578947368421053, 'f1-score': 0.6578947368421053, 'support': 38.0}, 'accuracy': 0.7425742574257426, 'macro avg': {'precision': 0.733358734964444, 'recall': 0.73141798134412, 'f1-score': 0.7308031801270175, 'support': 606.0}, 'weighted avg': {'precision': 0.7498870280525922, 'recall': 0.7425742574257426, 'f1-score': 0.7438441087655036, 'support': 606.0}}"


Trainer is attempting to log a value of "{np.str_('Irrelevant'): {'precision': 0.918918918918919, 'recall': 0.5125628140703518, 'f1-score': 0.6580645161290323, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.6582278481012658, 'recall': 0.8524590163934426, 'f1-score': 0.7428571428571429, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.6923076923076923, 'recall': 0.8490566037735849, 'f1-score': 0.7627118644067796, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7203389830508474, 'recall': 0.5862068965517241, 'f1-score': 0.6463878326996197, 'support': 145.0}, np.str_('Smart Living'): {'precision': 0.3951612903225806, 'recall': 0.8596491228070176, 'f1-score': 0.5414364640883977, 'support': 57.0}, np.str_('Smart Mobility'): {'precision': 0.7258064516129032, 'recall': 0.8490566037735849, 'f1-score': 0.782608695652174, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.574468085106383, 'recall': 0.7105263157894737, 'f1-score': 0.635294117647

Trainer is attempting to log a value of "{np.str_('Irrelevant'): {'precision': 0.8881987577639752, 'recall': 0.7185929648241206, 'f1-score': 0.7944444444444444, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.6794871794871795, 'recall': 0.8688524590163934, 'f1-score': 0.762589928057554, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.746031746031746, 'recall': 0.8867924528301887, 'f1-score': 0.8103448275862069, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.746268656716418, 'recall': 0.6896551724137931, 'f1-score': 0.7168458781362007, 'support': 145.0}, np.str_('Smart Living'): {'precision': 0.5844155844155844, 'recall': 0.7894736842105263, 'f1-score': 0.6716417910447762, 'support': 57.0}, np.str_('Smart Mobility'): {'precision': 0.7457627118644068, 'recall': 0.8301886792452831, 'f1-score': 0.7857142857142857, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.7058823529411765, 'recall': 0.631578947368421, 'f1-score': 0.6666666666666

Fold 3 complete. Evaluation results: {'eval_loss': 0.9490715265274048, 'eval_macro_f1': 0.7440354030928764, 'eval_detailed_report': {np.str_('Irrelevant'): {'precision': 0.8881987577639752, 'recall': 0.7185929648241206, 'f1-score': 0.7944444444444444, 'support': 199.0}, np.str_('Smart Economy'): {'precision': 0.6794871794871795, 'recall': 0.8688524590163934, 'f1-score': 0.762589928057554, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.746031746031746, 'recall': 0.8867924528301887, 'f1-score': 0.8103448275862069, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.746268656716418, 'recall': 0.6896551724137931, 'f1-score': 0.7168458781362007, 'support': 145.0}, np.str_('Smart Living'): {'precision': 0.5844155844155844, 'recall': 0.7894736842105263, 'f1-score': 0.6716417910447762, 'support': 57.0}, np.str_('Smart Mobility'): {'precision': 0.7457627118644068, 'recall': 0.8301886792452831, 'f1-score': 0.7857142857142857, 'support': 53.0}, np.str_('Smart People'):

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indolem/indobertweet-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Macro F1,Detailed Report
1,0.3857,1.101961,0.632541,"{np.str_('Irrelevant'): {'precision': 0.921875, 'recall': 0.29797979797979796, 'f1-score': 0.45038167938931295, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.5151515151515151, 'recall': 0.8225806451612904, 'f1-score': 0.6335403726708074, 'support': 62.0}, np.str_('Smart Environment'): {'precision': 0.7230769230769231, 'recall': 0.8867924528301887, 'f1-score': 0.7966101694915254, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.6507936507936508, 'recall': 0.5694444444444444, 'f1-score': 0.6074074074074074, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.4174757281553398, 'recall': 0.7543859649122807, 'f1-score': 0.5375, 'support': 57.0}, np.str_('Smart Mobility'): {'precision': 0.684931506849315, 'recall': 0.9433962264150944, 'f1-score': 0.7936507936507936, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.4605263157894737, 'recall': 0.8974358974358975, 'f1-score': 0.6086956521739131, 'support': 39.0}, 'accuracy': 0.6056105610561056, 'macro avg': {'precision': 0.6248329485451739, 'recall': 0.7388593470255707, 'f1-score': 0.6325408678262515, 'support': 606.0}, 'weighted avg': {'precision': 0.7006039921789012, 'recall': 0.6056105610561056, 'f1-score': 0.5851191481397132, 'support': 606.0}}"
2,0.101,0.784191,0.750577,"{np.str_('Irrelevant'): {'precision': 0.8081395348837209, 'recall': 0.702020202020202, 'f1-score': 0.7513513513513513, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.6410256410256411, 'recall': 0.8064516129032258, 'f1-score': 0.7142857142857143, 'support': 62.0}, np.str_('Smart Environment'): {'precision': 0.851063829787234, 'recall': 0.7547169811320755, 'f1-score': 0.8, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.725, 'recall': 0.8055555555555556, 'f1-score': 0.7631578947368421, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.6938775510204082, 'recall': 0.5964912280701754, 'f1-score': 0.6415094339622641, 'support': 57.0}, np.str_('Smart Mobility'): {'precision': 0.8, 'recall': 0.9056603773584906, 'f1-score': 0.8495575221238938, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.725, 'recall': 0.7435897435897436, 'f1-score': 0.7341772151898734, 'support': 39.0}, 'accuracy': 0.7524752475247525, 'macro avg': {'precision': 0.7491580795310006, 'recall': 0.7592122429470669, 'f1-score': 0.7505770188071342, 'support': 606.0}, 'weighted avg': {'precision': 0.7582303977515727, 'recall': 0.7524752475247525, 'f1-score': 0.7517714793666792, 'support': 606.0}}"
3,0.0343,0.984032,0.725187,"{np.str_('Irrelevant'): {'precision': 0.8482758620689655, 'recall': 0.6212121212121212, 'f1-score': 0.717201166180758, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.6875, 'recall': 0.7096774193548387, 'f1-score': 0.6984126984126984, 'support': 62.0}, np.str_('Smart Environment'): {'precision': 0.7288135593220338, 'recall': 0.8113207547169812, 'f1-score': 0.7678571428571429, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.6436170212765957, 'recall': 0.8402777777777778, 'f1-score': 0.7289156626506024, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.6122448979591837, 'recall': 0.5263157894736842, 'f1-score': 0.5660377358490566, 'support': 57.0}, np.str_('Smart Mobility'): {'precision': 0.8135593220338984, 'recall': 0.9056603773584906, 'f1-score': 0.8571428571428571, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.7142857142857143, 'recall': 0.7692307692307693, 'f1-score': 0.7407407407407407, 'support': 39.0}, 'accuracy': 0.7244224422442245, 'macro avg': {'precision': 0.7211851967066274, 'recall': 0.7405278584463805, 'f1-score': 0.7251868576905508, 'support': 606.0}, 'weighted avg': {'precision': 0.7388866939045639, 'recall': 0.7244224422442245, 'f1-score': 0.7220285700649328, 'support': 606.0}}"
4,0.0192,1.105662,0.752984,"{np.str_('Irrelevant'): {'precision': 0.8791946308724832, 'recall': 0.6616161616161617, 'f1-score': 0.7550432276657061, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.6410256410256411, 'recall': 0.8064516129032258, 'f1-score': 0.7142857142857143, 'support': 62.0}, np.str_('Smart Environment'): {'precision': 0.8125, 'recall': 0.7358490566037735, 'f1-score': 0.7722772277227723, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7023809523809523, 'recall': 0.8194444444444444, 'f1-score': 0.7564102564102564, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.639344262295082, 'recall': 0.6842105263157895, 'f1-score': 0.6610169491525424, 'support': 57.0}, np.str_('Smart Mobility'): {'precision': 0.8275862068965517, 'recall': 0.9056603773584906, 'f1-score': 0.8648648648648649, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.7045454545454546, 'recall': 0.7948717948717948, 'f1-score': 0.7469879518072289, 'support': 39.0}, 'accuracy': 0.7524752475247525, 'macro avg': {'precision': 0.7437967354308805, 'recall': 0.77258628201624, 'f1-score': 0.7529837417012979, 'support': 606.0}, 'weighted avg': {'precision': 0.768665756506284, 'recall': 0.7524752475247525, 'f1-score': 0.7529478175180241, 'support': 606.0}}"
5,0.0103,1.040038,0.76467,"{np.str_('Irrelevant'): {'precision': 0.8421052631578947, 'recall': 0.7272727272727273, 'f1-score': 0.7804878048780488, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.6911764705882353, 'recall': 0.7580645161290323, 'f1-score': 0.7230769230769231, 'support': 62.0}, np.str_('Smart Environment'): {'precision': 0.7777777777777778, 'recall': 0.7924528301886793, 'f1-score': 0.7850467289719626, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.740506329113924, 'recall': 0.8125, 'f1-score': 0.7748344370860927, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.6851851851851852, 'recall': 0.6491228070175439, 'f1-score': 0.6666666666666666, 'support': 57.0}, np.str_('Smart Mobility'): {'precision': 0.8135593220338984, 'recall': 0.9056603773584906, 'f1-score': 0.8571428571428571, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.7380952380952381, 'recall': 0.7948717948717948, 'f1-score': 0.7654320987654321, 'support': 39.0}, 'accuracy': 0.768976897689769, 'macro avg': {'precision': 0.7554865122788791, 'recall': 0.777135007548324, 'f1-score': 0.7646696452268547, 'support': 606.0}, 'weighted avg': {'precision': 0.7729452653554908, 'recall': 0.768976897689769, 'f1-score': 0.7686987020675866, 'support': 606.0}}"


Trainer is attempting to log a value of "{np.str_('Irrelevant'): {'precision': 0.921875, 'recall': 0.29797979797979796, 'f1-score': 0.45038167938931295, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.5151515151515151, 'recall': 0.8225806451612904, 'f1-score': 0.6335403726708074, 'support': 62.0}, np.str_('Smart Environment'): {'precision': 0.7230769230769231, 'recall': 0.8867924528301887, 'f1-score': 0.7966101694915254, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.6507936507936508, 'recall': 0.5694444444444444, 'f1-score': 0.6074074074074074, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.4174757281553398, 'recall': 0.7543859649122807, 'f1-score': 0.5375, 'support': 57.0}, np.str_('Smart Mobility'): {'precision': 0.684931506849315, 'recall': 0.9433962264150944, 'f1-score': 0.7936507936507936, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.4605263157894737, 'recall': 0.8974358974358975, 'f1-score': 0.6086956521739131, 'support': 3

Trainer is attempting to log a value of "{np.str_('Irrelevant'): {'precision': 0.8421052631578947, 'recall': 0.7272727272727273, 'f1-score': 0.7804878048780488, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.6911764705882353, 'recall': 0.7580645161290323, 'f1-score': 0.7230769230769231, 'support': 62.0}, np.str_('Smart Environment'): {'precision': 0.7777777777777778, 'recall': 0.7924528301886793, 'f1-score': 0.7850467289719626, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.740506329113924, 'recall': 0.8125, 'f1-score': 0.7748344370860927, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.6851851851851852, 'recall': 0.6491228070175439, 'f1-score': 0.6666666666666666, 'support': 57.0}, np.str_('Smart Mobility'): {'precision': 0.8135593220338984, 'recall': 0.9056603773584906, 'f1-score': 0.8571428571428571, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.7380952380952381, 'recall': 0.7948717948717948, 'f1-score': 0.7654320987654321, 'sup

Fold 4 complete. Evaluation results: {'eval_loss': 1.0400376319885254, 'eval_macro_f1': 0.7646696452268547, 'eval_detailed_report': {np.str_('Irrelevant'): {'precision': 0.8421052631578947, 'recall': 0.7272727272727273, 'f1-score': 0.7804878048780488, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.6911764705882353, 'recall': 0.7580645161290323, 'f1-score': 0.7230769230769231, 'support': 62.0}, np.str_('Smart Environment'): {'precision': 0.7777777777777778, 'recall': 0.7924528301886793, 'f1-score': 0.7850467289719626, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.740506329113924, 'recall': 0.8125, 'f1-score': 0.7748344370860927, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.6851851851851852, 'recall': 0.6491228070175439, 'f1-score': 0.6666666666666666, 'support': 57.0}, np.str_('Smart Mobility'): {'precision': 0.8135593220338984, 'recall': 0.9056603773584906, 'f1-score': 0.8571428571428571, 'support': 53.0}, np.str_('Smart People'): {'precisi

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indolem/indobertweet-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Macro F1,Detailed Report
1,0.3084,0.918843,0.702426,"{np.str_('Irrelevant'): {'precision': 0.8609271523178808, 'recall': 0.6565656565656566, 'f1-score': 0.7449856733524355, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.6, 'recall': 0.8360655737704918, 'f1-score': 0.6986301369863014, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7619047619047619, 'recall': 0.9056603773584906, 'f1-score': 0.8275862068965517, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7678571428571429, 'recall': 0.5972222222222222, 'f1-score': 0.671875, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.4788732394366197, 'recall': 0.5862068965517241, 'f1-score': 0.5271317829457365, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.75, 'recall': 0.9056603773584906, 'f1-score': 0.8205128205128205, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.5166666666666667, 'recall': 0.7948717948717948, 'f1-score': 0.6262626262626263, 'support': 39.0}, 'accuracy': 0.7062706270627063, 'macro avg': {'precision': 0.6766041375975816, 'recall': 0.7546075569569816, 'f1-score': 0.7024263209937817, 'support': 606.0}, 'weighted avg': {'precision': 0.7354630445522199, 'recall': 0.7062706270627063, 'f1-score': 0.7082850428510536, 'support': 606.0}}"
2,0.1333,0.924008,0.729954,"{np.str_('Irrelevant'): {'precision': 0.8125, 'recall': 0.7878787878787878, 'f1-score': 0.8, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.6617647058823529, 'recall': 0.7377049180327869, 'f1-score': 0.6976744186046512, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7966101694915254, 'recall': 0.8867924528301887, 'f1-score': 0.8392857142857143, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7412587412587412, 'recall': 0.7361111111111112, 'f1-score': 0.7386759581881533, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.5686274509803921, 'recall': 0.5, 'f1-score': 0.5321100917431193, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8214285714285714, 'recall': 0.8679245283018868, 'f1-score': 0.8440366972477065, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.6756756756756757, 'recall': 0.6410256410256411, 'f1-score': 0.6578947368421053, 'support': 39.0}, 'accuracy': 0.7491749174917491, 'macro avg': {'precision': 0.7254093306738941, 'recall': 0.7367767770257718, 'f1-score': 0.7299539452730643, 'support': 606.0}, 'weighted avg': {'precision': 0.7476430735595075, 'recall': 0.7491749174917491, 'f1-score': 0.7476301078930563, 'support': 606.0}}"
3,0.0352,1.081516,0.745827,"{np.str_('Irrelevant'): {'precision': 0.8546511627906976, 'recall': 0.7424242424242424, 'f1-score': 0.7945945945945946, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.7368421052631579, 'recall': 0.6885245901639344, 'f1-score': 0.711864406779661, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.8070175438596491, 'recall': 0.8679245283018868, 'f1-score': 0.8363636363636363, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.6946107784431138, 'recall': 0.8055555555555556, 'f1-score': 0.7459807073954984, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.6470588235294118, 'recall': 0.5689655172413793, 'f1-score': 0.6055045871559633, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8518518518518519, 'recall': 0.8679245283018868, 'f1-score': 0.8598130841121495, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.6041666666666666, 'recall': 0.7435897435897436, 'f1-score': 0.6666666666666666, 'support': 39.0}, 'accuracy': 0.7574257425742574, 'macro avg': {'precision': 0.7423141332006499, 'recall': 0.7549869579398042, 'f1-score': 0.7458268118668814, 'support': 606.0}, 'weighted avg': {'precision': 0.7643634331465917, 'recall': 0.7574257425742574, 'f1-score': 0.757741440014032, 'support': 606.0}}"
4,0.0072,1.144598,0.750295,"{np.str_('Irrelevant'): {'precision': 0.851063829787234, 'recall': 0.8080808080808081, 'f1-score': 0.8290155440414507, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.6935483870967742, 'recall': 0.7049180327868853, 'f1-score': 0.6991869918699187, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7833333333333333, 'recall': 0.8867924528301887, 'f1-score': 0.831858407079646, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7586206896551724, 'recall': 0.7638888888888888, 'f1-score': 0.7612456747404844, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.6101694915254238, 'recall': 0.6206896551724138, 'f1-score': 0.6153846153846154, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8653846153846154, 'recall': 0.8490566037735849, 'f1-score': 0.8571428571428571, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.65, 'recall': 0.6666666666666666, 'f1-score': 0.6582278481012658, 'support': 39.0}, 'accuracy': 0.7706270627062707, 'macro avg': {'precision': 0.7445886209689362, 'recall': 0.7571561583142052, 'f1-score': 0.7502945626228911, 'support': 606.0}, 'weighted avg': {'precision': 0.7725748366528816, 'recall': 0.7706270627062707, 'f1-score': 0.7711140629685672, 'support': 606.0}}"
5,0.004,1.18625,0.746299,"{np.str_('Irrelevant'): {'precision': 0.8429319371727748, 'recall': 0.8131313131313131, 'f1-score': 0.8277634961439588, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.7166666666666667, 'recall': 0.7049180327868853, 'f1-score': 0.7107438016528925, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7833333333333333, 'recall': 0.8867924528301887, 'f1-score': 0.831858407079646, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7692307692307693, 'recall': 0.7638888888888888, 'f1-score': 0.7665505226480837, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.5806451612903226, 'recall': 0.6206896551724138, 'f1-score': 0.6, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8627450980392157, 'recall': 0.8301886792452831, 'f1-score': 0.8461538461538461, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.6410256410256411, 'recall': 0.6410256410256411, 'f1-score': 0.6410256410256411, 'support': 39.0}, 'accuracy': 0.768976897689769, 'macro avg': {'precision': 0.7423683723941032, 'recall': 0.7515192375829448, 'f1-score': 0.7462993878148669, 'support': 606.0}, 'weighted avg': {'precision': 0.77113200860345, 'recall': 0.768976897689769, 'f1-score': 0.7695882323762862, 'support': 606.0}}"


Trainer is attempting to log a value of "{np.str_('Irrelevant'): {'precision': 0.8609271523178808, 'recall': 0.6565656565656566, 'f1-score': 0.7449856733524355, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.6, 'recall': 0.8360655737704918, 'f1-score': 0.6986301369863014, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7619047619047619, 'recall': 0.9056603773584906, 'f1-score': 0.8275862068965517, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7678571428571429, 'recall': 0.5972222222222222, 'f1-score': 0.671875, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.4788732394366197, 'recall': 0.5862068965517241, 'f1-score': 0.5271317829457365, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.75, 'recall': 0.9056603773584906, 'f1-score': 0.8205128205128205, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.5166666666666667, 'recall': 0.7948717948717948, 'f1-score': 0.6262626262626263, 'support': 39.0}, 'accuracy': 

Trainer is attempting to log a value of "{np.str_('Irrelevant'): {'precision': 0.851063829787234, 'recall': 0.8080808080808081, 'f1-score': 0.8290155440414507, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.6935483870967742, 'recall': 0.7049180327868853, 'f1-score': 0.6991869918699187, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7833333333333333, 'recall': 0.8867924528301887, 'f1-score': 0.831858407079646, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7586206896551724, 'recall': 0.7638888888888888, 'f1-score': 0.7612456747404844, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.6101694915254238, 'recall': 0.6206896551724138, 'f1-score': 0.6153846153846154, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8653846153846154, 'recall': 0.8490566037735849, 'f1-score': 0.8571428571428571, 'support': 53.0}, np.str_('Smart People'): {'precision': 0.65, 'recall': 0.6666666666666666, 'f1-score': 0.6582278481012658, 'suppor

Fold 5 complete. Evaluation results: {'eval_loss': 1.1445977687835693, 'eval_macro_f1': 0.7502945626228911, 'eval_detailed_report': {np.str_('Irrelevant'): {'precision': 0.851063829787234, 'recall': 0.8080808080808081, 'f1-score': 0.8290155440414507, 'support': 198.0}, np.str_('Smart Economy'): {'precision': 0.6935483870967742, 'recall': 0.7049180327868853, 'f1-score': 0.6991869918699187, 'support': 61.0}, np.str_('Smart Environment'): {'precision': 0.7833333333333333, 'recall': 0.8867924528301887, 'f1-score': 0.831858407079646, 'support': 53.0}, np.str_('Smart Governance'): {'precision': 0.7586206896551724, 'recall': 0.7638888888888888, 'f1-score': 0.7612456747404844, 'support': 144.0}, np.str_('Smart Living'): {'precision': 0.6101694915254238, 'recall': 0.6206896551724138, 'f1-score': 0.6153846153846154, 'support': 58.0}, np.str_('Smart Mobility'): {'precision': 0.8653846153846154, 'recall': 0.8490566037735849, 'f1-score': 0.8571428571428571, 'support': 53.0}, np.str_('Smart People')

In [None]:
# Collect the macro-F1 that each fold's evaluation reported under 'eval_macro_f1'
macro_f1_scores = [fold_metrics['eval_macro_f1'] for fold_metrics in fold_results]

# Fail loudly on an empty run: np.mean/np.std of an empty list would only emit a
# RuntimeWarning and yield NaN, which would then be reported as a result.
if len(macro_f1_scores) == 0:
    raise ValueError("fold_results is empty: run the cross-validation loop before aggregating.")

# Mean and spread across folds. np.std is called with its default ddof=0
# (population standard deviation), so that is what the "±" value below represents.
mean_macro_f1 = np.mean(macro_f1_scores)
std_macro_f1 = np.std(macro_f1_scores)

# The fold count is taken from the data so the header cannot drift from the actual run
print(f"--- Final Aggregated Results ({len(macro_f1_scores)}-Fold CV) for IndoBERTweet (Improved) ---")
print(f"Macro F1-Score = {mean_macro_f1:.4f} ± {std_macro_f1:.4f}")

--- Final Aggregated Results (5-Fold CV) for IndoBERTweet (Improved) ---
Macro F1-Score = 0.7569 ± 0.0102


In [None]:
# Bundle the aggregated score together with the raw per-fold metrics
final_results = {
    'model': 'IndoBERTweet',
    'mean_macro_f1': mean_macro_f1,
    'std_dev_macro_f1': std_macro_f1,
    'results_per_fold': fold_results
}

# Define the output file path
results_file_path = os.path.join(GDRIVE_PATH, 'results_indobertweet_asc.json')


def _to_json_native(value):
    """Fallback for the JSON encoder: convert NumPy scalars/arrays to plain Python values.

    Only invoked by ``json`` for objects it cannot serialize on its own.
    Raises TypeError for anything that is not a NumPy scalar or array, which is
    the contract ``json`` expects from a ``default`` hook.
    """
    if isinstance(value, np.generic):
        return value.item()
    if isinstance(value, np.ndarray):
        return value.tolist()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


# Serialize BEFORE opening the file: opening with 'w' truncates immediately, so a
# serialization error raised mid-dump would otherwise leave an empty/partial file
# and destroy any previously saved results.
serialized_results = json.dumps(final_results, indent=4, default=_to_json_native)

# Save to a JSON file (explicit encoding so the result does not depend on the locale)
with open(results_file_path, 'w', encoding='utf-8') as f:
    f.write(serialized_results)

print(f"\n✅ Final results for IndoBERTweet have been saved to: '{results_file_path}'")


✅ Final results for IndoBERTweet have been saved to: '/content/drive/MyDrive/eecsi_revise/results_indobertweet.json'
