# Install packages and download dataset

In [None]:
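# Reference model card (multilingual BERT): https://huggingface.co/google-bert/bert-base-multilingual-cased
# Note: the cells below fine-tune MODEL = "dccuchile/bert-base-spanish-wwm-cased", not this checkpoint.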

In [None]:
# Install the core dependencies with the %pip magic so they land in the
# environment of the running kernel (a plain `!pip` uses whichever pip is on PATH).
# NOTE(review): versions are unpinned; pin them once a known-good set is confirmed.
%pip install --upgrade pip
%pip install sentencepiece
%pip install datasets
%pip install transformers

[0m

In [None]:
# Install transformers together with its `torch` extra into the kernel's environment.
%pip install transformers[torch]

[0m

In [None]:
# Install Optuna (used for the hyperparameter search) into the kernel's environment.
%pip install optuna

[0m

In [None]:
# Install/upgrade accelerate into the kernel's environment.
%pip install accelerate -U

[0m

# Prepare dataset

In [None]:
# Obtain the combined training data.
# Please ensure the combined_training_esen.json file is in the ./data directory
# (the next cell reads ./data/combined_training_esen.json).
# You can generate this file using the data processing notebook.

Downloading...
From: https://drive.google.com/uc?id=10tmR1VyVPySlwIKbkgRkfaqofBJwTTqx
To: /content/combined_training_esen.json
  0% 0.00/1.29M [00:00<?, ?B/s]100% 1.29M/1.29M [00:00<00:00, 118MB/s]


In [None]:
import pandas as pd

# Load the combined training set (JSON) directly into a DataFrame.
df = pd.read_json('./data/combined_training_esen.json')

# Report the row count explicitly: a bare `len(df)` in the middle of a cell
# is evaluated and then discarded, so it never shows up in the output.
print(f"Loaded {len(df)} rows")

# Display the first few rows of the DataFrame to verify the `text` / `label` columns.
print(df.head())


                                                text  label
0  No me puedo creer que me haya engañado por otr...      0
1  Cuando tu colega dice "el número de cuerpos de...      1
2  Sí, las pollas son importantes, me temo ******...      0
3  2016 TODA LA SOCIEDAD #MGTOW ¡Por favor, sal c...      1
4  El logo oficial de la zona de amigos IG: @meme...      0


# Fine-tuning separately

## clean train and test dataset for training

In [None]:
# len(X_english_test)

In [None]:
# Split the frame into model inputs (the `text` column) and integer targets
# (the `label` column cast to int).
X, y = df['text'].values, df['label'].values.astype(int)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the examples for evaluation; the fixed random_state keeps
# the split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Total number of examples before the train/test split.
X.shape[0]

3077

In [None]:
from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments, EarlyStoppingCallback
import numpy as np
from datasets import load_dataset, load_metric
import transformers
from transformers import AutoTokenizer
import optuna

In [None]:
# Number of distinct class ids present in the targets; used to size the classifier head.
num_labels = len(np.unique(y))

In [None]:
# Display the inferred number of classes.
num_labels

2

In [None]:
# Hugging Face Hub id of the pretrained checkpoint; used below for both the
# tokenizer and the sequence-classification model.
MODEL = "dccuchile/bert-base-spanish-wwm-cased"
# NOTE(review): not referenced in the visible cells; presumably -1 means
# "no cap on training examples" — confirm or remove.
MAX_TRAINING_EXAMPLES = -1

In [None]:
# Load the fast tokenizer that belongs to the MODEL checkpoint.
tokenizer = AutoTokenizer.from_pretrained(
    MODEL,
    use_fast=True,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/364 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/648 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/242k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/480k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/134 [00:00<?, ?B/s]

In [None]:
# Tokenize both splits with the same settings (truncation and padding enabled)
# so training and validation inputs are prepared identically.
_tokenizer_options = {"truncation": True, "padding": True}
train_encodings = tokenizer(X_train.tolist(), **_tokenizer_options)
val_encodings = tokenizer(X_test.tolist(), **_tokenizer_options)

In [None]:
import torch
from torch.utils.data import Dataset

class MyDataset(Dataset):
    """Map-style dataset pairing tokenizer encodings with their labels.

    Args:
        encodings: Mapping from field name to a per-example indexable
            sequence (anything exposing ``.items()``).
        labels: Indexable, sized collection of labels; one entry per example.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        # The dataset size is driven by the label collection.
        return len(self.labels)

    def __getitem__(self, idx):
        """Return a dict of tensors for example ``idx``, including a ``labels`` entry."""
        sample = {}
        for field, column in self.encodings.items():
            sample[field] = torch.tensor(column[idx])
        # The label is stored as a long tensor under the ``labels`` key.
        sample['labels'] = torch.tensor(self.labels[idx], dtype=torch.long)
        return sample


In [None]:
# Wrap each split's encodings and labels so they can be handed to the Trainer.
train_dataset = MyDataset(encodings=train_encodings, labels=y_train)
val_dataset = MyDataset(encodings=val_encodings, labels=y_test)

In [None]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix

def compute_metrics(eval_pred):
    """Compute classification metrics from an evaluation prediction pair.

    Args:
        eval_pred: A pair ``(logits, labels)``; the predicted class is the
            argmax of ``logits`` over the last axis.

    Returns:
        dict: accuracy; precision/recall/F1 for the positive class
        (``pos_label=1``); macro- and micro-averaged precision/recall/F1;
        and the confusion matrix as a nested list. The confusion matrix is
        not a scalar, unlike every other entry.
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    # Every scorer below takes the same (y_true, y_pred) pair.
    pair = (labels, predictions)

    return {
        "accuracy": accuracy_score(*pair),
        "f1_score_positive": f1_score(*pair, average='binary', pos_label=1),
        "precision_positive": precision_score(*pair, pos_label=1),
        "recall_positive": recall_score(*pair, pos_label=1),
        "precision_macro": precision_score(*pair, average='macro'),
        "recall_macro": recall_score(*pair, average='macro'),
        "f1_macro": f1_score(*pair, average='macro'),
        "precision_micro": precision_score(*pair, average='micro'),
        "recall_micro": recall_score(*pair, average='micro'),
        "f1_micro": f1_score(*pair, average='micro'),
        "confusion_matrix": confusion_matrix(*pair).tolist(),
    }



def objective(trial):
    """Optuna objective: fine-tune MODEL with sampled hyperparameters.

    Samples the classifier dropout rate, learning rate, number of epochs,
    batch size, warmup steps and weight decay from ``trial``, trains a
    sequence classifier on ``train_dataset``, evaluates it on ``val_dataset``
    and prints a short metric summary.

    Args:
        trial: The Optuna trial used for the ``trial.suggest_*`` calls.

    Returns:
        The ``eval_accuracy`` value reported by ``trainer.evaluate()``.
    """
    # Sample the dropout rate once, outside model_init, so repeated
    # model_init calls by the Trainer all use the same value.
    dropout_rate = trial.suggest_float('dropout_rate', 0, 0.5)

    def model_init():
        # Set the classification-head dropout through the model config
        # (`classifier_dropout`). Assigning a Dropout module to
        # `model.classifier.dropout` would only add an unused attribute to the
        # final Linear layer, leaving the sampled rate without any effect.
        return AutoModelForSequenceClassification.from_pretrained(
            MODEL,
            num_labels=num_labels,
            classifier_dropout=dropout_rate,
        )

    learning_rate = trial.suggest_float('learning_rate', 1e-5, 5e-5, log=True)
    num_train_epochs = trial.suggest_int('num_train_epochs', 3, 5)
    per_device_train_batch_size = trial.suggest_categorical('per_device_train_batch_size', [8, 16])
    warmup_steps = trial.suggest_int('warmup_steps', 0, 500)
    weight_decay = trial.suggest_float('weight_decay', 0.0, 0.3)

    args = TrainingArguments(
        output_dir='./results',
        learning_rate=learning_rate,
        num_train_epochs=num_train_epochs,
        per_device_train_batch_size=per_device_train_batch_size,
        # Evaluation uses the same batch size as training.
        per_device_eval_batch_size=per_device_train_batch_size,
        warmup_steps=warmup_steps,
        weight_decay=weight_decay,
        # Evaluate and checkpoint once per epoch so the best epoch (by accuracy)
        # can be restored when training ends.
        evaluation_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        metric_for_best_model='accuracy',
        greater_is_better=True,
        logging_dir='./logs',
        logging_steps=10,
    )

    trainer = Trainer(
        model_init=model_init,
        args=args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
    )

    # Train model
    trainer.train()
    # Evaluate model (the best checkpoint is loaded at the end of training)
    eval_results = trainer.evaluate()
    print(f"Accuracy: {eval_results['eval_accuracy']}")
    print(f"F1 Score (Positive): {eval_results['eval_f1_score_positive']}")
    print(f"Macro F1 Score: {eval_results['eval_f1_macro']}")
    print(f"Micro F1 Score: {eval_results['eval_f1_micro']}")

    return eval_results['eval_accuracy']


In [None]:
# Search for the hyperparameters that maximize the value returned by `objective`
# (validation accuracy). The sampler is seeded so that the sequence of suggested
# hyperparameters is reproducible across runs.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
# gc_after_trial=True runs garbage collection after every trial, which helps
# release the previous trial's model before the next one is built.
study.optimize(objective, n_trials=15, gc_after_trial=True)

[I 2024-07-01 19:45:24,866] A new study created in memory with name: no-name-635b73fe-963e-482e-af84-e2e63c174066


pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1 Score Positive,Precision Positive,Recall Positive,Precision Macro,Recall Macro,F1 Macro,Precision Micro,Recall Micro,F1 Micro,Confusion Matrix
1,0.5758,0.565113,0.707792,0.775,0.715935,0.844687,0.70223,0.675355,0.679167,0.707792,0.707792,0.707792,"[[126, 123], [57, 310]]"
2,0.4977,0.545262,0.725649,0.773154,0.761905,0.784741,0.714986,0.711648,0.713066,0.725649,0.725649,0.725649,"[[159, 90], [79, 288]]"
3,0.4126,0.58418,0.730519,0.781579,0.755725,0.809264,0.720912,0.711861,0.714942,0.730519,0.730519,0.730519,"[[153, 96], [70, 297]]"
4,0.2803,0.673696,0.741883,0.795367,0.753659,0.841962,0.736053,0.71817,0.722958,0.741883,0.741883,0.741883,"[[148, 101], [58, 309]]"
5,0.1912,0.697203,0.728896,0.77822,0.759067,0.798365,0.718664,0.712436,0.714789,0.728896,0.728896,0.728896,"[[156, 93], [74, 293]]"


Trainer is attempting to log a value of "[[126, 123], [57, 310]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[159, 90], [79, 288]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[153, 96], [70, 297]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[148, 101], [58, 309]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[156, 93], [74, 293]]" of type <class 'l

Trainer is attempting to log a value of "[[148, 101], [58, 309]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
[I 2024-07-01 20:05:41,213] Trial 0 finished with value: 0.7418831168831169 and parameters: {'learning_rate': 1.1649208640724675e-05, 'num_train_epochs': 5, 'per_device_train_batch_size': 16, 'warmup_steps': 14, 'weight_decay': 0.29658294630496496, 'dropout_rate': 0.03068946492768787}. Best is trial 0 with value: 0.7418831168831169.


Accuracy: 0.7418831168831169
F1 Score (Positive): 0.7953667953667953
Macro F1 Score: 0.722958122958123
Micro F1 Score: 0.7418831168831169


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1 Score Positive,Precision Positive,Recall Positive,Precision Macro,Recall Macro,F1 Macro,Precision Micro,Recall Micro,F1 Micro,Confusion Matrix
1,0.6203,0.571472,0.701299,0.745152,0.757746,0.73297,0.691134,0.693794,0.692184,0.701299,0.701299,0.701299,"[[163, 86], [98, 269]]"
2,0.4295,0.552798,0.732143,0.780876,0.761658,0.80109,0.722133,0.715806,0.718204,0.732143,0.732143,0.732143,"[[157, 92], [73, 294]]"
3,0.3327,0.71033,0.738636,0.783893,0.772487,0.79564,0.72868,0.725129,0.726649,0.738636,0.738636,0.738636,"[[163, 86], [75, 292]]"


Trainer is attempting to log a value of "[[163, 86], [98, 269]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[157, 92], [73, 294]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[163, 86], [75, 292]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


Trainer is attempting to log a value of "[[163, 86], [75, 292]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
[I 2024-07-01 20:18:26,879] Trial 1 finished with value: 0.7386363636363636 and parameters: {'learning_rate': 2.4275888061390413e-05, 'num_train_epochs': 3, 'per_device_train_batch_size': 8, 'warmup_steps': 450, 'weight_decay': 0.06539625254008764, 'dropout_rate': 0.19868835077282987}. Best is trial 0 with value: 0.7418831168831169.


Accuracy: 0.7386363636363636
F1 Score (Positive): 0.7838926174496644
Macro F1 Score: 0.7266485674517316
Micro F1 Score: 0.7386363636363636


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1 Score Positive,Precision Positive,Recall Positive,Precision Macro,Recall Macro,F1 Macro,Precision Micro,Recall Micro,F1 Micro,Confusion Matrix
1,0.6474,0.609265,0.654221,0.757127,0.65098,0.904632,0.660396,0.594886,0.578563,0.654221,0.654221,0.654221,"[[71, 178], [35, 332]]"
2,0.569,0.551549,0.719156,0.775616,0.740099,0.814714,0.709672,0.696514,0.700172,0.719156,0.719156,0.719156,"[[144, 105], [68, 299]]"
3,0.5184,0.531715,0.74513,0.794771,0.763819,0.828338,0.737414,0.725414,0.729291,0.74513,0.74513,0.74513,"[[155, 94], [63, 304]]"
4,0.2852,0.715093,0.738636,0.802938,0.728889,0.893733,0.746975,0.701887,0.707493,0.738636,0.738636,0.738636,"[[127, 122], [39, 328]]"
5,0.1331,0.749956,0.738636,0.783311,0.773936,0.792916,0.728635,0.725775,0.727034,0.738636,0.738636,0.738636,"[[164, 85], [76, 291]]"


Trainer is attempting to log a value of "[[71, 178], [35, 332]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[144, 105], [68, 299]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[155, 94], [63, 304]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[127, 122], [39, 328]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[164, 85], [76, 291]]" of type <class 'l

Trainer is attempting to log a value of "[[155, 94], [63, 304]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
[I 2024-07-01 20:38:55,793] Trial 2 finished with value: 0.7451298701298701 and parameters: {'learning_rate': 1.3873430641590019e-05, 'num_train_epochs': 5, 'per_device_train_batch_size': 16, 'warmup_steps': 400, 'weight_decay': 0.024544432532139525, 'dropout_rate': 0.2154190065962187}. Best is trial 2 with value: 0.7451298701298701.


Accuracy: 0.7451298701298701
F1 Score (Positive): 0.7947712418300654
Macro F1 Score: 0.7292914024996151
Micro F1 Score: 0.7451298701298701


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1 Score Positive,Precision Positive,Recall Positive,Precision Macro,Recall Macro,F1 Macro,Precision Micro,Recall Micro,F1 Micro,Confusion Matrix
1,0.5741,0.579838,0.701299,0.755968,0.736434,0.776567,0.689178,0.683464,0.685515,0.701299,0.701299,0.701299,"[[147, 102], [82, 285]]"
2,0.5293,0.53648,0.730519,0.788804,0.739857,0.844687,0.725258,0.703468,0.708303,0.730519,0.730519,0.730519,"[[140, 109], [57, 310]]"
3,0.3483,0.61282,0.743506,0.789333,0.772846,0.80654,0.734062,0.728571,0.730766,0.743506,0.743506,0.743506,"[[162, 87], [71, 296]]"
4,0.1567,0.719391,0.737013,0.782842,0.770449,0.79564,0.726996,0.723121,0.724754,0.737013,0.737013,0.737013,"[[162, 87], [75, 292]]"


Trainer is attempting to log a value of "[[147, 102], [82, 285]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[140, 109], [57, 310]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[162, 87], [71, 296]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[162, 87], [75, 292]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


Trainer is attempting to log a value of "[[162, 87], [71, 296]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
[I 2024-07-01 20:55:21,031] Trial 3 finished with value: 0.7435064935064936 and parameters: {'learning_rate': 1.784185591246423e-05, 'num_train_epochs': 4, 'per_device_train_batch_size': 16, 'warmup_steps': 127, 'weight_decay': 0.2368881464108401, 'dropout_rate': 0.4652758707752888}. Best is trial 2 with value: 0.7451298701298701.


Accuracy: 0.7435064935064936
F1 Score (Positive): 0.7893333333333333
Macro F1 Score: 0.7307662517289073
Micro F1 Score: 0.7435064935064934


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1 Score Positive,Precision Positive,Recall Positive,Precision Macro,Recall Macro,F1 Macro,Precision Micro,Recall Micro,F1 Micro,Confusion Matrix
1,0.6313,0.588974,0.688312,0.767554,0.690632,0.86376,0.68608,0.64674,0.647324,0.688312,0.688312,0.688312,"[[107, 142], [50, 317]]"
2,0.5664,0.541054,0.730519,0.784974,0.748148,0.825613,0.722415,0.707987,0.712052,0.730519,0.730519,0.730519,"[[147, 102], [64, 303]]"
3,0.5081,0.545869,0.743506,0.784741,0.784741,0.784741,0.733736,0.733736,0.733736,0.743506,0.743506,0.743506,"[[170, 79], [79, 288]]"


Trainer is attempting to log a value of "[[107, 142], [50, 317]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[147, 102], [64, 303]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[170, 79], [79, 288]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


Trainer is attempting to log a value of "[[170, 79], [79, 288]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
[I 2024-07-01 21:07:44,002] Trial 4 finished with value: 0.7435064935064936 and parameters: {'learning_rate': 1.9195290578967604e-05, 'num_train_epochs': 3, 'per_device_train_batch_size': 16, 'warmup_steps': 424, 'weight_decay': 0.10748843862925991, 'dropout_rate': 0.4184308950298612}. Best is trial 2 with value: 0.7451298701298701.


Accuracy: 0.7435064935064936
F1 Score (Positive): 0.7847411444141691
Macro F1 Score: 0.7337360340544741
Micro F1 Score: 0.7435064935064934


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1 Score Positive,Precision Positive,Recall Positive,Precision Macro,Recall Macro,F1 Macro,Precision Micro,Recall Micro,F1 Micro,Confusion Matrix
1,0.6214,0.56193,0.719156,0.770861,0.75,0.792916,0.708333,0.701679,0.704089,0.719156,0.719156,0.719156,"[[152, 97], [76, 291]]"
2,0.4459,0.529483,0.738636,0.788436,0.761421,0.817439,0.72981,0.719964,0.723305,0.738636,0.738636,0.738636,"[[155, 94], [67, 300]]"
3,0.4838,0.635362,0.74513,0.794233,0.765152,0.825613,0.737121,0.72606,0.729739,0.74513,0.74513,0.74513,"[[156, 93], [64, 303]]"
4,0.2198,0.85036,0.74026,0.792746,0.755556,0.833787,0.733228,0.718099,0.72246,0.74026,0.74026,0.74026,"[[150, 99], [61, 306]]"


Trainer is attempting to log a value of "[[152, 97], [76, 291]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[155, 94], [67, 300]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[156, 93], [64, 303]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[150, 99], [61, 306]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


Trainer is attempting to log a value of "[[156, 93], [64, 303]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
[I 2024-07-01 21:24:38,084] Trial 5 finished with value: 0.7451298701298701 and parameters: {'learning_rate': 1.3068941370344067e-05, 'num_train_epochs': 4, 'per_device_train_batch_size': 8, 'warmup_steps': 380, 'weight_decay': 0.2620768792892513, 'dropout_rate': 0.24759844844520923}. Best is trial 2 with value: 0.7451298701298701.


Accuracy: 0.7451298701298701
F1 Score (Positive): 0.7942332896461336
Macro F1 Score: 0.7297392461023844
Micro F1 Score: 0.7451298701298701


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1 Score Positive,Precision Positive,Recall Positive,Precision Macro,Recall Macro,F1 Macro,Precision Micro,Recall Micro,F1 Micro,Confusion Matrix
1,0.5931,0.560813,0.720779,0.761773,0.774648,0.749319,0.711079,0.714017,0.712259,0.720779,0.720779,0.720779,"[[169, 80], [92, 275]]"
2,0.337,0.570926,0.741883,0.791612,0.762626,0.822888,0.733586,0.722689,0.726296,0.741883,0.741883,0.741883,"[[155, 94], [65, 302]]"


Trainer is attempting to log a value of "[[169, 80], [92, 275]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[155, 94], [65, 302]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


Epoch,Training Loss,Validation Loss,Accuracy,F1 Score Positive,Precision Positive,Recall Positive,Precision Macro,Recall Macro,F1 Macro,Precision Micro,Recall Micro,F1 Micro,Confusion Matrix
1,0.5931,0.560813,0.720779,0.761773,0.774648,0.749319,0.711079,0.714017,0.712259,0.720779,0.720779,0.720779,"[[169, 80], [92, 275]]"
2,0.337,0.570926,0.741883,0.791612,0.762626,0.822888,0.733586,0.722689,0.726296,0.741883,0.741883,0.741883,"[[155, 94], [65, 302]]"
3,0.3206,0.803172,0.727273,0.767313,0.780282,0.754768,0.717727,0.720758,0.718951,0.727273,0.727273,0.727273,"[[171, 78], [90, 277]]"
4,0.1602,1.014715,0.737013,0.782842,0.770449,0.79564,0.726996,0.723121,0.724754,0.737013,0.737013,0.737013,"[[162, 87], [75, 292]]"


Trainer is attempting to log a value of "[[171, 78], [90, 277]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[162, 87], [75, 292]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


Trainer is attempting to log a value of "[[155, 94], [65, 302]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
[I 2024-07-01 21:41:28,756] Trial 6 finished with value: 0.7418831168831169 and parameters: {'learning_rate': 1.7265956017674122e-05, 'num_train_epochs': 4, 'per_device_train_batch_size': 8, 'warmup_steps': 93, 'weight_decay': 0.2430701756228109, 'dropout_rate': 0.3835641260917532}. Best is trial 2 with value: 0.7451298701298701.


Accuracy: 0.7418831168831169
F1 Score (Positive): 0.7916120576671035
Macro F1 Score: 0.7262964339508226
Micro F1 Score: 0.7418831168831169


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1 Score Positive,Precision Positive,Recall Positive,Precision Macro,Recall Macro,F1 Macro,Precision Micro,Recall Micro,F1 Micro,Confusion Matrix
1,0.6105,0.568067,0.717532,0.760331,0.768802,0.752044,0.707358,0.709355,0.708229,0.717532,0.717532,0.717532,"[[166, 83], [91, 276]]"
2,0.4201,0.558784,0.74026,0.792746,0.755556,0.833787,0.733228,0.718099,0.72246,0.74026,0.74026,0.74026,"[[150, 99], [61, 306]]"
3,0.3978,0.662436,0.746753,0.797403,0.761787,0.836512,0.740048,0.725485,0.72987,0.746753,0.746753,0.746753,"[[153, 96], [60, 307]]"
4,0.2345,1.284801,0.74026,0.78836,0.766067,0.811989,0.731051,0.723264,0.726113,0.74026,0.74026,0.74026,"[[158, 91], [69, 298]]"
5,0.0365,1.432226,0.727273,0.771117,0.771117,0.771117,0.716884,0.716884,0.716884,0.727273,0.727273,0.727273,"[[165, 84], [84, 283]]"


Trainer is attempting to log a value of "[[166, 83], [91, 276]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[150, 99], [61, 306]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[153, 96], [60, 307]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[158, 91], [69, 298]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[165, 84], [84, 283]]" of type <class 'lis

Trainer is attempting to log a value of "[[153, 96], [60, 307]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
[I 2024-07-01 22:02:11,751] Trial 7 finished with value: 0.7467532467532467 and parameters: {'learning_rate': 2.4435499087997742e-05, 'num_train_epochs': 5, 'per_device_train_batch_size': 8, 'warmup_steps': 404, 'weight_decay': 0.2339372606912032, 'dropout_rate': 0.16517758323964699}. Best is trial 7 with value: 0.7467532467532467.


Accuracy: 0.7467532467532467
F1 Score (Positive): 0.7974025974025974
Macro F1 Score: 0.7298701298701299
Micro F1 Score: 0.7467532467532466


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1 Score Positive,Precision Positive,Recall Positive,Precision Macro,Recall Macro,F1 Macro,Precision Micro,Recall Micro,F1 Micro,Confusion Matrix
1,0.5928,0.55169,0.733766,0.786458,0.753117,0.822888,0.725396,0.712649,0.716505,0.733766,0.733766,0.733766,"[[150, 99], [65, 302]]"
2,0.3191,0.585049,0.738636,0.780952,0.779891,0.782016,0.728655,0.728358,0.728504,0.738636,0.738636,0.738636,"[[168, 81], [80, 287]]"
3,0.3346,0.806731,0.738636,0.786189,0.766839,0.80654,0.729072,0.722547,0.725036,0.738636,0.738636,0.738636,"[[159, 90], [71, 296]]"


Trainer is attempting to log a value of "[[150, 99], [65, 302]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[168, 81], [80, 287]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[159, 90], [71, 296]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


Trainer is attempting to log a value of "[[168, 81], [80, 287]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
[I 2024-07-01 22:14:58,264] Trial 8 finished with value: 0.7386363636363636 and parameters: {'learning_rate': 2.4709140478918323e-05, 'num_train_epochs': 3, 'per_device_train_batch_size': 8, 'warmup_steps': 13, 'weight_decay': 0.03474939337495001, 'dropout_rate': 0.04087882951769267}. Best is trial 7 with value: 0.7467532467532467.


Accuracy: 0.7386363636363636
F1 Score (Positive): 0.780952380952381
Macro F1 Score: 0.7285043594902749
Micro F1 Score: 0.7386363636363636


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1 Score Positive,Precision Positive,Recall Positive,Precision Macro,Recall Macro,F1 Macro,Precision Micro,Recall Micro,F1 Micro,Confusion Matrix
1,0.565,0.558937,0.712662,0.772786,0.730583,0.820163,0.703527,0.68719,0.691029,0.712662,0.712662,0.712662,"[[138, 111], [66, 301]]"
2,0.4849,0.543149,0.751623,0.797351,0.775773,0.820163,0.74315,0.735383,0.738298,0.751623,0.751623,0.751623,"[[162, 87], [66, 301]]"
3,0.2156,0.743002,0.738636,0.779754,0.782967,0.776567,0.728785,0.729649,0.729198,0.738636,0.738636,0.738636,"[[170, 79], [82, 285]]"
4,0.0586,1.059746,0.743506,0.793194,0.763224,0.825613,0.735493,0.724052,0.727793,0.743506,0.743506,0.743506,"[[155, 94], [64, 303]]"


Trainer is attempting to log a value of "[[138, 111], [66, 301]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[162, 87], [66, 301]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[170, 79], [82, 285]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[155, 94], [64, 303]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


Trainer is attempting to log a value of "[[162, 87], [66, 301]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
[I 2024-07-01 22:31:05,150] Trial 9 finished with value: 0.7516233766233766 and parameters: {'learning_rate': 4.021114701295033e-05, 'num_train_epochs': 4, 'per_device_train_batch_size': 16, 'warmup_steps': 79, 'weight_decay': 0.029122388214730566, 'dropout_rate': 0.09526951769915887}. Best is trial 9 with value: 0.7516233766233766.


Accuracy: 0.7516233766233766
F1 Score (Positive): 0.7973509933774834
Macro F1 Score: 0.7382981381981757
Micro F1 Score: 0.7516233766233766


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1 Score Positive,Precision Positive,Recall Positive,Precision Macro,Recall Macro,F1 Macro,Precision Micro,Recall Micro,F1 Micro,Confusion Matrix
1,0.5786,0.581148,0.696429,0.746955,0.741935,0.752044,0.684492,0.683251,0.683823,0.696429,0.696429,0.696429,"[[153, 96], [91, 276]]"
2,0.58,0.536648,0.756494,0.799465,0.784777,0.814714,0.747708,0.742698,0.744774,0.756494,0.756494,0.756494,"[[167, 82], [68, 299]]"
3,0.2802,0.640352,0.733766,0.780161,0.76781,0.792916,0.723567,0.719751,0.721356,0.733766,0.733766,0.733766,"[[161, 88], [76, 291]]"
4,0.0661,1.113075,0.727273,0.777188,0.757106,0.798365,0.716981,0.710428,0.712862,0.727273,0.727273,0.727273,"[[155, 94], [74, 293]]"


Trainer is attempting to log a value of "[[153, 96], [91, 276]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[167, 82], [68, 299]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[161, 88], [76, 291]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[155, 94], [74, 293]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


Trainer is attempting to log a value of "[[167, 82], [68, 299]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
[I 2024-07-01 22:47:09,675] Trial 10 finished with value: 0.7564935064935064 and parameters: {'learning_rate': 4.54647473506188e-05, 'num_train_epochs': 4, 'per_device_train_batch_size': 16, 'warmup_steps': 254, 'weight_decay': 0.1580808091745869, 'dropout_rate': 0.10274824157881546}. Best is trial 10 with value: 0.7564935064935064.


Accuracy: 0.7564935064935064
F1 Score (Positive): 0.7994652406417112
Macro F1 Score: 0.7447739426349052
Micro F1 Score: 0.7564935064935063


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1 Score Positive,Precision Positive,Recall Positive,Precision Macro,Recall Macro,F1 Macro,Precision Micro,Recall Micro,F1 Micro,Confusion Matrix
1,0.5791,0.579905,0.696429,0.746955,0.741935,0.752044,0.684492,0.683251,0.683823,0.696429,0.696429,0.696429,"[[153, 96], [91, 276]]"
2,0.5721,0.543688,0.733766,0.776567,0.776567,0.776567,0.723625,0.723625,0.723625,0.733766,0.733766,0.733766,"[[167, 82], [82, 285]]"
3,0.3414,0.607777,0.753247,0.797333,0.780679,0.814714,0.744417,0.738682,0.74099,0.753247,0.753247,0.753247,"[[165, 84], [68, 299]]"
4,0.0747,1.107719,0.738636,0.782726,0.775401,0.790191,0.72861,0.726421,0.727412,0.738636,0.738636,0.738636,"[[165, 84], [77, 290]]"


Trainer is attempting to log a value of "[[153, 96], [91, 276]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[167, 82], [82, 285]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[165, 84], [68, 299]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[165, 84], [77, 290]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


Trainer is attempting to log a value of "[[165, 84], [68, 299]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
[I 2024-07-01 23:03:36,368] Trial 11 finished with value: 0.7532467532467533 and parameters: {'learning_rate': 4.921977148353014e-05, 'num_train_epochs': 4, 'per_device_train_batch_size': 16, 'warmup_steps': 264, 'weight_decay': 0.15559993657200552, 'dropout_rate': 0.11185089980240842}. Best is trial 10 with value: 0.7564935064935064.


Accuracy: 0.7532467532467533
F1 Score (Positive): 0.7973333333333332
Macro F1 Score: 0.7409903181189488
Micro F1 Score: 0.7532467532467532


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1 Score Positive,Precision Positive,Recall Positive,Precision Macro,Recall Macro,F1 Macro,Precision Micro,Recall Micro,F1 Micro,Confusion Matrix
1,0.5792,0.57976,0.699675,0.748982,0.745946,0.752044,0.688014,0.687267,0.687622,0.699675,0.699675,0.699675,"[[155, 94], [91, 276]]"
2,0.5615,0.543641,0.725649,0.773762,0.760526,0.787466,0.715009,0.711002,0.712654,0.725649,0.725649,0.725649,"[[158, 91], [78, 289]]"
3,0.3007,0.636413,0.743506,0.790451,0.770026,0.811989,0.734358,0.72728,0.729953,0.743506,0.743506,0.743506,"[[160, 89], [69, 298]]"
4,0.0354,1.157833,0.732143,0.779116,0.765789,0.792916,0.721878,0.717743,0.719455,0.732143,0.732143,0.732143,"[[160, 89], [76, 291]]"


Trainer is attempting to log a value of "[[155, 94], [91, 276]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[158, 91], [78, 289]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[160, 89], [69, 298]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[160, 89], [76, 291]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


Trainer is attempting to log a value of "[[160, 89], [69, 298]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
[I 2024-07-01 23:20:01,982] Trial 12 finished with value: 0.7435064935064936 and parameters: {'learning_rate': 4.816933751766244e-05, 'num_train_epochs': 4, 'per_device_train_batch_size': 16, 'warmup_steps': 257, 'weight_decay': 0.1544674808803138, 'dropout_rate': 0.12939438567608816}. Best is trial 10 with value: 0.7564935064935064.


Accuracy: 0.7435064935064936
F1 Score (Positive): 0.7904509283819628
Macro F1 Score: 0.7299534976637847
Micro F1 Score: 0.7435064935064934


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1 Score Positive,Precision Positive,Recall Positive,Precision Macro,Recall Macro,F1 Macro,Precision Micro,Recall Micro,F1 Micro,Confusion Matrix
1,0.575,0.580423,0.706169,0.760265,0.739691,0.782016,0.694407,0.688197,0.690405,0.706169,0.706169,0.706169,"[[148, 101], [80, 287]]"
2,0.5767,0.548635,0.717532,0.772251,0.743073,0.803815,0.707153,0.697088,0.700228,0.717532,0.717532,0.717532,"[[147, 102], [72, 295]]"
3,0.3664,0.63116,0.74513,0.782849,0.794944,0.771117,0.735933,0.738972,0.737201,0.74513,0.74513,0.74513,"[[176, 73], [84, 283]]"
4,0.0617,0.983583,0.748377,0.793057,0.777487,0.809264,0.739171,0.733949,0.736073,0.748377,0.748377,0.748377,"[[164, 85], [70, 297]]"


Trainer is attempting to log a value of "[[148, 101], [80, 287]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[147, 102], [72, 295]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[176, 73], [84, 283]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[164, 85], [70, 297]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


Trainer is attempting to log a value of "[[164, 85], [70, 297]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
[I 2024-07-01 23:36:28,388] Trial 13 finished with value: 0.7483766233766234 and parameters: {'learning_rate': 3.44528232431734e-05, 'num_train_epochs': 4, 'per_device_train_batch_size': 16, 'warmup_steps': 266, 'weight_decay': 0.17684859801837546, 'dropout_rate': 0.08946753853830189}. Best is trial 10 with value: 0.7564935064935064.


Accuracy: 0.7483766233766234
F1 Score (Positive): 0.7930574098798399
Macro F1 Score: 0.7360732183974767
Micro F1 Score: 0.7483766233766234


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1 Score Positive,Precision Positive,Recall Positive,Precision Macro,Recall Macro,F1 Macro,Precision Micro,Recall Micro,F1 Micro,Confusion Matrix
1,0.5777,0.582515,0.701299,0.752022,0.744,0.760218,0.689427,0.687338,0.688256,0.701299,0.701299,0.701299,"[[153, 96], [88, 279]]"
2,0.55,0.538668,0.732143,0.782609,0.757653,0.809264,0.722577,0.713869,0.716886,0.732143,0.732143,0.732143,"[[154, 95], [70, 297]]"
3,0.3424,0.700169,0.727273,0.757925,0.804281,0.716621,0.72221,0.729797,0.722829,0.727273,0.727273,0.727273,"[[185, 64], [104, 263]]"
4,0.1029,1.147524,0.720779,0.761773,0.774648,0.749319,0.711079,0.714017,0.712259,0.720779,0.720779,0.720779,"[[169, 80], [92, 275]]"
5,0.0531,1.268428,0.727273,0.769231,0.775623,0.762943,0.717223,0.718821,0.717949,0.727273,0.727273,0.727273,"[[168, 81], [87, 280]]"


Trainer is attempting to log a value of "[[153, 96], [88, 279]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[154, 95], [70, 297]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[185, 64], [104, 263]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[169, 80], [92, 275]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[168, 81], [87, 280]]" of type <class 'li

Trainer is attempting to log a value of "[[154, 95], [70, 297]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
[I 2024-07-01 23:56:56,486] Trial 14 finished with value: 0.7321428571428571 and parameters: {'learning_rate': 3.260143415695753e-05, 'num_train_epochs': 5, 'per_device_train_batch_size': 16, 'warmup_steps': 189, 'weight_decay': 0.18943883109644696, 'dropout_rate': 0.3158787651339925}. Best is trial 10 with value: 0.7564935064935064.


Accuracy: 0.7321428571428571
F1 Score (Positive): 0.7826086956521738
Macro F1 Score: 0.716885743174924
Micro F1 Score: 0.7321428571428571


In [None]:
# Display the trial with the best objective value recorded by the Optuna study.
study.best_trial

FrozenTrial(number=10, state=TrialState.COMPLETE, values=[0.7564935064935064], datetime_start=datetime.datetime(2024, 7, 1, 22, 31, 5, 151704), datetime_complete=datetime.datetime(2024, 7, 1, 22, 47, 9, 674825), params={'learning_rate': 4.54647473506188e-05, 'num_train_epochs': 4, 'per_device_train_batch_size': 16, 'warmup_steps': 254, 'weight_decay': 0.1580808091745869, 'dropout_rate': 0.10274824157881546}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'learning_rate': FloatDistribution(high=5e-05, log=True, low=1e-05, step=None), 'num_train_epochs': IntDistribution(high=5, log=False, low=3, step=1), 'per_device_train_batch_size': CategoricalDistribution(choices=(8, 16)), 'warmup_steps': IntDistribution(high=500, log=False, low=0, step=1), 'weight_decay': FloatDistribution(high=0.3, log=False, low=0.0, step=None), 'dropout_rate': FloatDistribution(high=0.5, log=False, low=0.0, step=None)}, trial_id=10, value=None)

In [None]:
# Display the hyperparameters of the best trial; the cells below reuse them for the final run.
study.best_params

{'learning_rate': 4.54647473506188e-05,
 'num_train_epochs': 4,
 'per_device_train_batch_size': 16,
 'warmup_steps': 254,
 'weight_decay': 0.1580808091745869,
 'dropout_rate': 0.10274824157881546}

In [None]:
# Training configuration for the final run, built from the best Optuna trial.
# Read the best parameters once instead of querying the study for every field.
best_params = study.best_params

args = TrainingArguments(
    output_dir='./results',
    learning_rate=best_params["learning_rate"],
    num_train_epochs=best_params["num_train_epochs"],
    per_device_train_batch_size=best_params["per_device_train_batch_size"],
    # The evaluation batch size deliberately mirrors the tuned training batch size.
    per_device_eval_batch_size=best_params["per_device_train_batch_size"],
    warmup_steps=best_params["warmup_steps"],
    weight_decay=best_params["weight_decay"],
    # Evaluate and checkpoint once per epoch; both strategies must match for
    # load_best_model_at_end to be accepted.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    # Without load_best_model_at_end the trainer keeps the last-epoch weights,
    # and metric_for_best_model is never used to pick a checkpoint. Enabling it
    # restores the epoch with the highest eval accuracy when training finishes.
    load_best_model_at_end=True,
    metric_for_best_model='accuracy',
    logging_dir='./logs',
    logging_steps=10,
)



In [None]:
def model_init(trail=None):
    """Create a fresh classifier that uses the tuned dropout rate.

    Args:
        trail: Unused. Kept (with its original spelling) so the callable can
            still receive the optional trial argument that Trainer may pass.

    Returns:
        A sequence-classification model loaded from ``MODEL`` with
        ``num_labels`` outputs and its classification dropout replaced by
        ``study.best_params["dropout_rate"]``.
    """
    model = AutoModelForSequenceClassification.from_pretrained(MODEL, num_labels=num_labels)
    tuned_dropout = torch.nn.Dropout(study.best_params["dropout_rate"])
    # BERT-style classifiers apply ``model.dropout`` before a plain linear
    # ``model.classifier``; setting ``model.classifier.dropout`` there only
    # attaches an unused module, so the tuned rate would never be applied.
    # Heads that own a ``dropout`` submodule (e.g. RoBERTa-style heads) are
    # still updated in place.
    dropout_owner = model.classifier if hasattr(model.classifier, "dropout") else model
    dropout_owner.dropout = tuned_dropout
    return model

# Collect the Trainer inputs first so the configuration reads as one table,
# then build the trainer and launch fine-tuning.
trainer_kwargs = dict(
    model_init=model_init,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    # Early stopping is currently disabled:
    # callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
)
trainer = Trainer(**trainer_kwargs)

trainer.train()

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1 Score Positive,Precision Positive,Recall Positive,Precision Macro,Recall Macro,F1 Macro,Precision Micro,Recall Micro,F1 Micro,Confusion Matrix
1,0.5786,0.581148,0.696429,0.746955,0.741935,0.752044,0.684492,0.683251,0.683823,0.696429,0.696429,0.696429,"[[153, 96], [91, 276]]"
2,0.58,0.536648,0.756494,0.799465,0.784777,0.814714,0.747708,0.742698,0.744774,0.756494,0.756494,0.756494,"[[167, 82], [68, 299]]"
3,0.2802,0.640352,0.733766,0.780161,0.76781,0.792916,0.723567,0.719751,0.721356,0.733766,0.733766,0.733766,"[[161, 88], [76, 291]]"
4,0.0661,1.113075,0.727273,0.777188,0.757106,0.798365,0.716981,0.710428,0.712862,0.727273,0.727273,0.727273,"[[155, 94], [74, 293]]"


Trainer is attempting to log a value of "[[153, 96], [91, 276]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[167, 82], [68, 299]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[161, 88], [76, 291]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[[155, 94], [74, 293]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


TrainOutput(global_step=616, training_loss=0.38907787281197387, metrics={'train_runtime': 970.5213, 'train_samples_per_second': 10.143, 'train_steps_per_second': 0.635, 'total_flos': 2590065228963840.0, 'train_loss': 0.38907787281197387, 'epoch': 4.0})

In [None]:
# Run evaluation with the trainer's configured eval dataset and show only the accuracy entry.
trainer.evaluate()['eval_accuracy']

Trainer is attempting to log a value of "[[155, 94], [74, 293]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


0.7272727272727273

In [None]:
# Run evaluation again and display the full metrics dictionary returned by compute_metrics.
trainer.evaluate()

Trainer is attempting to log a value of "[[155, 94], [74, 293]]" of type <class 'list'> for key "eval/confusion_matrix" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


{'eval_loss': 1.113074541091919,
 'eval_accuracy': 0.7272727272727273,
 'eval_f1_score_positive': 0.7771883289124669,
 'eval_precision_positive': 0.7571059431524548,
 'eval_recall_positive': 0.7983651226158038,
 'eval_precision_macro': 0.7169809191744807,
 'eval_recall_macro': 0.7104275412275807,
 'eval_f1_macro': 0.7128619468830117,
 'eval_precision_micro': 0.7272727272727273,
 'eval_recall_micro': 0.7272727272727273,
 'eval_f1_micro': 0.7272727272727273,
 'eval_confusion_matrix': [[155, 94], [74, 293]],
 'eval_runtime': 12.6387,
 'eval_samples_per_second': 48.739,
 'eval_steps_per_second': 3.086,
 'epoch': 4.0}

In [None]:
# Download test data
# Please ensure the test_data.json file is in the data directory

Downloading...
From: https://drive.google.com/uc?id=1DAD8DanXohNMlLcpZoTNqeUTABHGM6dR
To: /content/test_data.json
  0% 0.00/146k [00:00<?, ?B/s]100% 146k/146k [00:00<00:00, 134MB/s]


In [None]:
import pandas as pd
import json

# The test set is stored as JSON Lines: one JSON object per line.
file_path = './data/test_data.json'
# Read explicitly as UTF-8 so the accented Spanish text decodes the same way on
# every platform, and skip blank lines that would make json.loads fail.
with open(file_path, 'r', encoding='utf-8') as file:
    data = [json.loads(line) for line in file if line.strip()]

df_test = pd.DataFrame(data)

# Quick sanity check: row count and the first few records.
print(len(df_test))
print(df_test.head())


343
                                       combined_text  gold
0  Cuando entras a las redes sociales y te topas ...     0
1  Cuando tus canciones de rap ya llegan a 100 re...     1
2  NI VÍCTIMAS NI PASIVAS, FEMINISTAS COMBATIVAS ...     1
3  imgflip.com YO CUANDO ME EXIGEN SÉ LA PROFESIO...     1
4  intensa.mx Mira, te llamo para decirte que no ...     0


In [None]:
# Extract the test texts from the 'combined_text' column as an array.
X_test_final = df_test['combined_text'].values

In [None]:
import pandas as pd
import json

file_path = './data/test_data.json'
# Read explicitly as UTF-8 (the texts contain accented characters) and ignore
# blank lines, which json.loads cannot parse.
with open(file_path, 'r', encoding='utf-8') as file:
    data = [json.loads(line) for line in file if line.strip()]

# Split every record into its text and gold label; both lists are reused by
# later cells when the predictions are exported.
texts = [entry['combined_text'] for entry in data]
labels = [entry['gold'] for entry in data]

# 'predictions' starts as an all-zero placeholder column.
df_test = pd.DataFrame({
    'combined_text': texts,
    'gold': labels,
    'predictions': [0] * len(texts)
})

print(len(df_test))
print(df_test.head())


343
                                       combined_text  gold  predictions
0  Cuando entras a las redes sociales y te topas ...     0            0
1  Cuando tus canciones de rap ya llegan a 100 re...     1            0
2  NI VÍCTIMAS NI PASIVAS, FEMINISTAS COMBATIVAS ...     1            0
3  imgflip.com YO CUANDO ME EXIGEN SÉ LA PROFESIO...     1            0
4  intensa.mx Mira, te llamo para decirte que no ...     0            0


In [None]:
import json
from datasets import Dataset

file_path = './data/test_data.json'
# Read explicitly as UTF-8 and skip blank lines so parsing is platform-independent.
with open(file_path, 'r', encoding='utf-8') as file:
    data = [json.loads(line) for line in file if line.strip()]

texts = [entry['combined_text'] for entry in data]

# MyDataset is given one label per example, so supply all-zero placeholders.
# Any metric computed against these placeholders during prediction is meaningless.
dummy_labels = [0] * len(texts)
test_dict = {'text': texts, 'predictions': dummy_labels}
# Tokenize the texts loaded in this cell rather than a variable left over from
# an earlier cell, so the cell does not depend on hidden notebook state.
test_encodings = tokenizer(texts, truncation=True, padding=True)
test_dataset = MyDataset(test_encodings, test_dict['predictions'])

In [None]:
# Run inference on the tokenized test set; the labels inside test_dataset are placeholders.
test_predictions = trainer.predict(test_dataset)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Reuse the prediction output computed in the previous cell instead of running
# a second, identical inference pass over the test set.
test_preds_raw = test_predictions.predictions
test_labels = test_predictions.label_ids
# Pick the highest-scoring class for every example.
test_preds = np.argmax(test_preds_raw, axis=-1)
print(test_preds)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


[0 0 1 0 1 1 1 0 1 0 0 1 0 1 1 1 1 0 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1 0
 1 1 1 0 0 1 1 1 1 0 0 0 1 0 1 1 0 1 0 0 1 1 1 1 1 1 0 1 1 0 1 1 1 0 1 0 0
 1 0 0 1 1 1 1 1 0 0 1 1 0 1 1 0 1 0 0 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 0 1
 1 0 1 0 1 1 1 1 0 1 1 0 1 1 0 1 1 1 1 0 1 0 1 1 1 1 0 1 1 0 1 1 0 1 1 0 1
 0 0 1 1 0 1 1 0 0 0 1 1 1 1 0 1 1 1 1 1 0 1 0 1 0 1 1 1 0 0 1 1 0 0 1 1 1
 0 1 1 1 0 0 1 1 1 0 0 1 1 1 0 1 0 1 0 0 1 1 1 1 0 1 1 1 1 1 0 1 0 1 1 1 0
 1 1 1 1 0 1 1 0 0 1 0 1 0 1 0 1 1 0 1 1 0 0 0 1 1 0 0 1 0 1 1 1 0 0 1 1 1
 1 0 1 1 0 0 0 1 0 1 1 0 0 1 1 1 1 0 1 0 0 1 1 1 1 1 1 0 0 1 1 1 0 0 0 1 1
 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 0 1 1 0 1 1 1 0 1 1 1 1 1 1 1 1 0 1 1 0 0 1
 0 0 1 1 0 0 1 0 1 0]


In [None]:
import numpy as np
import pandas as pd


# Pair every test text with the model's prediction and its gold label.
df_predictions = pd.DataFrame({
    'text': texts,
    'predicted_label': test_preds,
    'gold_label': labels
})

# NOTE(review): the file name mentions "google-bert ... uncased" while the
# training logs show dccuchile/bert-base-spanish-wwm-cased — confirm the name.
predictions_path = './results/google-bert-based-uncased-predictions-ablation.csv'
df_predictions.to_csv(predictions_path, index=False)

# Report the path that was actually written; the message used to name
# 'predictions.csv', which is not the file created above.
print(f"预测结果已经保存到 '{predictions_path}' 文件.")


预测结果已经保存到 'predictions.csv' 文件.


In [None]:
# Load the original test file to recover the official example ids. Read it
# explicitly as UTF-8 so decoding does not depend on the platform default.
with open('./data/spanish_memes_test.json', 'r', encoding='utf-8') as file:
    data_ids = json.load(file)

# presumably the entries appear in the same order as the rows of
# test_data.json used for prediction — TODO confirm the alignment.
ids = [entry['id_EXIST'] for entry in data_ids.values()]

# Map class indices to submission labels; anything other than 0 or 1 becomes None.
label_names = {0: "NO", 1: "YES"}
values = [label_names.get(pred) for pred in test_preds]

df_predictions = pd.DataFrame({
    'id': ids,
    'value': values,
    'test_case': 'EXIST2024'
})

# NOTE(review): lines=True together with indent=2 may spread each record over
# several lines, which is neither one-object-per-line JSON Lines nor a JSON
# array — confirm this is the format the submission checker expects.
df_predictions.to_json('./results/bert-based-uncased-es_test_submission.json', orient='records', lines=True, indent=2)


In [None]:
# Sanity check: number of predictions produced for the test set.
len(test_preds)

343

In [None]:
# Sanity check: number of rows in the submission frame.
len(df_predictions)

343