In [1]:
%pip install ./humor-detection ipywidgets==8.1.5 --q

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
from humor_detection.encoder import classification_model, detection_model, load_model
from humor_detection.test import test_classification, test_detection
from humor_detection.train import train_classification, train_detection
from humor_detection.predict import predict_classification, predict_detection
from humor_detection.utils import set_random_seeds
from IPython.display import display
from transformers.training_args import TrainingArguments

# Hugging Face identifier of the base checkpoint.
model_name = "distilbert/distilbert-base-multilingual-cased"
# Root folder that groups the saved classification and detection models.
save_path = "./models/distilbert"
# Trainer arguments that stay fixed across training/testing runs; they depend on the available GPUs.
default_arguments = dict(
    bf16=True,
    bf16_full_eval=True,
    disable_tqdm=False,
    per_device_eval_batch_size=150,
    per_device_train_batch_size=150,
)
# Sample texts used for manual predictions; each trailing comment is the label noted by the author.
prompts = [
    # Humor
    "¿Cuál es el último animal que subió al arca de Noé? El del-fin.",
    # Humor
    (
        "El otro día unas chicas llamarón a mi puerta y me pidieron una pequeña donación para una piscina local.\n"
        "Les di un garrafa de agua."
    ),
    # No humor
    "The brain surgeon changed my life. He really opened my mind.",
    # No humor
    "djasndoasndoa",
    # No humor
    "jajaja",
]

In [3]:
# Classification task with ratings 1 to 5 (the labels are 0 to 4).
def run_classification(full_dataset: bool):
    """Train the rating classifier, display its logs/metrics and predict the sample prompts.

    Args:
        full_dataset: Forwarded to ``train_classification``. When True, the run is
            given ``{save_path}/classification`` as its save location and the model
            is reloaded from that folder before predicting. When False, nothing is
            saved, so the in-memory model is kept and is additionally evaluated
            with ``test_classification``.
    """
    # Per the original author's note, importing the package already seeds the RNGs,
    # but under Jupyter the call has to be repeated in every cell.
    set_random_seeds()
    # Creates the model and tokenizer (and adds a LoRA if needed, per the original note).
    model, tokenizer = classification_model(model_name)
    # Transformers trainer settings: https://huggingface.co/docs/transformers/v4.51.3/en/main_classes/trainer#transformers.TrainingArguments
    #  The key knobs are bf16/fp16 for VRAM, the batch sizes for speed and num_train_epochs for the epochs.
    arguments = TrainingArguments(
        num_train_epochs=10,
        lr_scheduler_type="cosine_with_restarts",
        max_grad_norm=0.01,
        **default_arguments,
    )
    # Where the training logs and the final model are stored; None means this run saves nothing.
    model_dir = f"{save_path}/classification" if full_dataset else None
    # Training on the Spanish data; full_dataset=True trains the final model and
    # english_data=True would add the English dataset.
    train_logs, metrics = train_classification(
        model,
        tokenizer,
        arguments,
        full_dataset=full_dataset,  # Final training
        english_data=False,  # Use the StupidStuff dataset (to be removed once translations exist)
        class_weights=[1, 1.25, 1.25, 2, 4],  # Class weights to counter the imbalance
        sample=False,  # "under"/"over" sampling; the magnitude factor cannot be tuned yet, so results are poor
        best_model_metric="macro_f1",  # "macro_f1" (default), "weighted_f1" or "accuracy": metric used to keep the best epoch
        save_path=model_dir,
    )
    display(train_logs)
    display(metrics)
    if model_dir is not None:
        # Reload the saved model only when this run actually wrote one. Loading
        # unconditionally would fail, or silently pick up a stale model from an
        # earlier run, whenever full_dataset is False.
        model, _ = load_model(model_name, model_dir)
    if not full_dataset:
        # Test-set evaluation on the team's own dataset; with full_dataset=True this
        # is already done during training (per the original author's note).
        display(test_classification(model, tokenizer, arguments))
    # Manual prediction over the sample prompts.
    display(predict_classification(model, tokenizer, prompts, arguments))
    # A save_model(model, path) helper can store the model manually; it deletes the old
    # folders, so it may remove the saved metrics. It is not imported in this notebook.

In [4]:
# Full-dataset run: the trained model is saved under f"{save_path}/classification".
run_classification(full_dataset=True)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/9234 [00:00<?, ? examples/s]

Map:   0%|          | 0/1178 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,0 Precision,0 Recall,0 F1-score,0 Support,1 Precision,1 Recall,1 F1-score,1 Support,2 Precision,2 Recall,2 F1-score,2 Support,3 Precision,3 Recall,3 F1-score,3 Support,4 Precision,4 Recall,4 F1-score,4 Support,Accuracy,Macro Avg Precision,Macro Avg Recall,Macro Avg F1-score,Macro Avg Support,Weighted Avg Precision,Weighted Avg Recall,Weighted Avg F1-score,Weighted Avg Support
1,1.5691,1.495356,0.446429,0.192308,0.268817,520.0,0.300529,0.791086,0.435583,359.0,0.111111,0.004505,0.008658,222.0,0.0,0.0,0.0,65.0,0.0,0.0,0.0,12.0,0.326825,0.171614,0.19758,0.142612,1178.0,0.309592,0.326825,0.25304,1178.0
2,1.5521,1.620178,0.5625,0.121154,0.199367,520.0,0.2,0.047354,0.076577,359.0,0.194274,0.855856,0.316667,222.0,0.0,0.0,0.0,65.0,0.0,0.0,0.0,12.0,0.229202,0.191355,0.204873,0.118522,1178.0,0.345865,0.229202,0.17102,1178.0
3,1.4694,1.653001,0.513966,0.176923,0.263233,520.0,0.288703,0.384401,0.329749,359.0,0.195545,0.355856,0.252396,222.0,0.016949,0.015385,0.016129,65.0,0.017241,0.083333,0.028571,12.0,0.264007,0.206481,0.20318,0.178016,1178.0,0.352824,0.264007,0.265437,1178.0
4,1.2826,1.782049,0.497696,0.207692,0.29308,520.0,0.254098,0.259053,0.256552,359.0,0.216606,0.27027,0.240481,222.0,0.067416,0.276923,0.108434,65.0,0.039216,0.166667,0.063492,12.0,0.23854,0.215006,0.236121,0.192408,1178.0,0.342073,0.23854,0.259508,1178.0
5,1.0654,1.954202,0.512048,0.163462,0.247813,520.0,0.252226,0.236769,0.244253,359.0,0.190065,0.396396,0.256934,222.0,0.081395,0.215385,0.118143,65.0,0.025,0.083333,0.038462,12.0,0.231749,0.212147,0.219069,0.181121,1178.0,0.343463,0.231749,0.23916,1178.0
6,0.8878,2.034287,0.520179,0.223077,0.312248,520.0,0.266839,0.286908,0.27651,359.0,0.226766,0.274775,0.248473,222.0,0.07722,0.307692,0.123457,65.0,0.04878,0.166667,0.075472,12.0,0.256367,0.227957,0.251824,0.207232,1178.0,0.358434,0.256367,0.276509,1178.0
7,0.75,2.255198,0.502283,0.211538,0.2977,520.0,0.253687,0.239554,0.246418,359.0,0.231034,0.301802,0.261719,222.0,0.081181,0.338462,0.130952,65.0,0.033898,0.166667,0.056338,12.0,0.243633,0.220417,0.251605,0.198625,1178.0,0.347397,0.243633,0.263631,1178.0
8,0.6571,2.271823,0.486034,0.167308,0.248927,520.0,0.274648,0.325905,0.298089,359.0,0.206897,0.351351,0.260434,222.0,0.08,0.215385,0.116667,65.0,0.047619,0.083333,0.060606,12.0,0.252122,0.219039,0.228656,0.196945,1178.0,0.342138,0.252122,0.256862,1178.0
9,0.6078,2.381421,0.50838,0.175,0.260372,520.0,0.266484,0.270195,0.268326,359.0,0.216049,0.315315,0.25641,222.0,0.072993,0.307692,0.117994,65.0,0.027027,0.083333,0.040816,12.0,0.236842,0.218187,0.230307,0.188784,1178.0,0.350643,0.236842,0.251957,1178.0
10,0.5794,2.364112,0.508021,0.182692,0.268741,520.0,0.26776,0.272981,0.270345,359.0,0.21365,0.324324,0.257603,222.0,0.079365,0.307692,0.126183,65.0,0.027778,0.083333,0.041667,12.0,0.242784,0.219315,0.234205,0.192908,1178.0,0.35078,0.242784,0.256952,1178.0


2025/05/07 10:59:56 INFO mlflow.tracking.fluent: Experiment with name 'test_distilbert/distilbert-base-multilingual-cased' does not exist. Creating a new experiment.


Unnamed: 0,train_loss,loss,accuracy,macro_f1,weighted_f1
0,1.5691,1.495356,0.326825,0.142612,0.25304
1,1.5521,1.620178,0.229202,0.118522,0.17102
2,1.4694,1.653001,0.264007,0.178016,0.265437
3,1.2826,1.782049,0.23854,0.192408,0.259508
4,1.0654,1.954202,0.231749,0.181121,0.23916
5,0.8878,2.034287,0.256367,0.207232,0.276509
6,0.75,2.255198,0.243633,0.198625,0.263631
7,0.6571,2.271823,0.252122,0.196945,0.256862
8,0.6078,2.381421,0.236842,0.188784,0.251957
9,0.5794,2.364112,0.242784,0.192908,0.256952


{'loss': 2.034287452697754,
 '0_precision': 0.5201793721973094,
 '0_recall': 0.2230769230769231,
 '0_f1-score': 0.3122476446837147,
 '0_support': 520.0,
 '1_precision': 0.266839378238342,
 '1_recall': 0.28690807799442897,
 '1_f1-score': 0.276510067114094,
 '1_support': 359.0,
 '2_precision': 0.22676579925650558,
 '2_recall': 0.2747747747747748,
 '2_f1-score': 0.2484725050916497,
 '2_support': 222.0,
 '3_precision': 0.07722007722007722,
 '3_recall': 0.3076923076923077,
 '3_f1-score': 0.12345679012345678,
 '3_support': 65.0,
 '4_precision': 0.04878048780487805,
 '4_recall': 0.16666666666666666,
 '4_f1-score': 0.07547169811320754,
 '4_support': 12.0,
 'accuracy': 0.2563667232597623,
 'macro_avg_precision': 0.22795702294342246,
 'macro_avg_recall': 0.25182375004102026,
 'macro_avg_f1-score': 0.20723174102522454,
 'macro_avg_support': 1178.0,
 'weighted_avg_precision': 0.3584340311019299,
 'weighted_avg_recall': 0.2563667232597623,
 'weighted_avg_f1-score': 0.2765086054288801,
 'weighted_av

Map:   0%|          | 0/5 [00:00<?, ? examples/s]

Unnamed: 0,score_0,score_1,score_2,score_3,score_4,labels
0,0.138818,0.521852,0.193484,0.140729,0.005116,1
1,0.014776,0.285422,0.192373,0.497032,0.010396,3
2,0.028479,0.133697,0.50087,0.228421,0.108533,2
3,0.651193,0.125625,0.105413,0.029984,0.087786,0
4,0.212109,0.189855,0.173796,0.106447,0.317792,4


In [5]:
def run_detection(full_dataset: bool, threshold: float | None):
    """Train the humor detector, display its logs/metrics and predict the sample prompts.

    Args:
        full_dataset: Forwarded to ``train_detection``. When True, the run is given
            ``{save_path}/detection`` as its save location; when False, nothing is
            saved and the model is additionally evaluated with ``test_detection``.
        threshold: Forwarded unchanged to training, testing and prediction. Per the
            original author's note, a higher value requires a higher probability to
            predict humor (0.75 is a good value, but it depends on the model).
    """
    set_random_seeds()
    detector, detector_tokenizer = detection_model(model_name)
    training_args = TrainingArguments(
        num_train_epochs=4,
        lr_scheduler_type="cosine_with_restarts",
        **default_arguments,
    )
    # Only a full-dataset run gets a save location.
    output_dir = f"{save_path}/detection" if full_dataset else None
    logs, best_metrics = train_detection(
        detector,
        detector_tokenizer,
        training_args,
        full_dataset=full_dataset,
        threshold=threshold,
        save_path=output_dir,
    )
    display(logs)
    display(best_metrics)
    if not full_dataset:
        test_report = test_detection(
            detector, detector_tokenizer, training_args, threshold=threshold
        )
        display(test_report)
    prompt_predictions = predict_detection(
        detector, detector_tokenizer, prompts, training_args, threshold=threshold
    )
    display(prompt_predictions)

In [6]:
# Full-dataset run with no explicit threshold; the model is saved under f"{save_path}/detection".
run_detection(full_dataset=True, threshold=None)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/24000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1991 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,0 Precision,0 Recall,0 F1-score,0 Support,1 Precision,1 Recall,1 F1-score,1 Support,Accuracy,Macro Avg Precision,Macro Avg Recall,Macro Avg F1-score,Macro Avg Support,Weighted Avg Precision,Weighted Avg Recall,Weighted Avg F1-score,Weighted Avg Support
1,0.3893,0.527266,0.83308,0.675277,0.745924,813.0,0.801802,0.906621,0.850996,1178.0,0.812155,0.817441,0.790949,0.79846,1991.0,0.814574,0.812155,0.808091,1991.0
2,0.2868,0.518757,0.825963,0.712177,0.764861,813.0,0.818605,0.896435,0.855754,1178.0,0.821195,0.822284,0.804306,0.810307,1991.0,0.821609,0.821195,0.818639,1991.0
3,0.1971,0.655579,0.864322,0.634686,0.731915,813.0,0.786944,0.931239,0.853033,1178.0,0.810146,0.825633,0.782963,0.792474,1991.0,0.81854,0.810146,0.803576,1991.0
4,0.1402,0.722282,0.868825,0.627306,0.728571,813.0,0.784188,0.934635,0.852827,1178.0,0.809141,0.826506,0.780971,0.790699,1991.0,0.818748,0.809141,0.802089,1991.0


Unnamed: 0,train_loss,loss,accuracy,macro_f1,weighted_f1
0,0.3893,0.527266,0.812155,0.79846,0.808091
1,0.2868,0.518757,0.821195,0.810307,0.818639
2,0.1971,0.655579,0.810146,0.792474,0.803576
3,0.1402,0.722282,0.809141,0.790699,0.802089


{'loss': 0.5187572240829468,
 '0_precision': 0.8259629101283881,
 '0_recall': 0.7121771217712177,
 '0_f1-score': 0.7648612945838837,
 '0_support': 813.0,
 '1_precision': 0.8186046511627907,
 '1_recall': 0.8964346349745331,
 '1_f1-score': 0.8557536466774717,
 '1_support': 1178.0,
 'accuracy': 0.8211953792064289,
 'macro_avg_precision': 0.8222837806455894,
 'macro_avg_recall': 0.8043058783728754,
 'macro_avg_f1-score': 0.8103074706306777,
 'macro_avg_support': 1991.0,
 'weighted_avg_precision': 0.8216093043717463,
 'weighted_avg_recall': 0.8211953792064289,
 'weighted_avg_f1-score': 0.8186388891425208,
 'weighted_avg_support': 1991.0,
 'runtime': 2.5951,
 'samples_per_second': 767.208,
 'steps_per_second': 5.395,
 'epoch': 2.0}

Map:   0%|          | 0/5 [00:00<?, ? examples/s]

Unnamed: 0,score_0,score_1,labels
0,0.412057,0.587943,1
1,0.177811,0.822189,1
2,0.964855,0.035145,0
3,0.996777,0.003222,0
4,0.996727,0.003273,0
