In [4]:
from transformers import RobertaForSequenceClassification, RobertaTokenizer, Trainer, TrainingArguments
from datasets import load_dataset
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix

def train_and_evaluate_model(model_name, save_directory=None):
    """Fine-tune a RoBERTa checkpoint for binary classification and print test metrics.

    Loads ``model_name`` with a 2-label classification head, tokenizes the
    ``text`` column of ``train_temp_stratify.csv`` / ``test_temp_stratify.csv``,
    trains for 10 epochs (evaluating on the test split after every epoch), then
    prints accuracy, precision, recall, F1 and the confusion matrix computed on
    the test split, and finally saves the model and the tokenizer.

    Args:
        model_name: Identifier or path passed to ``from_pretrained`` for both
            the model and the tokenizer.
        save_directory: Directory for the fine-tuned model and tokenizer.
            ``None`` (default) keeps the original behaviour of saving into
            ``model_name``.

    Returns:
        None. All metrics are printed.
    """
    # Load the pretrained model with a 2-class classification head
    model = RobertaForSequenceClassification.from_pretrained(model_name, num_labels=2)

    # Load the matching tokenizer
    tokenizer = RobertaTokenizer.from_pretrained(model_name)

    # Load the train/test splits from the temporary CSV files
    # (assumes the CSVs already expose a label column the Trainer recognises — TODO confirm)
    dataset = load_dataset('csv', data_files={'train': 'train_temp_stratify.csv', 'test': 'test_temp_stratify.csv'})

    # Tokenization function applied to each batch of rows
    def tokenize_function(examples):
        return tokenizer(examples['text'], padding='max_length', truncation=True)

    # Tokenize both splits
    tokenized_datasets = dataset.map(tokenize_function, batched=True)

    # Training configuration
    training_args = TrainingArguments(
        output_dir='./results',
        evaluation_strategy="epoch",
        learning_rate=2e-5,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        num_train_epochs=10,
        weight_decay=0.01,
        logging_dir='./logs'
    )

    # Build the trainer; the test split doubles as the per-epoch evaluation set
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_datasets['train'],
        eval_dataset=tokenized_datasets['test']
    )

    # Train the model
    trainer.train()

    # Run the fine-tuned model on the test split
    predictions = trainer.predict(tokenized_datasets['test'])

    # True labels and predicted class (index of the largest logit)
    y_true = predictions.label_ids
    y_pred = predictions.predictions.argmax(-1)

    # Compute metrics
    accuracy = accuracy_score(y_true, y_pred)
    precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average='binary')
    # Pin the label set so the matrix is always 2x2: without `labels`, a test
    # split where only one class shows up yields a 1x1 matrix and the
    # four-way unpacking of `ravel()` below raises ValueError.
    conf_matrix = confusion_matrix(y_true, y_pred, labels=[0, 1])
    # Show metrics
    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1 Score: {f1:.4f}')
    print(f'Confusion Matrix:\n{conf_matrix}')

    # Extract the individual cells of the confusion matrix
    tn, fp, fn, tp = conf_matrix.ravel()

    print(f'True Negatives: {tn}')
    print(f'False Positives: {fp}')
    print(f'False Negatives: {fn}')
    print(f'True Positives: {tp}')

    # Save the model and the tokenizer.
    # NOTE(review): with the default, the artifacts land in a local directory
    # named exactly like `model_name`. `from_pretrained` resolves an existing
    # local directory before the Hub, so a later call with the same name from
    # the same working directory would presumably start from these fine-tuned
    # weights instead of the published checkpoint — pass `save_directory` to
    # keep experiments independent.
    target_directory = model_name if save_directory is None else save_directory
    model.save_pretrained(target_directory)
    tokenizer.save_pretrained(target_directory)




  from .autonotebook import tqdm as notebook_tqdm


In [13]:
# Call the function with the name of the desired model
train_and_evaluate_model('PlanTL-GOB-ES/roberta-base-biomedical-clinical-es')


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at PlanTL-GOB-ES/roberta-base-biomedical-clinical-es and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 80/80 [00:00<00:00, 153.79 examples/s]
Map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:00<00:00, 119.50 examples/s]





                                              
 10%|‚ñà         | 5/50 [04:02<31:48, 42.41s/it]

{'eval_loss': 0.6485847234725952, 'eval_runtime': 6.1005, 'eval_samples_per_second': 3.278, 'eval_steps_per_second': 0.328, 'epoch': 1.0}


                                               
 20%|‚ñà‚ñà        | 10/50 [06:55<23:45, 35.64s/it]

{'eval_loss': 0.5991672873497009, 'eval_runtime': 6.3659, 'eval_samples_per_second': 3.142, 'eval_steps_per_second': 0.314, 'epoch': 2.0}


                                               
 30%|‚ñà‚ñà‚ñà       | 15/50 [09:34<18:20, 31.43s/it]

{'eval_loss': 0.5373362898826599, 'eval_runtime': 5.9271, 'eval_samples_per_second': 3.374, 'eval_steps_per_second': 0.337, 'epoch': 3.0}


                                               
 40%|‚ñà‚ñà‚ñà‚ñà      | 20/50 [12:12<15:17, 30.58s/it]

{'eval_loss': 0.5017908215522766, 'eval_runtime': 6.023, 'eval_samples_per_second': 3.321, 'eval_steps_per_second': 0.332, 'epoch': 4.0}


                                               
 50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 25/50 [14:40<11:44, 28.17s/it]

{'eval_loss': 0.4658777117729187, 'eval_runtime': 5.9426, 'eval_samples_per_second': 3.366, 'eval_steps_per_second': 0.337, 'epoch': 5.0}


                                               
 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 30/50 [17:36<11:37, 34.87s/it]

{'eval_loss': 0.4290100634098053, 'eval_runtime': 5.9642, 'eval_samples_per_second': 3.353, 'eval_steps_per_second': 0.335, 'epoch': 6.0}


                                               
 70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 35/50 [20:50<09:11, 36.75s/it]

{'eval_loss': 0.44942933320999146, 'eval_runtime': 5.9943, 'eval_samples_per_second': 3.337, 'eval_steps_per_second': 0.334, 'epoch': 7.0}


                                               
 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 40/50 [23:48<05:25, 32.56s/it]

{'eval_loss': 0.44322580099105835, 'eval_runtime': 5.8513, 'eval_samples_per_second': 3.418, 'eval_steps_per_second': 0.342, 'epoch': 8.0}


                                               
 90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 45/50 [26:19<02:21, 28.36s/it]

{'eval_loss': 0.4374488890171051, 'eval_runtime': 6.126, 'eval_samples_per_second': 3.265, 'eval_steps_per_second': 0.326, 'epoch': 9.0}


                                               
100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [28:36<00:00, 34.34s/it]


{'eval_loss': 0.43879905343055725, 'eval_runtime': 5.9589, 'eval_samples_per_second': 3.356, 'eval_steps_per_second': 0.336, 'epoch': 10.0}
{'train_runtime': 1716.9757, 'train_samples_per_second': 0.466, 'train_steps_per_second': 0.029, 'train_loss': 0.4716197967529297, 'epoch': 10.0}


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2/2 [00:01<00:00,  1.86it/s]


Accuracy: 0.8000
Precision: 0.8000
Recall: 0.8000
F1 Score: 0.8000
Confusion Matrix:
[[8 2]
 [2 8]]
True Negatives: 8
False Positives: 2
False Negatives: 2
True Positives: 8


In [14]:
# Run train_and_evaluate_model on the PlanTL-GOB-ES/bsc-bio-ehr-es checkpoint
train_and_evaluate_model('PlanTL-GOB-ES/bsc-bio-ehr-es')

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at PlanTL-GOB-ES/bsc-bio-ehr-es and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 80/80 [00:00<00:00, 145.08 examples/s]
Map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:00<00:00, 118.01 examples/s]
 10%|‚ñà         | 5/50 [02:08<19:07, 25.50s/it]
 10%|‚ñà         | 5/50 [02:14<19:07, 25.50s/it]

{'eval_loss': 0.6326601505279541, 'eval_runtime': 5.9363, 'eval_samples_per_second': 3.369, 'eval_steps_per_second': 0.337, 'epoch': 1.0}


 20%|‚ñà‚ñà        | 10/50 [04:43<19:05, 28.64s/it]
 20%|‚ñà‚ñà        | 10/50 [04:49<19:05, 28.64s/it]

{'eval_loss': 0.5768107175827026, 'eval_runtime': 5.8184, 'eval_samples_per_second': 3.437, 'eval_steps_per_second': 0.344, 'epoch': 2.0}


 30%|‚ñà‚ñà‚ñà       | 15/50 [07:11<17:24, 29.84s/it]
 30%|‚ñà‚ñà‚ñà       | 15/50 [07:17<17:24, 29.84s/it]

{'eval_loss': 0.5138899087905884, 'eval_runtime': 5.9943, 'eval_samples_per_second': 3.337, 'eval_steps_per_second': 0.334, 'epoch': 3.0}


 40%|‚ñà‚ñà‚ñà‚ñà      | 20/50 [10:21<18:07, 36.25s/it]
 40%|‚ñà‚ñà‚ñà‚ñà      | 20/50 [10:27<18:07, 36.25s/it]

{'eval_loss': 0.4473888874053955, 'eval_runtime': 5.8483, 'eval_samples_per_second': 3.42, 'eval_steps_per_second': 0.342, 'epoch': 4.0}


 50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 25/50 [13:13<13:52, 33.29s/it]
 50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 25/50 [13:19<13:52, 33.29s/it]

{'eval_loss': 0.4004879891872406, 'eval_runtime': 5.986, 'eval_samples_per_second': 3.341, 'eval_steps_per_second': 0.334, 'epoch': 5.0}


 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 30/50 [15:46<09:57, 29.89s/it]
 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 30/50 [15:52<09:57, 29.89s/it]

{'eval_loss': 0.35330432653427124, 'eval_runtime': 6.0888, 'eval_samples_per_second': 3.285, 'eval_steps_per_second': 0.328, 'epoch': 6.0}


 70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 35/50 [18:37<08:30, 34.02s/it]
 70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 35/50 [18:43<08:30, 34.02s/it]

{'eval_loss': 0.32386285066604614, 'eval_runtime': 5.9189, 'eval_samples_per_second': 3.379, 'eval_steps_per_second': 0.338, 'epoch': 7.0}


 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 40/50 [21:58<06:19, 37.90s/it]
 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 40/50 [22:04<06:19, 37.90s/it]

{'eval_loss': 0.2984296679496765, 'eval_runtime': 5.8428, 'eval_samples_per_second': 3.423, 'eval_steps_per_second': 0.342, 'epoch': 8.0}


 90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 45/50 [24:58<02:56, 35.27s/it]
 90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 45/50 [25:04<02:56, 35.27s/it]

{'eval_loss': 0.2856622040271759, 'eval_runtime': 5.9071, 'eval_samples_per_second': 3.386, 'eval_steps_per_second': 0.339, 'epoch': 9.0}


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [27:37<00:00, 32.93s/it]
100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [27:43<00:00, 33.28s/it]


{'eval_loss': 0.27829232811927795, 'eval_runtime': 5.9719, 'eval_samples_per_second': 3.349, 'eval_steps_per_second': 0.335, 'epoch': 10.0}
{'train_runtime': 1663.8055, 'train_samples_per_second': 0.481, 'train_steps_per_second': 0.03, 'train_loss': 0.3824604034423828, 'epoch': 10.0}


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2/2 [00:01<00:00,  1.85it/s]


Accuracy: 0.8500
Precision: 0.8889
Recall: 0.8000
F1 Score: 0.8421
Confusion Matrix:
[[9 1]
 [2 8]]
True Negatives: 9
False Positives: 1
False Negatives: 2
True Positives: 8


In [15]:
# Run train_and_evaluate_model on the PlanTL-GOB-ES/bsc-bio-es checkpoint
train_and_evaluate_model('PlanTL-GOB-ES/bsc-bio-es')

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at PlanTL-GOB-ES/bsc-bio-es and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 80/80 [00:00<00:00, 148.96 examples/s]
Map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:00<00:00, 114.70 examples/s]
 10%|‚ñà         | 5/50 [01:54<16:58, 22.64s/it]
 10%|‚ñà         | 5/50 [02:00<16:58, 22.64s/it]

{'eval_loss': 0.6342555284500122, 'eval_runtime': 5.8925, 'eval_samples_per_second': 3.394, 'eval_steps_per_second': 0.339, 'epoch': 1.0}


 20%|‚ñà‚ñà        | 10/50 [03:56<15:10, 22.77s/it]
 20%|‚ñà‚ñà        | 10/50 [04:02<15:10, 22.77s/it]

{'eval_loss': 0.5935845971107483, 'eval_runtime': 6.0286, 'eval_samples_per_second': 3.318, 'eval_steps_per_second': 0.332, 'epoch': 2.0}


 30%|‚ñà‚ñà‚ñà       | 15/50 [06:50<18:33, 31.82s/it]
 30%|‚ñà‚ñà‚ñà       | 15/50 [06:56<18:33, 31.82s/it]

{'eval_loss': 0.5344194769859314, 'eval_runtime': 5.7936, 'eval_samples_per_second': 3.452, 'eval_steps_per_second': 0.345, 'epoch': 3.0}


 40%|‚ñà‚ñà‚ñà‚ñà      | 20/50 [09:20<14:06, 28.22s/it]
 40%|‚ñà‚ñà‚ñà‚ñà      | 20/50 [09:26<14:06, 28.22s/it]

{'eval_loss': 0.48782461881637573, 'eval_runtime': 5.8873, 'eval_samples_per_second': 3.397, 'eval_steps_per_second': 0.34, 'epoch': 4.0}


 50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 25/50 [11:41<11:21, 27.26s/it]
 50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 25/50 [11:47<11:21, 27.26s/it]

{'eval_loss': 0.46082741022109985, 'eval_runtime': 5.889, 'eval_samples_per_second': 3.396, 'eval_steps_per_second': 0.34, 'epoch': 5.0}


 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 30/50 [14:00<09:11, 27.58s/it]
 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 30/50 [14:06<09:11, 27.58s/it]

{'eval_loss': 0.4403974413871765, 'eval_runtime': 5.8368, 'eval_samples_per_second': 3.427, 'eval_steps_per_second': 0.343, 'epoch': 6.0}


 70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 35/50 [16:27<07:12, 28.84s/it]
 70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 35/50 [16:33<07:12, 28.84s/it]

{'eval_loss': 0.46200188994407654, 'eval_runtime': 5.8315, 'eval_samples_per_second': 3.43, 'eval_steps_per_second': 0.343, 'epoch': 7.0}


 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 40/50 [19:01<04:47, 28.76s/it]
 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 40/50 [19:07<04:47, 28.76s/it]

{'eval_loss': 0.4429369866847992, 'eval_runtime': 5.8515, 'eval_samples_per_second': 3.418, 'eval_steps_per_second': 0.342, 'epoch': 8.0}


 90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 45/50 [21:31<02:24, 28.81s/it]
 90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 45/50 [21:37<02:24, 28.81s/it]

{'eval_loss': 0.4336150586605072, 'eval_runtime': 6.6131, 'eval_samples_per_second': 3.024, 'eval_steps_per_second': 0.302, 'epoch': 9.0}


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [23:43<00:00, 25.46s/it]
100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [23:49<00:00, 28.60s/it]


{'eval_loss': 0.437967449426651, 'eval_runtime': 5.9815, 'eval_samples_per_second': 3.344, 'eval_steps_per_second': 0.334, 'epoch': 10.0}
{'train_runtime': 1429.9435, 'train_samples_per_second': 0.559, 'train_steps_per_second': 0.035, 'train_loss': 0.4726178741455078, 'epoch': 10.0}


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2/2 [00:01<00:00,  1.86it/s]


Accuracy: 0.8000
Precision: 0.8000
Recall: 0.8000
F1 Score: 0.8000
Confusion Matrix:
[[8 2]
 [2 8]]
True Negatives: 8
False Positives: 2
False Negatives: 2
True Positives: 8


In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split


# Read the full labelled dataset
data = pd.read_csv('../DATASET CLASIFICADO/dataset_large.csv')

# 80/20 split with a fixed seed, stratified on the 'Label' column
train_data, test_data = train_test_split(
    data,
    stratify=data['Label'],
    test_size=0.2,
    random_state=42,
)

# Persist both splits as temporary CSV files (row index not written)
train_data.to_csv(path_or_buf='train_temp_large_stratify.csv', index=False)
test_data.to_csv(path_or_buf='test_temp_large_stratify.csv', index=False)


In [6]:
def train_and_evaluate_model_large(model_name):
    """Fine-tune ``model_name`` on the large stratified split and print test metrics.

    The checkpoint is loaded with a 2-label head, the ``Text`` column of
    ``train_temp_large_stratify.csv`` / ``test_temp_large_stratify.csv`` is
    tokenized, ``Label`` is renamed to ``labels``, and the model is trained for
    10 epochs with an evaluation on the test split after each epoch. Accuracy,
    precision, recall and F1 on the test split are printed, and the model and
    tokenizer are saved under ``f"{model_name}-large"``.

    Args:
        model_name: Identifier or path passed to ``from_pretrained``.

    Returns:
        None.
    """
    # Pretrained classifier and its tokenizer
    model = RobertaForSequenceClassification.from_pretrained(model_name, num_labels=2)
    tokenizer = RobertaTokenizer.from_pretrained(model_name)

    # Temporary CSV files produced by the stratified split
    csv_splits = {
        'train': 'train_temp_large_stratify.csv',
        'test': 'test_temp_large_stratify.csv',
    }
    raw_splits = load_dataset('csv', data_files=csv_splits)

    def encode_batch(batch):
        # Pad to the tokenizer's maximum length and truncate longer texts
        return tokenizer(batch['Text'], padding='max_length', truncation=True)

    # Tokenize, then expose the target column under the name "labels"
    encoded_splits = raw_splits.map(encode_batch, batched=True).rename_column("Label", "labels")

    # Training hyperparameters
    hyperparameters = dict(
        output_dir='./results',
        evaluation_strategy="epoch",
        learning_rate=2e-5,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        num_train_epochs=10,
        weight_decay=0.01,
        logging_dir='./logs',
    )

    # The test split is used both for per-epoch evaluation and the final report
    trainer = Trainer(
        model=model,
        args=TrainingArguments(**hyperparameters),
        train_dataset=encoded_splits['train'],
        eval_dataset=encoded_splits['test'],
    )
    trainer.train()

    # Final predictions on the test split
    test_output = trainer.predict(encoded_splits['test'])
    gold_labels = test_output.label_ids
    predicted_labels = test_output.predictions.argmax(-1)

    # Metrics (binary averaging for precision / recall / F1)
    accuracy = accuracy_score(gold_labels, predicted_labels)
    precision, recall, f1, _ = precision_recall_fscore_support(
        gold_labels, predicted_labels, average='binary'
    )

    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1 Score: {f1:.4f}')

    # Save model first, then tokenizer, into the same directory
    output_directory = f"{model_name}-large"
    for artifact in (model, tokenizer):
        artifact.save_pretrained(output_directory)


In [7]:
# Call the function with the name of the desired model
train_and_evaluate_model_large('PlanTL-GOB-ES/roberta-base-biomedical-clinical-es')


Generating train split: 325 examples [00:00, 11607.29 examples/s]
Generating test split: 82 examples [00:00, 10262.06 examples/s]
Map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 325/325 [00:01<00:00, 242.25 examples/s]
Map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 82/82 [00:00<00:00, 187.74 examples/s]





                                                  
 10%|‚ñà         | 21/210 [09:21<1:07:31, 21.43s/it]

{'eval_loss': 0.2831881046295166, 'eval_runtime': 25.0198, 'eval_samples_per_second': 3.277, 'eval_steps_per_second': 0.24, 'epoch': 1.0}


                                                  
 20%|‚ñà‚ñà        | 42/210 [19:19<59:34, 21.27s/it]

{'eval_loss': 0.14821089804172516, 'eval_runtime': 24.8577, 'eval_samples_per_second': 3.299, 'eval_steps_per_second': 0.241, 'epoch': 2.0}


                                                  
 30%|‚ñà‚ñà‚ñà       | 63/210 [29:09<53:25, 21.81s/it]

{'eval_loss': 0.12969444692134857, 'eval_runtime': 25.0004, 'eval_samples_per_second': 3.28, 'eval_steps_per_second': 0.24, 'epoch': 3.0}


                                                  
 40%|‚ñà‚ñà‚ñà‚ñà      | 84/210 [38:53<43:50, 20.88s/it]

{'eval_loss': 0.16634182631969452, 'eval_runtime': 24.8488, 'eval_samples_per_second': 3.3, 'eval_steps_per_second': 0.241, 'epoch': 4.0}


                                                  
 50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 105/210 [48:50<37:05, 21.19s/it]

{'eval_loss': 0.15200179815292358, 'eval_runtime': 24.8986, 'eval_samples_per_second': 3.293, 'eval_steps_per_second': 0.241, 'epoch': 5.0}


                                                 
 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 126/210 [58:21<27:28, 19.62s/it]

{'eval_loss': 0.19665752351284027, 'eval_runtime': 24.6833, 'eval_samples_per_second': 3.322, 'eval_steps_per_second': 0.243, 'epoch': 6.0}


                                                   
 70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 147/210 [1:07:57<20:45, 19.77s/it]

{'eval_loss': 0.18835368752479553, 'eval_runtime': 24.8418, 'eval_samples_per_second': 3.301, 'eval_steps_per_second': 0.242, 'epoch': 7.0}


                                                   
 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 168/210 [1:17:23<14:21, 20.50s/it]

{'eval_loss': 0.20475420355796814, 'eval_runtime': 24.8607, 'eval_samples_per_second': 3.298, 'eval_steps_per_second': 0.241, 'epoch': 8.0}


                                                   
 90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 189/210 [1:27:22<08:03, 23.03s/it]

{'eval_loss': 0.20111684501171112, 'eval_runtime': 24.7385, 'eval_samples_per_second': 3.315, 'eval_steps_per_second': 0.243, 'epoch': 9.0}


                                                   
100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 210/210 [1:37:28<00:00, 27.85s/it]


{'eval_loss': 0.20014069974422455, 'eval_runtime': 25.9517, 'eval_samples_per_second': 3.16, 'eval_steps_per_second': 0.231, 'epoch': 10.0}
{'train_runtime': 5848.238, 'train_samples_per_second': 0.556, 'train_steps_per_second': 0.036, 'train_loss': 0.10056460244315012, 'epoch': 10.0}


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 6/6 [00:20<00:00,  3.43s/it]


Accuracy: 0.9512
Precision: 0.9048
Recall: 0.9048
F1 Score: 0.9048


In [8]:
# Run train_and_evaluate_model_large on the PlanTL-GOB-ES/bsc-bio-ehr-es checkpoint
train_and_evaluate_model_large('PlanTL-GOB-ES/bsc-bio-ehr-es')

Map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 325/325 [00:01<00:00, 248.88 examples/s]
Map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 82/82 [00:00<00:00, 191.80 examples/s]
 10%|‚ñà         | 21/210 [09:03<1:01:16, 19.45s/it]
 10%|‚ñà         | 21/210 [09:29<1:01:16, 19.45s/it]

{'eval_loss': 0.15091797709465027, 'eval_runtime': 25.8157, 'eval_samples_per_second': 3.176, 'eval_steps_per_second': 0.232, 'epoch': 1.0}


 20%|‚ñà‚ñà        | 42/210 [18:18<57:41, 20.61s/it]  
 20%|‚ñà‚ñà        | 42/210 [18:44<57:41, 20.61s/it]

{'eval_loss': 0.14807073771953583, 'eval_runtime': 25.9153, 'eval_samples_per_second': 3.164, 'eval_steps_per_second': 0.232, 'epoch': 2.0}


 30%|‚ñà‚ñà‚ñà       | 63/210 [28:11<51:40, 21.09s/it]  
 30%|‚ñà‚ñà‚ñà       | 63/210 [28:37<51:40, 21.09s/it]

{'eval_loss': 0.15885797142982483, 'eval_runtime': 25.6307, 'eval_samples_per_second': 3.199, 'eval_steps_per_second': 0.234, 'epoch': 3.0}


 40%|‚ñà‚ñà‚ñà‚ñà      | 84/210 [37:36<45:06, 21.48s/it]  
 40%|‚ñà‚ñà‚ñà‚ñà      | 84/210 [38:00<45:06, 21.48s/it]

{'eval_loss': 0.18214403092861176, 'eval_runtime': 24.6696, 'eval_samples_per_second': 3.324, 'eval_steps_per_second': 0.243, 'epoch': 4.0}


 50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 105/210 [47:34<37:21, 21.35s/it] 
 50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 105/210 [47:59<37:21, 21.35s/it]

{'eval_loss': 0.18754495680332184, 'eval_runtime': 24.6624, 'eval_samples_per_second': 3.325, 'eval_steps_per_second': 0.243, 'epoch': 5.0}


 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 126/210 [57:08<28:17, 20.21s/it]
 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 126/210 [57:33<28:17, 20.21s/it]

{'eval_loss': 0.1998073309659958, 'eval_runtime': 25.2297, 'eval_samples_per_second': 3.25, 'eval_steps_per_second': 0.238, 'epoch': 6.0}


 70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 147/210 [1:07:01<21:53, 20.84s/it]
 70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 147/210 [1:07:27<21:53, 20.84s/it]

{'eval_loss': 0.20879000425338745, 'eval_runtime': 25.5066, 'eval_samples_per_second': 3.215, 'eval_steps_per_second': 0.235, 'epoch': 7.0}


 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 168/210 [1:16:22<14:12, 20.31s/it]
 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 168/210 [1:16:48<14:12, 20.31s/it]

{'eval_loss': 0.20973293483257294, 'eval_runtime': 25.7873, 'eval_samples_per_second': 3.18, 'eval_steps_per_second': 0.233, 'epoch': 8.0}


 90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 189/210 [1:25:41<06:58, 19.93s/it]
 90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 189/210 [1:26:07<06:58, 19.93s/it]

{'eval_loss': 0.22559534013271332, 'eval_runtime': 25.8526, 'eval_samples_per_second': 3.172, 'eval_steps_per_second': 0.232, 'epoch': 9.0}


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 210/210 [1:34:46<00:00, 20.19s/it]
100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 210/210 [1:35:13<00:00, 27.21s/it]


{'eval_loss': 0.23101471364498138, 'eval_runtime': 26.0532, 'eval_samples_per_second': 3.147, 'eval_steps_per_second': 0.23, 'epoch': 10.0}
{'train_runtime': 5713.0321, 'train_samples_per_second': 0.569, 'train_steps_per_second': 0.037, 'train_loss': 0.0669925190153576, 'epoch': 10.0}


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 6/6 [00:20<00:00,  3.42s/it]


Accuracy: 0.9512
Precision: 0.9048
Recall: 0.9048
F1 Score: 0.9048


In [9]:
# Run train_and_evaluate_model_large on the PlanTL-GOB-ES/bsc-bio-es checkpoint
train_and_evaluate_model_large('PlanTL-GOB-ES/bsc-bio-es')

Map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 325/325 [00:01<00:00, 250.77 examples/s]
Map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 82/82 [00:00<00:00, 197.03 examples/s]
 10%|‚ñà         | 21/210 [08:13<1:05:37, 20.83s/it]
 10%|‚ñà         | 21/210 [08:38<1:05:37, 20.83s/it]

{'eval_loss': 0.22292762994766235, 'eval_runtime': 24.8409, 'eval_samples_per_second': 3.301, 'eval_steps_per_second': 0.242, 'epoch': 1.0}


 20%|‚ñà‚ñà        | 42/210 [16:33<47:47, 17.07s/it]  
 20%|‚ñà‚ñà        | 42/210 [16:58<47:47, 17.07s/it]

{'eval_loss': 0.14695627987384796, 'eval_runtime': 24.655, 'eval_samples_per_second': 3.326, 'eval_steps_per_second': 0.243, 'epoch': 2.0}


 30%|‚ñà‚ñà‚ñà       | 63/210 [24:47<42:25, 17.32s/it]  
 30%|‚ñà‚ñà‚ñà       | 63/210 [25:12<42:25, 17.32s/it]

{'eval_loss': 0.15568889677524567, 'eval_runtime': 24.7276, 'eval_samples_per_second': 3.316, 'eval_steps_per_second': 0.243, 'epoch': 3.0}


 40%|‚ñà‚ñà‚ñà‚ñà      | 84/210 [32:44<33:52, 16.13s/it]  
 40%|‚ñà‚ñà‚ñà‚ñà      | 84/210 [33:09<33:52, 16.13s/it]

{'eval_loss': 0.16700312495231628, 'eval_runtime': 24.694, 'eval_samples_per_second': 3.321, 'eval_steps_per_second': 0.243, 'epoch': 4.0}


 50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 105/210 [40:39<30:02, 17.17s/it]
 50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 105/210 [41:03<30:02, 17.17s/it]

{'eval_loss': 0.171054869890213, 'eval_runtime': 24.7667, 'eval_samples_per_second': 3.311, 'eval_steps_per_second': 0.242, 'epoch': 5.0}


 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 126/210 [48:56<23:14, 16.60s/it]
 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 126/210 [49:21<23:14, 16.60s/it]

{'eval_loss': 0.18239150941371918, 'eval_runtime': 24.6484, 'eval_samples_per_second': 3.327, 'eval_steps_per_second': 0.243, 'epoch': 6.0}


 70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 147/210 [14:09:28<1:39:29, 94.76s/it]     
 70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 147/210 [14:09:54<1:39:29, 94.76s/it]

{'eval_loss': 0.18920879065990448, 'eval_runtime': 26.2921, 'eval_samples_per_second': 3.119, 'eval_steps_per_second': 0.228, 'epoch': 7.0}


 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 168/210 [14:21:10<18:33, 26.52s/it]  
 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 168/210 [14:21:36<18:33, 26.52s/it]

{'eval_loss': 0.22810699045658112, 'eval_runtime': 25.8889, 'eval_samples_per_second': 3.167, 'eval_steps_per_second': 0.232, 'epoch': 8.0}


 90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 189/210 [14:31:34<07:23, 21.11s/it]
 90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 189/210 [14:31:59<07:23, 21.11s/it]

{'eval_loss': 0.23574219644069672, 'eval_runtime': 24.8678, 'eval_samples_per_second': 3.297, 'eval_steps_per_second': 0.241, 'epoch': 9.0}


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 210/210 [14:41:22<00:00, 23.66s/it]
100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 210/210 [14:41:47<00:00, 251.94s/it]


{'eval_loss': 0.2346363514661789, 'eval_runtime': 24.728, 'eval_samples_per_second': 3.316, 'eval_steps_per_second': 0.243, 'epoch': 10.0}
{'train_runtime': 52907.0586, 'train_samples_per_second': 0.061, 'train_steps_per_second': 0.004, 'train_loss': 0.10913990565708705, 'epoch': 10.0}


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 6/6 [00:19<00:00,  3.28s/it]


Accuracy: 0.9512
Precision: 0.9474
Recall: 0.8571
F1 Score: 0.9000


In [1]:
from transformers import RobertaForSequenceClassification, RobertaTokenizer, Trainer, TrainingArguments
from datasets import load_dataset
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
import time
import torch

def train_and_evaluate_model_large(model_name):
    """Fine-tune ``model_name`` on the large stratified split and print test metrics.

    Loads the checkpoint with a 2-label head, tokenizes the ``Text`` column of
    ``train_temp_large_stratify.csv`` / ``test_temp_large_stratify.csv``,
    renames ``Label`` to ``labels`` and trains for 15 epochs with a per-device
    batch of 8 and 4 gradient-accumulation steps. Afterwards it prints
    accuracy, precision, recall, F1 and the confusion matrix for the test split
    and saves the model and tokenizer into a timestamped directory
    ``f"{model_name}-large-{timestamp}"``.

    Args:
        model_name: Identifier or path passed to ``from_pretrained``.

    Returns:
        None. All metrics are printed.
    """
    # Check whether CUDA is available and select the device accordingly
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')
    print(f"Using device: {device}")

    # Load the pretrained model
    model = RobertaForSequenceClassification.from_pretrained(model_name, num_labels=2)
    model.to(device)  # Move the model to the GPU when one is available

    # Load the tokenizer
    tokenizer = RobertaTokenizer.from_pretrained(model_name)

    # Load the dataset from the temporary CSV files
    dataset = load_dataset('csv', data_files={'train': 'train_temp_large_stratify.csv', 'test': 'test_temp_large_stratify.csv'})

    # Tokenization function applied to each batch of rows
    def tokenize_function(examples):
        return tokenizer(examples['Text'], padding='max_length', truncation=True)

    # Tokenize the dataset
    tokenized_datasets = dataset.map(tokenize_function, batched=True)

    # Rename the target column so the labels are passed to the model
    tokenized_datasets = tokenized_datasets.rename_column("Label", "labels")

    # Training configuration
    # NOTE(review): the logged run of this notebook finished after 150
    # optimizer steps, so with eval_steps=500 / save_steps=500 neither the
    # in-training evaluation nor checkpointing appears to trigger — confirm
    # whether smaller values were intended.
    training_args = TrainingArguments(
        output_dir='./results',
        evaluation_strategy="steps",
        eval_steps=500,  # Evaluate every 500 steps
        learning_rate=3e-5,  # Learning rate
        per_device_train_batch_size=8,  # Smaller batch to save memory
        per_device_eval_batch_size=8,
        num_train_epochs=15,  # Number of epochs
        weight_decay=0.01,  # Weight decay
        logging_dir='./logs',
        logging_steps=50,  # Log every 50 steps
        save_steps=500,  # Save a checkpoint every 500 steps
        save_total_limit=3,  # Keep only the 3 most recent checkpoints
        # Mixed precision only when CUDA is present: TrainingArguments rejects
        # fp16=True on a CPU-only machine, which would break the CPU fallback
        # selected above.
        fp16=use_cuda,
        gradient_accumulation_steps=4  # Accumulate gradients to simulate a larger batch
    )

    # Build the trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_datasets['train'],
        eval_dataset=tokenized_datasets['test'],
        compute_metrics=compute_metrics  # Custom metrics reported on evaluation/prediction
    )

    # Train the model
    trainer.train()

    # Run the fine-tuned model on the test split
    predictions = trainer.predict(tokenized_datasets['test'])

    # True labels and predicted class (index of the largest logit)
    y_true = predictions.label_ids
    y_pred = predictions.predictions.argmax(-1)

    # Compute metrics
    accuracy = accuracy_score(y_true, y_pred)
    precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average='binary')
    # Pin the label set so the matrix is always 2x2: without `labels`, a test
    # split where only one class shows up yields a 1x1 matrix and the
    # four-way unpacking of `ravel()` below raises ValueError.
    conf_matrix = confusion_matrix(y_true, y_pred, labels=[0, 1])

    # Show metrics
    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1 Score: {f1:.4f}')
    print(f'Confusion Matrix:\n{conf_matrix}')

    # Extract the individual cells of the confusion matrix
    tn, fp, fn, tp = conf_matrix.ravel()

    print(f'True Negatives: {tn}')
    print(f'False Positives: {fp}')
    print(f'False Negatives: {fn}')
    print(f'True Positives: {tp}')

    # Build a unique directory name so successive runs do not overwrite each other
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    save_directory = f"{model_name}-large-{timestamp}"

    # Save the model and the tokenizer
    model.save_pretrained(save_directory)
    tokenizer.save_pretrained(save_directory)

def compute_metrics(p):
    """Turn a prediction object into accuracy / precision / recall / F1.

    Args:
        p: Object exposing ``predictions`` (per-class scores; the class is
            taken as the argmax over the last axis) and ``label_ids``.

    Returns:
        dict with the keys ``accuracy``, ``precision``, ``recall`` and ``f1``;
        the last three use binary averaging.
    """
    gold_labels = p.label_ids
    predicted_labels = p.predictions.argmax(-1)

    accuracy_value = accuracy_score(gold_labels, predicted_labels)
    # The fourth element (support) is not reported
    precision_value, recall_value, f1_value, _support = precision_recall_fscore_support(
        gold_labels, predicted_labels, average='binary'
    )

    return dict(
        accuracy=accuracy_value,
        precision=precision_value,
        recall=recall_value,
        f1=f1_value,
    )



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Run train_and_evaluate_model_large on the PlanTL-GOB-ES/roberta-base-biomedical-clinical-es checkpoint
train_and_evaluate_model_large('PlanTL-GOB-ES/roberta-base-biomedical-clinical-es')

Using device: cuda







 33%|‚ñà‚ñà‚ñà‚ñé      | 50/150 [00:52<01:39,  1.01it/s]

{'loss': 0.205, 'grad_norm': 0.21594415605068207, 'learning_rate': 1.9999999999999998e-05, 'epoch': 4.88}


 67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 100/150 [01:41<00:48,  1.02it/s]

{'loss': 0.0167, 'grad_norm': 0.06789407879114151, 'learning_rate': 9.999999999999999e-06, 'epoch': 9.76}


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 150/150 [02:30<00:00,  1.00s/it]


{'loss': 0.0044, 'grad_norm': 0.8298442959785461, 'learning_rate': 0.0, 'epoch': 14.63}
{'train_runtime': 150.2359, 'train_samples_per_second': 32.449, 'train_steps_per_second': 0.998, 'train_loss': 0.07537646303574244, 'epoch': 14.63}


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 11/11 [00:00<00:00, 16.41it/s]


Accuracy: 0.9512
Precision: 0.9048
Recall: 0.9048
F1 Score: 0.9048
Confusion Matrix:
[[59  2]
 [ 2 19]]
True Negatives: 59
False Positives: 2
False Negatives: 2
True Positives: 19


In [3]:
# Run train_and_evaluate_model_large on the PlanTL-GOB-ES/bsc-bio-ehr-es checkpoint
train_and_evaluate_model_large('PlanTL-GOB-ES/bsc-bio-ehr-es')

Using device: cuda


Map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 325/325 [00:01<00:00, 243.38 examples/s]
Map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 82/82 [00:00<00:00, 185.81 examples/s]
 33%|‚ñà‚ñà‚ñà‚ñé      | 50/150 [00:49<01:38,  1.02it/s]

{'loss': 0.1341, 'grad_norm': 0.1790304183959961, 'learning_rate': 2.02e-05, 'epoch': 4.88}


 67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 100/150 [01:38<00:48,  1.02it/s]

{'loss': 0.0275, 'grad_norm': 0.08087718486785889, 'learning_rate': 1.02e-05, 'epoch': 9.76}


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 150/150 [02:27<00:00,  1.02it/s]


{'loss': 0.0065, 'grad_norm': 0.08358662575483322, 'learning_rate': 2.0000000000000002e-07, 'epoch': 14.63}
{'train_runtime': 147.4722, 'train_samples_per_second': 33.057, 'train_steps_per_second': 1.017, 'train_loss': 0.05604925930500031, 'epoch': 14.63}


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 11/11 [00:00<00:00, 16.27it/s]


Accuracy: 0.9634
Precision: 0.9500
Recall: 0.9048
F1 Score: 0.9268
Confusion Matrix:
[[60  1]
 [ 2 19]]
True Negatives: 60
False Positives: 1
False Negatives: 2
True Positives: 19


In [4]:
# Run train_and_evaluate_model_large on the PlanTL-GOB-ES/bsc-bio-es checkpoint
train_and_evaluate_model_large('PlanTL-GOB-ES/bsc-bio-es')

Using device: cuda


Map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 325/325 [00:01<00:00, 260.86 examples/s]
Map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 82/82 [00:00<00:00, 189.88 examples/s]
 33%|‚ñà‚ñà‚ñà‚ñé      | 50/150 [00:49<01:37,  1.02it/s]

{'loss': 0.2015, 'grad_norm': 0.199437215924263, 'learning_rate': 1.9999999999999998e-05, 'epoch': 4.88}


 67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 100/150 [01:37<00:48,  1.02it/s]

{'loss': 0.038, 'grad_norm': 0.08665261417627335, 'learning_rate': 1.02e-05, 'epoch': 9.76}


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 150/150 [02:26<00:00,  1.02it/s]


{'loss': 0.0194, 'grad_norm': 0.08960975706577301, 'learning_rate': 2.0000000000000002e-07, 'epoch': 14.63}
{'train_runtime': 146.6529, 'train_samples_per_second': 33.242, 'train_steps_per_second': 1.023, 'train_loss': 0.08629438916842143, 'epoch': 14.63}


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 11/11 [00:00<00:00, 16.47it/s]


Accuracy: 0.9512
Precision: 0.9474
Recall: 0.8571
F1 Score: 0.9000
Confusion Matrix:
[[60  1]
 [ 3 18]]
True Negatives: 60
False Positives: 1
False Negatives: 3
True Positives: 18


In [10]:
from transformers import RobertaForSequenceClassification, RobertaTokenizer, Trainer, TrainingArguments
from datasets import load_dataset
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
from transformers import RobertaForSequenceClassification, RobertaTokenizer, Trainer, TrainingArguments

import time
import torch


def train_and_evaluate_model(model_name):
    """Fine-tune a RoBERTa sequence classifier and report test-set metrics.

    Loads ``model_name`` with a 2-label classification head, tokenizes the
    ``text`` column of ``train_temp_stratify.csv`` / ``test_temp_stratify.csv``,
    trains with the Hugging Face ``Trainer``, prints accuracy, precision,
    recall, F1 and the confusion matrix for the test split, and finally saves
    the model and tokenizer under ``"{model_name}-{timestamp}"``.

    Args:
        model_name: Model identifier or path accepted by ``from_pretrained``.

    Returns:
        None. Metrics are printed and the fine-tuned model is written to disk.
    """
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')
    print(f"Using device: {device}")

    # Load the pretrained model with a binary classification head.
    model = RobertaForSequenceClassification.from_pretrained(model_name, num_labels=2)
    model.to(device)  # Move the model to the GPU when one is available

    # Load the matching tokenizer.
    tokenizer = RobertaTokenizer.from_pretrained(model_name)

    # Load the dataset from the temporary CSV files.
    dataset = load_dataset('csv', data_files={'train': 'train_temp_stratify.csv', 'test': 'test_temp_stratify.csv'})

    # Tokenization function applied to each batch of examples.
    def tokenize_function(examples):
        return tokenizer(examples['text'], padding='max_length', truncation=True)

    # Tokenize both splits.
    tokenized_datasets = dataset.map(tokenize_function, batched=True)

    # Training configuration.
    training_args = TrainingArguments(
        output_dir='./results',
        evaluation_strategy="steps",
        eval_steps=500,  # Evaluate every 500 steps (short runs may finish before the first evaluation)
        learning_rate=3e-5,  # Tuned learning rate
        per_device_train_batch_size=8,  # Smaller batch size to save memory
        per_device_eval_batch_size=8,
        num_train_epochs=15,  # Tuned number of epochs
        weight_decay=0.01,  # Tuned weight decay
        logging_dir='./logs',
        logging_steps=50,  # Log every 50 steps
        save_steps=500,  # Save a checkpoint every 500 steps
        save_total_limit=3,  # Keep only the 3 most recent checkpoints
        # Mixed precision needs a GPU; enabling it unconditionally would make
        # the CPU fallback selected above fail at argument validation.
        fp16=use_cuda,
        gradient_accumulation_steps=4  # Accumulate gradients to simulate a larger batch size
    )

    # Build the trainer.
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_datasets['train'],
        eval_dataset=tokenized_datasets['test'],
        compute_metrics=compute_metrics  # Custom metrics reported on each evaluation
    )

    # Train the model.
    trainer.train()

    # Run the model on the test split.
    predictions = trainer.predict(tokenized_datasets['test'])

    # Ground-truth labels and predicted class indices.
    y_true = predictions.label_ids
    y_pred = predictions.predictions.argmax(-1)

    # Compute metrics.
    accuracy = accuracy_score(y_true, y_pred)
    precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average='binary')
    # Pin the label set so the matrix is always 2x2; otherwise a test split
    # (or prediction set) containing a single class yields a 1x1 matrix and
    # the 4-way unpacking below raises ValueError.
    conf_matrix = confusion_matrix(y_true, y_pred, labels=[0, 1])

    # Show metrics.
    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1 Score: {f1:.4f}')
    print(f'Confusion Matrix:\n{conf_matrix}')

    # Unpack the confusion matrix cells (row = true class, column = predicted class).
    tn, fp, fn, tp = conf_matrix.ravel()

    print(f'True Negatives: {tn}')
    print(f'False Positives: {fp}')
    print(f'False Negatives: {fn}')
    print(f'True Positives: {tp}')

    # Build a unique save location from the model name and the current time.
    # NOTE: a model_name containing '/' produces a nested directory path.
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    save_directory = f"{model_name}-{timestamp}"

    # Save the fine-tuned model and the tokenizer.
    model.save_pretrained(save_directory)
    tokenizer.save_pretrained(save_directory)

def compute_metrics(p):
    """Compute binary classification metrics for an evaluation pass.

    Args:
        p: Prediction object exposing ``predictions`` (per-class scores with
            the class axis last) and ``label_ids`` (ground-truth labels).

    Returns:
        dict with the keys ``accuracy``, ``precision``, ``recall`` and ``f1``.
    """
    # Predicted class = index of the highest score along the last axis.
    predicted = p.predictions.argmax(-1)
    scores = {'accuracy': accuracy_score(p.label_ids, predicted)}
    # The fourth element returned by sklearn (support) is not reported.
    prf = precision_recall_fscore_support(p.label_ids, predicted, average='binary')
    scores.update(zip(('precision', 'recall', 'f1'), prf[:3]))
    return scores


In [11]:
# Fine-tune and evaluate PlanTL-GOB-ES/roberta-base-biomedical-clinical-es with train_and_evaluate_model.
train_and_evaluate_model('PlanTL-GOB-ES/roberta-base-biomedical-clinical-es')

Using device: cuda


100%|██████████| 30/30 [00:29<00:00,  1.00it/s]


{'train_runtime': 29.913, 'train_samples_per_second': 40.116, 'train_steps_per_second': 1.003, 'train_loss': 0.12969179153442384, 'epoch': 12.0}


100%|██████████| 3/3 [00:00<00:00, 26.43it/s]


Accuracy: 0.6500
Precision: 0.6154
Recall: 0.8000
F1 Score: 0.6957
Confusion Matrix:
[[5 5]
 [2 8]]
True Negatives: 5
False Positives: 5
False Negatives: 2
True Positives: 8


In [12]:
# Fine-tune and evaluate PlanTL-GOB-ES/bsc-bio-ehr-es with train_and_evaluate_model.
train_and_evaluate_model('PlanTL-GOB-ES/bsc-bio-ehr-es')

Using device: cuda


100%|██████████| 30/30 [00:30<00:00,  1.00s/it]


{'train_runtime': 30.0983, 'train_samples_per_second': 39.869, 'train_steps_per_second': 0.997, 'train_loss': 0.08999994595845541, 'epoch': 12.0}


100%|██████████| 3/3 [00:00<00:00, 27.00it/s]


Accuracy: 0.9000
Precision: 0.8333
Recall: 1.0000
F1 Score: 0.9091
Confusion Matrix:
[[ 8  2]
 [ 0 10]]
True Negatives: 8
False Positives: 2
False Negatives: 0
True Positives: 10


In [13]:
# Fine-tune and evaluate PlanTL-GOB-ES/bsc-bio-es with train_and_evaluate_model.
train_and_evaluate_model('PlanTL-GOB-ES/bsc-bio-es')

Using device: cuda


100%|██████████| 30/30 [00:30<00:00,  1.00s/it]


{'train_runtime': 30.1246, 'train_samples_per_second': 39.835, 'train_steps_per_second': 0.996, 'train_loss': 0.13567263285319012, 'epoch': 12.0}


100%|██████████| 3/3 [00:00<00:00, 27.07it/s]


Accuracy: 0.7000
Precision: 0.6250
Recall: 1.0000
F1 Score: 0.7692
Confusion Matrix:
[[ 4  6]
 [ 0 10]]
True Negatives: 4
False Positives: 6
False Negatives: 0
True Positives: 10
