In [1]:
from datasets import load_from_disk
from transformers import T5ForConditionalGeneration
from transformers import T5Tokenizer
import numpy as np

# We reload the saved dataset that was preprocessed in dataset-prep notebook.
dataset = load_from_disk('preprocessed-dataset-complete')

# We upload the T5 tokenizer
tokenizer = T5Tokenizer.from_pretrained("vgaraujov/t5-base-spanish")

# We upload the T5 pretrained model base
model = T5ForConditionalGeneration.from_pretrained('vgaraujov/t5-base-spanish')
#model = MT5ForConditionalGeneration.from_pretrained('./results/t5-finetuned-only-trees')

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


In [2]:
def token_accuracy(pred):
    logits = pred.predictions
    references = pred.label_ids
    predictions = np.argmax(logits, axis=-1)

    token_accuracies = []
    for pred_tokens, ref_tokens in zip(predictions, references):
        correct_tokens = sum(p == r for p, r in zip(pred_tokens, ref_tokens))
        token_accuracy = correct_tokens / len(ref_tokens) if len(ref_tokens) > 0 else 0
        token_accuracies.append(token_accuracy)

    # Calculate F1-score
    return {
        "token_accuracy": sum(token_accuracies) / len(token_accuracies)
    }

In [11]:
from sklearn.metrics import f1_score

def compute_metrics(pred):
    logits = pred.predictions
    references = pred.label_ids
    predictions = np.argmax(logits, axis=-1)

    f1 = f1_score(references, predictions, average="macro")

    return {
        "f1": f1
    }

In [2]:
import torch
def preprocess_logits_for_metrics(logits, labels):
    """
    Original Trainer may have a memory leak. 
    This is a workaround to avoid storing too many tensors that are not needed.
    """
    pred_ids = torch.argmax(logits[0], dim=-1)
    return pred_ids, labels

In [5]:
from transformers import Trainer, TrainingArguments, DataCollatorForSeq2Seq


# Definir los argumentos de entrenamiento
training_args = TrainingArguments(
    output_dir='./results',          # Directorio de salida
    eval_strategy='steps',     # Estrategia de evaluación
    learning_rate=3e-4,              # Tasa de aprendizaje
    per_device_train_batch_size=4,   # Tamaño del batch para entrenamiento
    per_device_eval_batch_size=4,    # Tamaño del batch para evaluación
    weight_decay=0.01,               # Decaimiento de peso
    save_total_limit=3,              # Límite de guardado
    num_train_epochs=5,              # Número de épocas de entrenamiento
    fp16=True,                       # Usar FP16 si está disponible
    logging_steps=100,
    logging_dir='./logs'
)

data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

# Crear el entrenador
trainer = Trainer(
    model=model,                         # El modelo
    args=training_args,                  # Argumentos de entrenamiento
    train_dataset=dataset['train'],    # Dataset de entrenamiento
    eval_dataset=dataset['validation'],# Dataset de validación
    tokenizer=tokenizer,                 # Tokenizador
    data_collator=data_collator,
    preprocess_logits_for_metrics=preprocess_logits_for_metrics,
)

In [6]:
# Entrenar el modelo
trainer.train()

# Guardar el modelo y el tokenizador
trainer.save_model('./results/complete')

  0%|          | 0/4645 [00:00<?, ?it/s]

{'loss': 1.449, 'grad_norm': 1.6775729656219482, 'learning_rate': 0.0002935414424111948, 'epoch': 0.11}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.7685956954956055, 'eval_runtime': 64.1309, 'eval_samples_per_second': 2.214, 'eval_steps_per_second': 0.561, 'epoch': 0.11}
{'loss': 0.6059, 'grad_norm': 1.3278467655181885, 'learning_rate': 0.00028708288482238965, 'epoch': 0.22}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.7397180199623108, 'eval_runtime': 63.608, 'eval_samples_per_second': 2.232, 'eval_steps_per_second': 0.566, 'epoch': 0.22}
{'loss': 0.508, 'grad_norm': 0.9548324346542358, 'learning_rate': 0.00028062432723358446, 'epoch': 0.32}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.6178632974624634, 'eval_runtime': 63.1661, 'eval_samples_per_second': 2.248, 'eval_steps_per_second': 0.57, 'epoch': 0.32}
{'loss': 0.4257, 'grad_norm': 0.9381598234176636, 'learning_rate': 0.0002741657696447793, 'epoch': 0.43}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.6331713199615479, 'eval_runtime': 62.9778, 'eval_samples_per_second': 2.255, 'eval_steps_per_second': 0.572, 'epoch': 0.43}
{'loss': 0.3728, 'grad_norm': 0.545567512512207, 'learning_rate': 0.00026770721205597413, 'epoch': 0.54}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.5643516778945923, 'eval_runtime': 61.7281, 'eval_samples_per_second': 2.3, 'eval_steps_per_second': 0.583, 'epoch': 0.54}
{'loss': 0.3821, 'grad_norm': 0.46359288692474365, 'learning_rate': 0.00026124865446716894, 'epoch': 0.65}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.5070772171020508, 'eval_runtime': 69.3279, 'eval_samples_per_second': 2.048, 'eval_steps_per_second': 0.519, 'epoch': 0.65}
{'loss': 0.3353, 'grad_norm': 0.7352139949798584, 'learning_rate': 0.0002547900968783638, 'epoch': 0.75}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.5056213736534119, 'eval_runtime': 57.2192, 'eval_samples_per_second': 2.482, 'eval_steps_per_second': 0.629, 'epoch': 0.75}
{'loss': 0.345, 'grad_norm': 0.7274213433265686, 'learning_rate': 0.00024833153928955867, 'epoch': 0.86}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.4722718894481659, 'eval_runtime': 73.8674, 'eval_samples_per_second': 1.922, 'eval_steps_per_second': 0.487, 'epoch': 0.86}
{'loss': 0.3112, 'grad_norm': 1.1028333902359009, 'learning_rate': 0.00024187298170075348, 'epoch': 0.97}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.46830737590789795, 'eval_runtime': 74.5968, 'eval_samples_per_second': 1.904, 'eval_steps_per_second': 0.483, 'epoch': 0.97}
{'loss': 0.2952, 'grad_norm': 0.9828572869300842, 'learning_rate': 0.0002354144241119483, 'epoch': 1.08}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.4520767033100128, 'eval_runtime': 74.3292, 'eval_samples_per_second': 1.91, 'eval_steps_per_second': 0.484, 'epoch': 1.08}
{'loss': 0.2775, 'grad_norm': 0.9398730397224426, 'learning_rate': 0.00022895586652314315, 'epoch': 1.18}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.40853869915008545, 'eval_runtime': 77.414, 'eval_samples_per_second': 1.834, 'eval_steps_per_second': 0.465, 'epoch': 1.18}
{'loss': 0.263, 'grad_norm': 1.4869905710220337, 'learning_rate': 0.00022249730893433799, 'epoch': 1.29}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.4030405879020691, 'eval_runtime': 61.2288, 'eval_samples_per_second': 2.319, 'eval_steps_per_second': 0.588, 'epoch': 1.29}
{'loss': 0.2431, 'grad_norm': 0.717812180519104, 'learning_rate': 0.00021603875134553282, 'epoch': 1.4}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.39047110080718994, 'eval_runtime': 59.9422, 'eval_samples_per_second': 2.369, 'eval_steps_per_second': 0.601, 'epoch': 1.4}
{'loss': 0.2465, 'grad_norm': 0.5883283019065857, 'learning_rate': 0.00020958019375672763, 'epoch': 1.51}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.36811426281929016, 'eval_runtime': 64.6873, 'eval_samples_per_second': 2.195, 'eval_steps_per_second': 0.557, 'epoch': 1.51}
{'loss': 0.2272, 'grad_norm': 0.9509590864181519, 'learning_rate': 0.00020312163616792247, 'epoch': 1.61}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.3411712646484375, 'eval_runtime': 75.5381, 'eval_samples_per_second': 1.88, 'eval_steps_per_second': 0.477, 'epoch': 1.61}
{'loss': 0.229, 'grad_norm': 0.7389516234397888, 'learning_rate': 0.0001966630785791173, 'epoch': 1.72}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.34023311734199524, 'eval_runtime': 73.8041, 'eval_samples_per_second': 1.924, 'eval_steps_per_second': 0.488, 'epoch': 1.72}
{'loss': 0.2359, 'grad_norm': 1.0175586938858032, 'learning_rate': 0.00019020452099031214, 'epoch': 1.83}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.3302302956581116, 'eval_runtime': 78.8465, 'eval_samples_per_second': 1.801, 'eval_steps_per_second': 0.457, 'epoch': 1.83}
{'loss': 0.2025, 'grad_norm': 0.6189272999763489, 'learning_rate': 0.00018374596340150698, 'epoch': 1.94}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.3111824691295624, 'eval_runtime': 54.758, 'eval_samples_per_second': 2.593, 'eval_steps_per_second': 0.657, 'epoch': 1.94}
{'loss': 0.2156, 'grad_norm': 0.5202583074569702, 'learning_rate': 0.0001772874058127018, 'epoch': 2.05}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.31269359588623047, 'eval_runtime': 59.6659, 'eval_samples_per_second': 2.38, 'eval_steps_per_second': 0.603, 'epoch': 2.05}
{'loss': 0.1915, 'grad_norm': 0.9444211721420288, 'learning_rate': 0.00017082884822389665, 'epoch': 2.15}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.29869797825813293, 'eval_runtime': 71.9861, 'eval_samples_per_second': 1.973, 'eval_steps_per_second': 0.5, 'epoch': 2.15}
{'loss': 0.1931, 'grad_norm': 0.28051677346229553, 'learning_rate': 0.0001643702906350915, 'epoch': 2.26}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.2804013788700104, 'eval_runtime': 75.1228, 'eval_samples_per_second': 1.89, 'eval_steps_per_second': 0.479, 'epoch': 2.26}
{'loss': 0.1762, 'grad_norm': 0.3813159763813019, 'learning_rate': 0.00015791173304628632, 'epoch': 2.37}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.2735788822174072, 'eval_runtime': 56.2674, 'eval_samples_per_second': 2.524, 'eval_steps_per_second': 0.64, 'epoch': 2.37}
{'loss': 0.1747, 'grad_norm': 0.767526388168335, 'learning_rate': 0.00015145317545748116, 'epoch': 2.48}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.26101070642471313, 'eval_runtime': 54.4877, 'eval_samples_per_second': 2.606, 'eval_steps_per_second': 0.661, 'epoch': 2.48}
{'loss': 0.1553, 'grad_norm': 0.637005627155304, 'learning_rate': 0.000144994617868676, 'epoch': 2.58}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.27439644932746887, 'eval_runtime': 54.3506, 'eval_samples_per_second': 2.613, 'eval_steps_per_second': 0.662, 'epoch': 2.58}
{'loss': 0.1705, 'grad_norm': 0.7207756638526917, 'learning_rate': 0.0001385360602798708, 'epoch': 2.69}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.25465983152389526, 'eval_runtime': 64.2974, 'eval_samples_per_second': 2.208, 'eval_steps_per_second': 0.56, 'epoch': 2.69}
{'loss': 0.1825, 'grad_norm': 0.8098375201225281, 'learning_rate': 0.00013207750269106564, 'epoch': 2.8}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.24782529473304749, 'eval_runtime': 62.8148, 'eval_samples_per_second': 2.261, 'eval_steps_per_second': 0.573, 'epoch': 2.8}
{'loss': 0.172, 'grad_norm': 0.5804417133331299, 'learning_rate': 0.00012561894510226048, 'epoch': 2.91}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.2339341938495636, 'eval_runtime': 73.9737, 'eval_samples_per_second': 1.92, 'eval_steps_per_second': 0.487, 'epoch': 2.91}
{'loss': 0.1578, 'grad_norm': 0.7958263754844666, 'learning_rate': 0.00011916038751345532, 'epoch': 3.01}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.23189832270145416, 'eval_runtime': 64.4249, 'eval_samples_per_second': 2.204, 'eval_steps_per_second': 0.559, 'epoch': 3.01}
{'loss': 0.1344, 'grad_norm': 0.7088913321495056, 'learning_rate': 0.00011270182992465014, 'epoch': 3.12}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.20932748913764954, 'eval_runtime': 59.6909, 'eval_samples_per_second': 2.379, 'eval_steps_per_second': 0.603, 'epoch': 3.12}
{'loss': 0.1313, 'grad_norm': 1.398024320602417, 'learning_rate': 0.00010624327233584499, 'epoch': 3.23}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.20702151954174042, 'eval_runtime': 71.1696, 'eval_samples_per_second': 1.995, 'eval_steps_per_second': 0.506, 'epoch': 3.23}
{'loss': 0.1591, 'grad_norm': 0.4742352068424225, 'learning_rate': 9.978471474703983e-05, 'epoch': 3.34}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.20262329280376434, 'eval_runtime': 69.6213, 'eval_samples_per_second': 2.04, 'eval_steps_per_second': 0.517, 'epoch': 3.34}
{'loss': 0.1191, 'grad_norm': 0.45089900493621826, 'learning_rate': 9.332615715823465e-05, 'epoch': 3.44}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.1970006376504898, 'eval_runtime': 66.6605, 'eval_samples_per_second': 2.13, 'eval_steps_per_second': 0.54, 'epoch': 3.44}
{'loss': 0.1333, 'grad_norm': 0.990903913974762, 'learning_rate': 8.686759956942948e-05, 'epoch': 3.55}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.18846863508224487, 'eval_runtime': 62.7014, 'eval_samples_per_second': 2.265, 'eval_steps_per_second': 0.574, 'epoch': 3.55}
{'loss': 0.1395, 'grad_norm': 0.37987130880355835, 'learning_rate': 8.040904198062432e-05, 'epoch': 3.66}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.18134506046772003, 'eval_runtime': 59.0093, 'eval_samples_per_second': 2.406, 'eval_steps_per_second': 0.61, 'epoch': 3.66}
{'loss': 0.1223, 'grad_norm': 0.5670006275177002, 'learning_rate': 7.395048439181916e-05, 'epoch': 3.77}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.17986665666103363, 'eval_runtime': 55.4916, 'eval_samples_per_second': 2.559, 'eval_steps_per_second': 0.649, 'epoch': 3.77}
{'loss': 0.1462, 'grad_norm': 0.43387657403945923, 'learning_rate': 6.749192680301398e-05, 'epoch': 3.88}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.18029819428920746, 'eval_runtime': 51.7528, 'eval_samples_per_second': 2.744, 'eval_steps_per_second': 0.696, 'epoch': 3.88}
{'loss': 0.1265, 'grad_norm': 0.5330030918121338, 'learning_rate': 6.103336921420882e-05, 'epoch': 3.98}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.1676575243473053, 'eval_runtime': 56.7868, 'eval_samples_per_second': 2.501, 'eval_steps_per_second': 0.634, 'epoch': 3.98}
{'loss': 0.1121, 'grad_norm': 0.38995450735092163, 'learning_rate': 5.457481162540366e-05, 'epoch': 4.09}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.15614382922649384, 'eval_runtime': 63.3897, 'eval_samples_per_second': 2.24, 'eval_steps_per_second': 0.568, 'epoch': 4.09}
{'loss': 0.1135, 'grad_norm': 0.8713639974594116, 'learning_rate': 4.811625403659849e-05, 'epoch': 4.2}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.15563558042049408, 'eval_runtime': 68.3743, 'eval_samples_per_second': 2.077, 'eval_steps_per_second': 0.527, 'epoch': 4.2}
{'loss': 0.1001, 'grad_norm': 1.039821982383728, 'learning_rate': 4.165769644779332e-05, 'epoch': 4.31}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.1477048099040985, 'eval_runtime': 69.6525, 'eval_samples_per_second': 2.039, 'eval_steps_per_second': 0.517, 'epoch': 4.31}
{'loss': 0.0977, 'grad_norm': 1.1324372291564941, 'learning_rate': 3.519913885898816e-05, 'epoch': 4.41}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.15007013082504272, 'eval_runtime': 65.2311, 'eval_samples_per_second': 2.177, 'eval_steps_per_second': 0.552, 'epoch': 4.41}
{'loss': 0.1071, 'grad_norm': 0.5502436757087708, 'learning_rate': 2.874058127018299e-05, 'epoch': 4.52}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.14595066010951996, 'eval_runtime': 71.7842, 'eval_samples_per_second': 1.978, 'eval_steps_per_second': 0.502, 'epoch': 4.52}
{'loss': 0.0889, 'grad_norm': 0.28745922446250916, 'learning_rate': 2.2282023681377823e-05, 'epoch': 4.63}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.14152459800243378, 'eval_runtime': 65.9759, 'eval_samples_per_second': 2.152, 'eval_steps_per_second': 0.546, 'epoch': 4.63}
{'loss': 0.0991, 'grad_norm': 0.4818344712257385, 'learning_rate': 1.5823466092572657e-05, 'epoch': 4.74}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.13864074647426605, 'eval_runtime': 57.2059, 'eval_samples_per_second': 2.482, 'eval_steps_per_second': 0.629, 'epoch': 4.74}
{'loss': 0.1098, 'grad_norm': 0.8002377152442932, 'learning_rate': 9.364908503767491e-06, 'epoch': 4.84}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.1351364552974701, 'eval_runtime': 80.4721, 'eval_samples_per_second': 1.765, 'eval_steps_per_second': 0.447, 'epoch': 4.84}
{'loss': 0.1004, 'grad_norm': 1.3744999170303345, 'learning_rate': 2.906350914962325e-06, 'epoch': 4.95}


  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.13297851383686066, 'eval_runtime': 67.7068, 'eval_samples_per_second': 2.097, 'eval_steps_per_second': 0.532, 'epoch': 4.95}
{'train_runtime': 34326.9713, 'train_samples_per_second': 0.541, 'train_steps_per_second': 0.135, 'train_loss': 0.2351704702182273, 'epoch': 5.0}


In [8]:
results = trainer.evaluate(eval_dataset=dataset['test'])
print(results)

  0%|          | 0/36 [00:00<?, ?it/s]

{'eval_loss': 0.10755975544452667, 'eval_runtime': 64.0738, 'eval_samples_per_second': 2.232, 'eval_steps_per_second': 0.562, 'epoch': 5.0}


In [7]:
trainer.state.log_history

[{'loss': 1.449,
  'grad_norm': 1.6775729656219482,
  'learning_rate': 0.0002935414424111948,
  'epoch': 0.10764262648008611,
  'step': 100},
 {'eval_loss': 0.7685956954956055,
  'eval_runtime': 64.1309,
  'eval_samples_per_second': 2.214,
  'eval_steps_per_second': 0.561,
  'epoch': 0.10764262648008611,
  'step': 100},
 {'loss': 0.6059,
  'grad_norm': 1.3278467655181885,
  'learning_rate': 0.00028708288482238965,
  'epoch': 0.21528525296017223,
  'step': 200},
 {'eval_loss': 0.7397180199623108,
  'eval_runtime': 63.608,
  'eval_samples_per_second': 2.232,
  'eval_steps_per_second': 0.566,
  'epoch': 0.21528525296017223,
  'step': 200},
 {'loss': 0.508,
  'grad_norm': 0.9548324346542358,
  'learning_rate': 0.00028062432723358446,
  'epoch': 0.32292787944025836,
  'step': 300},
 {'eval_loss': 0.6178632974624634,
  'eval_runtime': 63.1661,
  'eval_samples_per_second': 2.248,
  'eval_steps_per_second': 0.57,
  'epoch': 0.32292787944025836,
  'step': 300},
 {'loss': 0.4257,
  'grad_norm': 

In [8]:
predictions = trainer.predict(dataset['validation'])

  0%|          | 0/71 [00:00<?, ?it/s]

In [None]:
import numpy as np
np.argmax(predictions.predictions, axis=-1)

In [12]:
compute_metrics(predictions)

ValueError: Found input variables with inconsistent numbers of samples: [281, 2]

In [None]:
trainer.log_metrics("test", results)