# Packages

In [1]:
# %pip -q install datasets
# %pip -q install evaluate

In [None]:
import datasets
# import evaluate
import numpy as np
from datasets import load_dataset
import pandas as pd

import transformers
from transformers import (
    AutoConfig,
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    DataCollatorForSeq2Seq,
    Seq2SeqTrainer,
    Seq2SeqTrainingArguments,
    set_seed,
)

  warn(
2025-04-20 13:58:33.277387: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
from datasets.dataset_dict import DatasetDict
from datasets import Dataset

In [4]:
import torch
import random
import numpy as np


def set_random_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    set_seed(seed)


set_random_seed(42)

# Data

In [5]:
dataset = load_dataset(
    "csv",
    data_files=["/home/jupyter/datasphere/project/rugec/data/art_gec_full.tsv"],
    sep="\t",
)
dataset['train'] = dataset['train'].rename_columns({'correct' : 'correct_sent', 'corrupt' : 'corrupt_sent'})
dataset = dataset['train'].take(150000)
dataset

Dataset({
    features: ['correct_sent', 'corrupt_sent'],
    num_rows: 150000
})

In [6]:
dev = '/home/jupyter/datasphere/project/rugec/data/RULEC-GEC.dev.tsv'
train = '/home/jupyter/datasphere/project/rugec/data/RULEC-GEC.train.tsv'
test = '/home/jupyter/datasphere/project/rugec/data/RULEC-GEC.test.tsv'
clang8 = pd.read_csv('/home/jupyter/datasphere/project/rugec/data/clang8_source_target_ru.spacy_tokenized.tsv', sep='\t')
gera_train = pd.read_csv('/home/jupyter/datasphere/project/rugec/data/GERA.train.tsv', sep='\t')
gera_test = pd.read_csv('/home/jupyter/datasphere/project/rugec/data/GERA.test.tsv', sep='\t')
gera_dev = pd.read_csv('/home/jupyter/datasphere/project/rugec/data/GERA.dev.tsv', sep='\t')

relco = pd.read_csv('/home/jupyter/datasphere/project/rugec/data/relco_filtered.tsv', sep='\t')
rulec_train = pd.read_csv(train, delimiter='\t')
rulec_test = pd.read_csv(test, delimiter='\t')
rulec_dev = pd.read_csv(dev, delimiter='\t')

In [7]:
# train_all = pd.concat([rulec_train.sample(frac=10, replace=True), clang8, relco])
train_all = pd.concat([rulec_train, clang8, relco, gera_train, gera_test, gera_dev])
train_all = train_all.sample(frac=1)
train_all.corrupt_sent = train_all.corrupt_sent.map(lambda x: str(x).capitalize())
train_all.correct_sent = train_all.correct_sent.map(lambda x: str(x).capitalize())
train_all.head()

Unnamed: 0,corrupt_sent,correct_sent
7286,Сегодня большинство студентов из - за -обществ...,Сегодня большинство студентов из - за - общест...
19336,Я расскажу моё рабочее расписание .,Я расскажу моё рабочее расписание .
8992,Я встал рано утром и веселел ради хорошей пого...,Я встал рано утром и веселился ради хорошей по...
3825,"Но я считаю , что это человек создал петербург...","Но я считаю , что это человек создал петербург..."
651,"Также , поведение , одежда , речь и т. д. все ...","Также поведение , одежда , речь и т. д. ‒ все ..."


In [8]:
fine_tune = {'train':Dataset.from_dict({'corrupt_sent':train_all['corrupt_sent'],'correct_sent' : train_all['correct_sent']}),
     'test':Dataset.from_dict({'corrupt_sent':rulec_test['corrupt_sent'],'correct_sent' : rulec_test['correct_sent']}),
      'dev':Dataset.from_dict({'corrupt_sent' : rulec_dev['corrupt_sent'], 'correct_sent':rulec_dev['correct_sent']})
     }
fine_tune

{'train': Dataset({
     features: ['corrupt_sent', 'correct_sent'],
     num_rows: 61261
 }),
 'test': Dataset({
     features: ['corrupt_sent', 'correct_sent'],
     num_rows: 5000
 }),
 'dev': Dataset({
     features: ['corrupt_sent', 'correct_sent'],
     num_rows: 2500
 })}

In [9]:
for each in fine_tune['train'].take(1):
    print(each)

{'corrupt_sent': 'Сегодня большинство студентов из - за -общества(сосредоточен на происхождение университета ) не только предпочитают курсы , такие как мнежмент или экономика , которые хорошо устраивают на работу но и для того чтобы посупить так называемые в престижные университеты', 'correct_sent': 'Сегодня большинство студентов из - за - общества ( сосредоточен на происхождение университета ) не только предпочитают курсы , такие как менеджмент или экономика , которые хорошо устраивают на работу но и для того чтобы поступить в так называемые престижные университеты'}


In [10]:
fine_tune['train'] = datasets.concatenate_datasets([fine_tune['train'], dataset])
fine_tune['train'] = fine_tune['train'].shuffle()

In [11]:
for each in fine_tune['train'].take(2):
    print(each)

{'corrupt_sent': 'Тринадцать ,    тринадцатый', 'correct_sent': 'Тринадцать , тринадцатый'}
{'corrupt_sent': 'Мы видели , один человек танцевал там .', 'correct_sent': 'Мы видели , один человек танцевал там .'}


# Model and tokenization

In [12]:
# from lightning.pytorch.callbacks import ModelCheckpoint


# # Your code here
# checkpoint_callback = ModelCheckpoint(
#     monitor="iou_from_smp/val", mode="max", filename="best"
# )

In [None]:
import json
CONFIG = json.load(open('/home/jupyter/datasphere/project/tokens.json'))
tok = CONFIG["HF_TOK"]
wdb_tok = CONFIG['WANDB_API_KEY']

from huggingface_hub import login
import wandb

login(token=tok)
wandb.login(key=wdb_tok)

In [14]:
# name = 'mika5883/pretrain_rugec_msu'
# name = "ai-forever/T5-large-spell" #english
name = "ai-forever/sage-mt5-large"
config = AutoConfig.from_pretrained(name)
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModelForSeq2SeqLM.from_pretrained(name)

You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers


In [15]:
def tokenize_function(examples):
    inputs = [f'grammar: {each}' for each in examples['corrupt_sent']]
    targets = [each for each in examples['correct_sent']]

    # inputs = [prefix + inp for inp in inputs] #we're skipping this step because our data is prefixed

    model_inputs = tokenizer(inputs, max_length=128, padding='max_length', truncation=True)
    labels = tokenizer(text_target=targets, max_length=128, padding='max_length', truncation=True)
    labels["input_ids"] = [
                [(l if l != tokenizer.pad_token_id else -100) for l in label] for label in labels["input_ids"]
            ] #замена токенов паддинга на -100, чтобы они не учитывались при подсчёте потерь
    model_inputs["labels"] = labels["input_ids"]
    # model_inputs['corrupt_sent'] = inputs
    return model_inputs

In [16]:
train = fine_tune['train'].map(tokenize_function, batched=True)
test = fine_tune['test'].map(tokenize_function, batched=True)
dev = fine_tune['dev'].map(tokenize_function, batched=True)

Map: 100%|██████████| 5000/5000 [00:03<00:00, 1597.78 examples/s]
Map: 100%|██████████| 2500/2500 [00:01<00:00, 1639.31 examples/s]


In [18]:
train = fine_tune['train'].map(tokenize_function, batched=True)

In [25]:
for each in train.take(2):
    print(each)

{'corrupt_sent': 'Тринадцать ,    тринадцатый', 'correct_sent': 'Тринадцать , тринадцатый', 'input_ids': [259, 164814, 267, 36486, 94556, 833, 259, 261, 6035, 94556, 1633, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'labels': [36486, 94556, 833, 259, 261, 6035, 94556, 1633, 1, -100, -100

In [20]:
# def compute_metrics(eval_preds):
#     preds, labels = eval_preds
#     if isinstance(preds, tuple):
#         preds = preds[0]
#     decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)

#     labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
#     decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

#     decoded_preds, decoded_labels = postprocess_text(decoded_preds, decoded_labels)
    
#     sources = 
#     result = comet_metric.compute(predictions=decoded_preds, references=decoded_labels, sources=sources)
#     result = {"comet": result["mean_score"]}

#     # prediction_lens = [
#     #     np.count_nonzero(pred != tokenizer.pad_token_id) for pred in preds
#     # ]
#     # result["gen_len"] = np.mean(prediction_lens)
#     # result = {k: round(v, 4) for k, v in result.items()}
#     return result

In [21]:
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

In [23]:
len(fine_tune['train']) // (32 * 2) + 1

3301

In [None]:
training_args = Seq2SeqTrainingArguments(
    output_dir="MT5_large_A_art",
    eval_strategy="steps",
    eval_steps=100,
    max_steps=3300,
    logging_steps=100,
    # learning_rate=3e-5
    learning_rate=3.83229e-5,
    per_device_train_batch_size=16,
    resume_from_checkpoint="last-checkpoint",
    per_device_eval_batch_size=16,
    weight_decay=0.15,
    save_total_limit=1,
    # num_train_epochs=3,
    predict_with_generate=True,
    # fp16=True,
    bf16=True,
    push_to_hub=True,
    hub_strategy="every_save",
    report_to = 'all',
    # run_name='ft_rugec_A_v2',
    save_strategy='best',
    metric_for_best_model='loss',
    load_best_model_at_end=True,
    gradient_accumulation_steps=4,
    max_grad_norm=1,
    lr_scheduler_type="cosine",  
    warmup_ratio=0.1,
    run_name='sageMT5_ft_A_art_v1'


)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=train,
    eval_dataset=dev,
    processing_class=tokenizer,
    data_collator=data_collator,
    # compute_metrics=compute_metrics,
)
transformers.logging.set_verbosity_info()
trainer.train()
# trainer.train(resume_from_checkpoint=False)

The following columns in the training set don't have a corresponding argument in `T5ForConditionalGeneration.forward` and have been ignored: corrupt_sent, correct_sent. If corrupt_sent, correct_sent are not expected by `T5ForConditionalGeneration.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 211,261
  Num Epochs = 1
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 64
  Gradient Accumulation steps = 4
  Total optimization steps = 3,300
  Number of trainable parameters = 1,229,581,312
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"
wandb: Tracking run with wandb version 0.18.5
wandb: Run data is saved locally in /home/jupyter/work/resources/rugec/notebooks/wandb/run-20250420_140018-eae35w5b
wandb: Run `wandb offline` to turn off syncing.
wandb: Syncing run sageMT5_ft_A_art_v1
wandb: ⭐️ View project at https://wandb.ai/mika5883/hugg

{'loss': 0.9979, 'grad_norm': 0.6532544493675232, 'learning_rate': 1.1613e-05, 'epoch': 0.03}



  0%|          | 0/157 [00:00<?, ?it/s][A
  2%|▏         | 3/157 [00:00<00:08, 17.82it/s][A
  3%|▎         | 5/157 [00:00<00:10, 14.04it/s][A
  4%|▍         | 7/157 [00:00<00:11, 12.86it/s][A
  6%|▌         | 9/157 [00:00<00:11, 12.40it/s][A
  7%|▋         | 11/157 [00:00<00:12, 12.09it/s][A
  8%|▊         | 13/157 [00:01<00:12, 11.89it/s][A
 10%|▉         | 15/157 [00:01<00:12, 11.77it/s][A
 11%|█         | 17/157 [00:01<00:11, 11.69it/s][A
 12%|█▏        | 19/157 [00:01<00:11, 11.61it/s][A
 13%|█▎        | 21/157 [00:01<00:11, 11.59it/s][A
 15%|█▍        | 23/157 [00:01<00:11, 11.58it/s][A
 16%|█▌        | 25/157 [00:02<00:11, 11.59it/s][A
 17%|█▋        | 27/157 [00:02<00:11, 11.56it/s][A
 18%|█▊        | 29/157 [00:02<00:11, 11.56it/s][A
 20%|█▉        | 31/157 [00:02<00:10, 11.56it/s][A
 21%|██        | 33/157 [00:02<00:10, 11.50it/s][A
 22%|██▏       | 35/157 [00:02<00:10, 11.47it/s][A
 24%|██▎       | 37/157 [00:03<00:10, 11.50it/s][A
 25%|██▍       | 39/157 

{'eval_loss': 0.2649425268173218, 'eval_runtime': 13.6351, 'eval_samples_per_second': 183.35, 'eval_steps_per_second': 11.514, 'epoch': 0.03}


Model weights saved in MT5_large_A_art/checkpoint-100/model.safetensors
tokenizer config file saved in MT5_large_A_art/checkpoint-100/tokenizer_config.json
Special tokens file saved in MT5_large_A_art/checkpoint-100/special_tokens_map.json
Copy vocab file to MT5_large_A_art/checkpoint-100/spiece.model
tokenizer config file saved in MT5_large_A_art/tokenizer_config.json
Special tokens file saved in MT5_large_A_art/special_tokens_map.json
Copy vocab file to MT5_large_A_art/spiece.model
  6%|▌         | 200/3300 [09:35<56:13,  1.09s/it]    The following columns in the evaluation set don't have a corresponding argument in `T5ForConditionalGeneration.forward` and have been ignored: corrupt_sent, correct_sent. If corrupt_sent, correct_sent are not expected by `T5ForConditionalGeneration.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2500
  Batch size = 16


{'loss': 0.5176, 'grad_norm': 0.40062445402145386, 'learning_rate': 2.3226e-05, 'epoch': 0.06}



  0%|          | 0/157 [00:00<?, ?it/s][A
  2%|▏         | 3/157 [00:00<00:08, 17.81it/s][A
  3%|▎         | 5/157 [00:00<00:10, 14.10it/s][A
  4%|▍         | 7/157 [00:00<00:11, 12.93it/s][A
  6%|▌         | 9/157 [00:00<00:11, 12.39it/s][A
  7%|▋         | 11/157 [00:00<00:12, 11.82it/s][A
  8%|▊         | 13/157 [00:01<00:12, 11.71it/s][A
 10%|▉         | 15/157 [00:01<00:12, 11.62it/s][A
 11%|█         | 17/157 [00:01<00:12, 11.64it/s][A
 12%|█▏        | 19/157 [00:01<00:11, 11.61it/s][A
 13%|█▎        | 21/157 [00:01<00:11, 11.58it/s][A
 15%|█▍        | 23/157 [00:01<00:11, 11.57it/s][A
 16%|█▌        | 25/157 [00:02<00:11, 11.48it/s][A
 17%|█▋        | 27/157 [00:02<00:11, 11.52it/s][A
 18%|█▊        | 29/157 [00:02<00:11, 11.52it/s][A
 20%|█▉        | 31/157 [00:02<00:10, 11.51it/s][A
 21%|██        | 33/157 [00:02<00:10, 11.46it/s][A
 22%|██▏       | 35/157 [00:02<00:10, 11.48it/s][A
 24%|██▎       | 37/157 [00:03<00:10, 11.51it/s][A
 25%|██▍       | 39/157 

{'eval_loss': 0.21704094111919403, 'eval_runtime': 13.6482, 'eval_samples_per_second': 183.175, 'eval_steps_per_second': 11.503, 'epoch': 0.06}


Model weights saved in MT5_large_A_art/checkpoint-200/model.safetensors
tokenizer config file saved in MT5_large_A_art/checkpoint-200/tokenizer_config.json
Special tokens file saved in MT5_large_A_art/checkpoint-200/special_tokens_map.json
Copy vocab file to MT5_large_A_art/checkpoint-200/spiece.model
tokenizer config file saved in MT5_large_A_art/tokenizer_config.json
Special tokens file saved in MT5_large_A_art/special_tokens_map.json
Copy vocab file to MT5_large_A_art/spiece.model
Deleting older checkpoint [MT5_large_A_art/checkpoint-100] due to args.save_total_limit
  9%|▉         | 300/3300 [17:24<54:52,  1.10s/it]    The following columns in the evaluation set don't have a corresponding argument in `T5ForConditionalGeneration.forward` and have been ignored: corrupt_sent, correct_sent. If corrupt_sent, correct_sent are not expected by `T5ForConditionalGeneration.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2500
  Batch size = 16


{'loss': 0.3916, 'grad_norm': 0.5555945038795471, 'learning_rate': 3.4838999999999996e-05, 'epoch': 0.09}



  0%|          | 0/157 [00:00<?, ?it/s][A
  2%|▏         | 3/157 [00:00<00:08, 17.82it/s][A
  3%|▎         | 5/157 [00:00<00:10, 13.91it/s][A
  4%|▍         | 7/157 [00:00<00:11, 12.92it/s][A
  6%|▌         | 9/157 [00:00<00:12, 12.26it/s][A
  7%|▋         | 11/157 [00:00<00:12, 12.01it/s][A
  8%|▊         | 13/157 [00:01<00:12, 11.86it/s][A
 10%|▉         | 15/157 [00:01<00:12, 11.77it/s][A
 11%|█         | 17/157 [00:01<00:11, 11.67it/s][A
 12%|█▏        | 19/157 [00:01<00:11, 11.63it/s][A
 13%|█▎        | 21/157 [00:01<00:11, 11.61it/s][A
 15%|█▍        | 23/157 [00:01<00:11, 11.56it/s][A
 16%|█▌        | 25/157 [00:02<00:11, 11.54it/s][A
 17%|█▋        | 27/157 [00:02<00:11, 11.53it/s][A
 18%|█▊        | 29/157 [00:02<00:11, 11.49it/s][A
 20%|█▉        | 31/157 [00:02<00:11, 11.40it/s][A
 21%|██        | 33/157 [00:02<00:10, 11.43it/s][A
 22%|██▏       | 35/157 [00:02<00:10, 11.47it/s][A
 24%|██▎       | 37/157 [00:03<00:10, 11.49it/s][A
 25%|██▍       | 39/157 

{'eval_loss': 0.19732126593589783, 'eval_runtime': 13.6658, 'eval_samples_per_second': 182.938, 'eval_steps_per_second': 11.489, 'epoch': 0.09}


Model weights saved in MT5_large_A_art/checkpoint-300/model.safetensors
tokenizer config file saved in MT5_large_A_art/checkpoint-300/tokenizer_config.json
Special tokens file saved in MT5_large_A_art/checkpoint-300/special_tokens_map.json
Copy vocab file to MT5_large_A_art/checkpoint-300/spiece.model
Deleting older checkpoint [MT5_large_A_art/checkpoint-200] due to args.save_total_limit
 12%|█▏        | 400/3300 [23:47<52:39,  1.09s/it]   The following columns in the evaluation set don't have a corresponding argument in `T5ForConditionalGeneration.forward` and have been ignored: corrupt_sent, correct_sent. If corrupt_sent, correct_sent are not expected by `T5ForConditionalGeneration.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2500
  Batch size = 16


{'loss': 0.3356, 'grad_norm': 0.3587130904197693, 'learning_rate': 3.827039715080965e-05, 'epoch': 0.12}



  0%|          | 0/157 [00:00<?, ?it/s][A
  2%|▏         | 3/157 [00:00<00:08, 17.92it/s][A
  3%|▎         | 5/157 [00:00<00:10, 14.01it/s][A
  4%|▍         | 7/157 [00:00<00:11, 12.84it/s][A
  6%|▌         | 9/157 [00:00<00:12, 12.23it/s][A
  7%|▋         | 11/157 [00:00<00:12, 11.90it/s][A
  8%|▊         | 13/157 [00:01<00:12, 11.81it/s][A
 10%|▉         | 15/157 [00:01<00:12, 11.72it/s][A
 11%|█         | 17/157 [00:01<00:11, 11.67it/s][A
 12%|█▏        | 19/157 [00:01<00:11, 11.62it/s][A
 13%|█▎        | 21/157 [00:01<00:11, 11.57it/s][A
 15%|█▍        | 23/157 [00:01<00:11, 11.52it/s][A
 16%|█▌        | 25/157 [00:02<00:11, 11.52it/s][A
 17%|█▋        | 27/157 [00:02<00:11, 11.53it/s][A
 18%|█▊        | 29/157 [00:02<00:11, 11.52it/s][A
 20%|█▉        | 31/157 [00:02<00:10, 11.54it/s][A
 21%|██        | 33/157 [00:02<00:10, 11.54it/s][A
 22%|██▏       | 35/157 [00:02<00:10, 11.52it/s][A
 24%|██▎       | 37/157 [00:03<00:10, 11.53it/s][A
 25%|██▍       | 39/157 

{'eval_loss': 0.19280365109443665, 'eval_runtime': 13.6445, 'eval_samples_per_second': 183.224, 'eval_steps_per_second': 11.506, 'epoch': 0.12}


Model weights saved in MT5_large_A_art/checkpoint-400/model.safetensors
tokenizer config file saved in MT5_large_A_art/checkpoint-400/tokenizer_config.json
Special tokens file saved in MT5_large_A_art/checkpoint-400/special_tokens_map.json
Copy vocab file to MT5_large_A_art/checkpoint-400/spiece.model
Deleting older checkpoint [MT5_large_A_art/checkpoint-300] due to args.save_total_limit
 15%|█▌        | 500/3300 [30:11<51:27,  1.10s/it]   The following columns in the evaluation set don't have a corresponding argument in `T5ForConditionalGeneration.forward` and have been ignored: corrupt_sent, correct_sent. If corrupt_sent, correct_sent are not expected by `T5ForConditionalGeneration.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2500
  Batch size = 16


{'loss': 0.2993, 'grad_norm': 0.34884652495384216, 'learning_rate': 3.8013932731032435e-05, 'epoch': 0.15}



  0%|          | 0/157 [00:00<?, ?it/s][A
  2%|▏         | 3/157 [00:00<00:08, 17.71it/s][A
  3%|▎         | 5/157 [00:00<00:10, 14.02it/s][A
  4%|▍         | 7/157 [00:00<00:11, 12.87it/s][A
  6%|▌         | 9/157 [00:00<00:11, 12.38it/s][A
  7%|▋         | 11/157 [00:00<00:12, 12.10it/s][A
  8%|▊         | 13/157 [00:01<00:12, 11.90it/s][A
 10%|▉         | 15/157 [00:01<00:12, 11.71it/s][A
 11%|█         | 17/157 [00:01<00:11, 11.70it/s][A
 12%|█▏        | 19/157 [00:01<00:11, 11.63it/s][A
 13%|█▎        | 21/157 [00:01<00:11, 11.58it/s][A
 15%|█▍        | 23/157 [00:01<00:11, 11.61it/s][A
 16%|█▌        | 25/157 [00:02<00:11, 11.59it/s][A
 17%|█▋        | 27/157 [00:02<00:11, 11.58it/s][A
 18%|█▊        | 29/157 [00:02<00:11, 11.20it/s][A
 20%|█▉        | 31/157 [00:02<00:11, 11.30it/s][A
 21%|██        | 33/157 [00:02<00:10, 11.38it/s][A
 22%|██▏       | 35/157 [00:02<00:10, 11.39it/s][A
 24%|██▎       | 37/157 [00:03<00:10, 11.43it/s][A
 25%|██▍       | 39/157 

{'eval_loss': 0.19366812705993652, 'eval_runtime': 13.6264, 'eval_samples_per_second': 183.467, 'eval_steps_per_second': 11.522, 'epoch': 0.15}


 18%|█▊        | 600/3300 [32:15<49:15,  1.09s/it]  The following columns in the evaluation set don't have a corresponding argument in `T5ForConditionalGeneration.forward` and have been ignored: corrupt_sent, correct_sent. If corrupt_sent, correct_sent are not expected by `T5ForConditionalGeneration.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2500
  Batch size = 16


{'loss': 0.2783, 'grad_norm': 0.38781115412712097, 'learning_rate': 3.754672663926551e-05, 'epoch': 0.18}



  0%|          | 0/157 [00:00<?, ?it/s][A
  2%|▏         | 3/157 [00:00<00:08, 17.74it/s][A
  3%|▎         | 5/157 [00:00<00:10, 14.14it/s][A
  4%|▍         | 7/157 [00:00<00:11, 12.95it/s][A
  6%|▌         | 9/157 [00:00<00:12, 12.24it/s][A
  7%|▋         | 11/157 [00:00<00:12, 12.00it/s][A
  8%|▊         | 13/157 [00:01<00:12, 11.88it/s][A
 10%|▉         | 15/157 [00:01<00:12, 11.76it/s][A
 11%|█         | 17/157 [00:01<00:11, 11.69it/s][A
 12%|█▏        | 19/157 [00:01<00:11, 11.62it/s][A
 13%|█▎        | 21/157 [00:01<00:11, 11.54it/s][A
 15%|█▍        | 23/157 [00:01<00:11, 11.52it/s][A
 16%|█▌        | 25/157 [00:02<00:11, 11.51it/s][A
 17%|█▋        | 27/157 [00:02<00:11, 11.50it/s][A
 18%|█▊        | 29/157 [00:02<00:11, 11.57it/s][A
 20%|█▉        | 31/157 [00:02<00:10, 11.49it/s][A
 21%|██        | 33/157 [00:02<00:10, 11.41it/s][A
 22%|██▏       | 35/157 [00:02<00:10, 11.49it/s][A
 24%|██▎       | 37/157 [00:03<00:10, 11.45it/s][A
 25%|██▍       | 39/157 

{'eval_loss': 0.1918616145849228, 'eval_runtime': 13.6429, 'eval_samples_per_second': 183.245, 'eval_steps_per_second': 11.508, 'epoch': 0.18}


Model weights saved in MT5_large_A_art/checkpoint-600/model.safetensors
tokenizer config file saved in MT5_large_A_art/checkpoint-600/tokenizer_config.json
Special tokens file saved in MT5_large_A_art/checkpoint-600/special_tokens_map.json
Copy vocab file to MT5_large_A_art/checkpoint-600/spiece.model
tokenizer config file saved in MT5_large_A_art/tokenizer_config.json
Special tokens file saved in MT5_large_A_art/special_tokens_map.json
Copy vocab file to MT5_large_A_art/spiece.model
Deleting older checkpoint [MT5_large_A_art/checkpoint-400] due to args.save_total_limit
 20%|██        | 674/3300 [39:34<47:52,  1.09s/it]    '(MaxRetryError("HTTPSConnectionPool(host='hf-hub-lfs-us-east-1.s3-accelerate.amazonaws.com', port=443): Max retries exceeded with url: /repos/e9/26/e926eae4bb9b182029f1f693d95e00ecd84f67a4f5cbff6ce0858916c563d441/ee7bdf1c6b86d550a2b7e4879592b4ea6b25db2fcea0b9c10f3aa48c74c5d928?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AK

{'loss': 0.268, 'grad_norm': 0.3414463400840759, 'learning_rate': 3.68740015188258e-05, 'epoch': 0.21}



  0%|          | 0/157 [00:00<?, ?it/s][A
  2%|▏         | 3/157 [00:00<00:08, 17.70it/s][A
  3%|▎         | 5/157 [00:00<00:10, 14.05it/s][A
  4%|▍         | 7/157 [00:00<00:11, 12.90it/s][A
  6%|▌         | 9/157 [00:00<00:12, 12.30it/s][A
  7%|▋         | 11/157 [00:00<00:12, 12.03it/s][A
  8%|▊         | 13/157 [00:01<00:12, 11.50it/s][A
 10%|▉         | 15/157 [00:01<00:12, 11.35it/s][A
 11%|█         | 17/157 [00:01<00:12, 11.10it/s][A
 12%|█▏        | 19/157 [00:01<00:12, 11.27it/s][A
 13%|█▎        | 21/157 [00:01<00:12, 11.31it/s][A
 15%|█▍        | 23/157 [00:01<00:12, 11.13it/s][A
 16%|█▌        | 25/157 [00:02<00:12, 10.73it/s][A
 17%|█▋        | 27/157 [00:02<00:11, 10.98it/s][A
 18%|█▊        | 29/157 [00:02<00:12, 10.34it/s][A
 20%|█▉        | 31/157 [00:02<00:11, 10.71it/s][A
 21%|██        | 33/157 [00:02<00:11, 10.72it/s][A
 22%|██▏       | 35/157 [00:03<00:11, 10.74it/s][A
 24%|██▎       | 37/157 [00:03<00:11, 10.60it/s][A
 25%|██▍       | 39/157 

{'eval_loss': 0.19072450697422028, 'eval_runtime': 14.7799, 'eval_samples_per_second': 169.148, 'eval_steps_per_second': 10.622, 'epoch': 0.21}


Model weights saved in MT5_large_A_art/checkpoint-700/model.safetensors
'(MaxRetryError("HTTPSConnectionPool(host='hf-hub-lfs-us-east-1.s3-accelerate.amazonaws.com', port=443): Max retries exceeded with url: /repos/e9/26/e926eae4bb9b182029f1f693d95e00ecd84f67a4f5cbff6ce0858916c563d441/ee7bdf1c6b86d550a2b7e4879592b4ea6b25db2fcea0b9c10f3aa48c74c5d928?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIA2JU7TKAQLC2QXPN7%2F20250420%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20250420T143953Z&X-Amz-Expires=86400&X-Amz-Signature=7b23b98e98b3ce6900fe6c6dbb4cd6aba498012f98c3edff4d5a49c35636cbc5&X-Amz-SignedHeaders=host&partNumber=79&uploadId=PGsbMwa6thGlAjiRsKfZk3w5u0vymNWbK8HRG3_sYVkK7fxJWrxsMWVgYFULG0jVMbYmadJeGNmbfY7yZ45fmltsGsG4kiqHqluXRnkFaJcfbeeResdoi58Txhn5Rsp5&x-id=UploadPart (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:2426)')))"), '(Request ID: 97edaf60-7143-41ce-b5ce-15c8799bbd50)')' thrown while requesting P

{'loss': 0.2697, 'grad_norm': 0.4456111192703247, 'learning_rate': 3.600327739870332e-05, 'epoch': 0.24}



  0%|          | 0/157 [00:00<?, ?it/s][A
  2%|▏         | 3/157 [00:00<00:08, 17.43it/s][A
  3%|▎         | 5/157 [00:00<00:10, 13.88it/s][A
  4%|▍         | 7/157 [00:00<00:11, 12.73it/s][A
  6%|▌         | 9/157 [00:00<00:12, 12.26it/s][A
  7%|▋         | 11/157 [00:00<00:12, 12.02it/s][A
  8%|▊         | 13/157 [00:01<00:12, 11.87it/s][A
 10%|▉         | 15/157 [00:01<00:12, 11.63it/s][A
 11%|█         | 17/157 [00:01<00:12, 11.58it/s][A
 12%|█▏        | 19/157 [00:01<00:11, 11.51it/s][A
 13%|█▎        | 21/157 [00:01<00:11, 11.50it/s][A
 15%|█▍        | 23/157 [00:01<00:11, 11.49it/s][A
 16%|█▌        | 25/157 [00:02<00:11, 11.51it/s][A
 17%|█▋        | 27/157 [00:02<00:11, 11.55it/s][A
 18%|█▊        | 29/157 [00:02<00:11, 11.53it/s][A
 20%|█▉        | 31/157 [00:02<00:10, 11.52it/s][A
 21%|██        | 33/157 [00:02<00:10, 11.45it/s][A
 22%|██▏       | 35/157 [00:02<00:10, 11.44it/s][A
 24%|██▎       | 37/157 [00:03<00:10, 11.40it/s][A
 25%|██▍       | 39/157 

{'eval_loss': 0.19144085049629211, 'eval_runtime': 13.7425, 'eval_samples_per_second': 181.917, 'eval_steps_per_second': 11.424, 'epoch': 0.24}


 27%|██▋       | 900/3300 [48:35<44:10,  1.10s/it]  The following columns in the evaluation set don't have a corresponding argument in `T5ForConditionalGeneration.forward` and have been ignored: corrupt_sent, correct_sent. If corrupt_sent, correct_sent are not expected by `T5ForConditionalGeneration.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2500
  Batch size = 16


{'loss': 0.2491, 'grad_norm': 0.3355579972267151, 'learning_rate': 3.494428763123867e-05, 'epoch': 0.27}



  0%|          | 0/157 [00:00<?, ?it/s][A
  2%|▏         | 3/157 [00:00<00:08, 17.75it/s][A
  3%|▎         | 5/157 [00:00<00:10, 14.18it/s][A
  4%|▍         | 7/157 [00:00<00:11, 12.94it/s][A
  6%|▌         | 9/157 [00:00<00:11, 12.37it/s][A
  7%|▋         | 11/157 [00:00<00:12, 12.09it/s][A
  8%|▊         | 13/157 [00:01<00:12, 11.85it/s][A
 10%|▉         | 15/157 [00:01<00:12, 11.72it/s][A
 11%|█         | 17/157 [00:01<00:11, 11.67it/s][A
 12%|█▏        | 19/157 [00:01<00:11, 11.67it/s][A
 13%|█▎        | 21/157 [00:01<00:11, 11.62it/s][A
 15%|█▍        | 23/157 [00:01<00:11, 11.60it/s][A
 16%|█▌        | 25/157 [00:02<00:11, 11.57it/s][A
 17%|█▋        | 27/157 [00:02<00:11, 11.51it/s][A
 18%|█▊        | 29/157 [00:02<00:11, 11.55it/s][A
 20%|█▉        | 31/157 [00:02<00:10, 11.47it/s][A
 21%|██        | 33/157 [00:02<00:10, 11.46it/s][A
 22%|██▏       | 35/157 [00:02<00:10, 11.50it/s][A
 24%|██▎       | 37/157 [00:03<00:10, 11.47it/s][A
 25%|██▍       | 39/157 

{'eval_loss': 0.1900670826435089, 'eval_runtime': 13.716, 'eval_samples_per_second': 182.269, 'eval_steps_per_second': 11.447, 'epoch': 0.27}


Model weights saved in MT5_large_A_art/checkpoint-900/model.safetensors
tokenizer config file saved in MT5_large_A_art/checkpoint-900/tokenizer_config.json
Special tokens file saved in MT5_large_A_art/checkpoint-900/special_tokens_map.json
Copy vocab file to MT5_large_A_art/checkpoint-900/spiece.model
tokenizer config file saved in MT5_large_A_art/tokenizer_config.json
Special tokens file saved in MT5_large_A_art/special_tokens_map.json
Copy vocab file to MT5_large_A_art/spiece.model
Deleting older checkpoint [MT5_large_A_art/checkpoint-700] due to args.save_total_limit
 30%|███       | 1000/3300 [56:21<42:05,  1.10s/it]   The following columns in the evaluation set don't have a corresponding argument in `T5ForConditionalGeneration.forward` and have been ignored: corrupt_sent, correct_sent. If corrupt_sent, correct_sent are not expected by `T5ForConditionalGeneration.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2500
  Batch size = 16


{'loss': 0.2488, 'grad_norm': 0.2681104242801666, 'learning_rate': 3.3708870088260165e-05, 'epoch': 0.3}



  0%|          | 0/157 [00:00<?, ?it/s][A
  2%|▏         | 3/157 [00:00<00:08, 17.39it/s][A
  3%|▎         | 5/157 [00:00<00:10, 13.95it/s][A
  4%|▍         | 7/157 [00:00<00:11, 12.78it/s][A
  6%|▌         | 9/157 [00:00<00:12, 12.23it/s][A
  7%|▋         | 11/157 [00:00<00:12, 11.99it/s][A
  8%|▊         | 13/157 [00:01<00:12, 11.82it/s][A
 10%|▉         | 15/157 [00:01<00:12, 11.69it/s][A
 11%|█         | 17/157 [00:01<00:12, 11.64it/s][A
 12%|█▏        | 19/157 [00:01<00:11, 11.61it/s][A
 13%|█▎        | 21/157 [00:01<00:11, 11.57it/s][A
 15%|█▍        | 23/157 [00:01<00:11, 11.56it/s][A
 16%|█▌        | 25/157 [00:02<00:11, 11.56it/s][A
 17%|█▋        | 27/157 [00:02<00:11, 11.47it/s][A
 18%|█▊        | 29/157 [00:02<00:11, 11.51it/s][A
 20%|█▉        | 31/157 [00:02<00:10, 11.48it/s][A
 21%|██        | 33/157 [00:02<00:10, 11.51it/s][A
 22%|██▏       | 35/157 [00:02<00:10, 11.55it/s][A
 24%|██▎       | 37/157 [00:03<00:10, 11.49it/s][A
 25%|██▍       | 39/157 

{'eval_loss': 0.18881675601005554, 'eval_runtime': 13.6849, 'eval_samples_per_second': 182.683, 'eval_steps_per_second': 11.472, 'epoch': 0.3}


Model weights saved in MT5_large_A_art/checkpoint-1000/model.safetensors
tokenizer config file saved in MT5_large_A_art/checkpoint-1000/tokenizer_config.json
Special tokens file saved in MT5_large_A_art/checkpoint-1000/special_tokens_map.json
Copy vocab file to MT5_large_A_art/checkpoint-1000/spiece.model
Deleting older checkpoint [MT5_large_A_art/checkpoint-900] due to args.save_total_limit
 33%|███▎      | 1100/3300 [1:02:45<40:11,  1.10s/it]   The following columns in the evaluation set don't have a corresponding argument in `T5ForConditionalGeneration.forward` and have been ignored: corrupt_sent, correct_sent. If corrupt_sent, correct_sent are not expected by `T5ForConditionalGeneration.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2500
  Batch size = 16


{'loss': 0.238, 'grad_norm': 0.2267482578754425, 'learning_rate': 3.2310834831939845e-05, 'epoch': 0.33}



  0%|          | 0/157 [00:00<?, ?it/s][A
  2%|▏         | 3/157 [00:00<00:08, 17.90it/s][A
  3%|▎         | 5/157 [00:00<00:10, 14.19it/s][A
  4%|▍         | 7/157 [00:00<00:11, 12.84it/s][A
  6%|▌         | 9/157 [00:00<00:12, 12.28it/s][A
  7%|▋         | 11/157 [00:00<00:12, 12.05it/s][A
  8%|▊         | 13/157 [00:01<00:12, 11.87it/s][A
 10%|▉         | 15/157 [00:01<00:12, 11.70it/s][A
 11%|█         | 17/157 [00:01<00:12, 11.66it/s][A
 12%|█▏        | 19/157 [00:01<00:11, 11.63it/s][A
 13%|█▎        | 21/157 [00:01<00:11, 11.62it/s][A
 15%|█▍        | 23/157 [00:01<00:11, 11.59it/s][A
 16%|█▌        | 25/157 [00:02<00:11, 11.51it/s][A
 17%|█▋        | 27/157 [00:02<00:11, 11.51it/s][A
 18%|█▊        | 29/157 [00:02<00:11, 11.44it/s][A
 20%|█▉        | 31/157 [00:02<00:11, 11.33it/s][A
 21%|██        | 33/157 [00:02<00:10, 11.33it/s][A
 22%|██▏       | 35/157 [00:02<00:10, 11.43it/s][A
 24%|██▎       | 37/157 [00:03<00:10, 11.41it/s][A
 25%|██▍       | 39/157 

{'eval_loss': 0.1860915571451187, 'eval_runtime': 13.71, 'eval_samples_per_second': 182.349, 'eval_steps_per_second': 11.452, 'epoch': 0.33}


Model weights saved in MT5_large_A_art/checkpoint-1100/model.safetensors
tokenizer config file saved in MT5_large_A_art/checkpoint-1100/tokenizer_config.json
Special tokens file saved in MT5_large_A_art/checkpoint-1100/special_tokens_map.json
Copy vocab file to MT5_large_A_art/checkpoint-1100/spiece.model
Deleting older checkpoint [MT5_large_A_art/checkpoint-1000] due to args.save_total_limit
 36%|███▋      | 1200/3300 [1:09:07<38:24,  1.10s/it]   The following columns in the evaluation set don't have a corresponding argument in `T5ForConditionalGeneration.forward` and have been ignored: corrupt_sent, correct_sent. If corrupt_sent, correct_sent are not expected by `T5ForConditionalGeneration.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2500
  Batch size = 16


{'loss': 0.2414, 'grad_norm': 0.2598251700401306, 'learning_rate': 3.0765809739604044e-05, 'epoch': 0.36}



  0%|          | 0/157 [00:00<?, ?it/s][A
  2%|▏         | 3/157 [00:00<00:08, 17.61it/s][A
  3%|▎         | 5/157 [00:00<00:10, 14.10it/s][A
  4%|▍         | 7/157 [00:00<00:11, 12.91it/s][A
  6%|▌         | 9/157 [00:00<00:11, 12.35it/s][A
  7%|▋         | 11/157 [00:00<00:12, 12.03it/s][A
  8%|▊         | 13/157 [00:01<00:12, 11.86it/s][A
 10%|▉         | 15/157 [00:01<00:12, 11.74it/s][A
 11%|█         | 17/157 [00:01<00:12, 11.64it/s][A
 12%|█▏        | 19/157 [00:01<00:11, 11.60it/s][A
 13%|█▎        | 21/157 [00:01<00:11, 11.58it/s][A
 15%|█▍        | 23/157 [00:01<00:11, 11.57it/s][A
 16%|█▌        | 25/157 [00:02<00:11, 11.47it/s][A
 17%|█▋        | 27/157 [00:02<00:11, 11.44it/s][A
 18%|█▊        | 29/157 [00:02<00:11, 11.49it/s][A
 20%|█▉        | 31/157 [00:02<00:10, 11.53it/s][A
 21%|██        | 33/157 [00:02<00:10, 11.49it/s][A
 22%|██▏       | 35/157 [00:02<00:10, 11.50it/s][A
 24%|██▎       | 37/157 [00:03<00:10, 11.49it/s][A
 25%|██▍       | 39/157 

{'eval_loss': 0.18722112476825714, 'eval_runtime': 13.7235, 'eval_samples_per_second': 182.169, 'eval_steps_per_second': 11.44, 'epoch': 0.36}


 39%|███▉      | 1300/3300 [1:11:11<36:33,  1.10s/it]  The following columns in the evaluation set don't have a corresponding argument in `T5ForConditionalGeneration.forward` and have been ignored: corrupt_sent, correct_sent. If corrupt_sent, correct_sent are not expected by `T5ForConditionalGeneration.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2500
  Batch size = 16


{'loss': 0.2378, 'grad_norm': 0.2871151864528656, 'learning_rate': 2.9091065808175034e-05, 'epoch': 0.39}



  0%|          | 0/157 [00:00<?, ?it/s][A
  2%|▏         | 3/157 [00:00<00:08, 17.60it/s][A
  3%|▎         | 5/157 [00:00<00:10, 13.91it/s][A
  4%|▍         | 7/157 [00:00<00:11, 12.79it/s][A
  6%|▌         | 9/157 [00:00<00:12, 12.21it/s][A
  7%|▋         | 11/157 [00:00<00:12, 11.98it/s][A
  8%|▊         | 13/157 [00:01<00:12, 11.76it/s][A
 10%|▉         | 15/157 [00:01<00:12, 11.71it/s][A
 11%|█         | 17/157 [00:01<00:12, 11.62it/s][A
 12%|█▏        | 19/157 [00:01<00:11, 11.57it/s][A
 13%|█▎        | 21/157 [00:01<00:11, 11.56it/s][A
 15%|█▍        | 23/157 [00:01<00:11, 11.48it/s][A
 16%|█▌        | 25/157 [00:02<00:11, 11.46it/s][A
 17%|█▋        | 27/157 [00:02<00:11, 11.45it/s][A
 18%|█▊        | 29/157 [00:02<00:11, 11.42it/s][A
 20%|█▉        | 31/157 [00:02<00:11, 11.33it/s][A
 21%|██        | 33/157 [00:02<00:10, 11.33it/s][A
 22%|██▏       | 35/157 [00:02<00:10, 11.37it/s][A
 24%|██▎       | 37/157 [00:03<00:10, 11.40it/s][A
 25%|██▍       | 39/157 

{'eval_loss': 0.18568024039268494, 'eval_runtime': 13.7366, 'eval_samples_per_second': 181.996, 'eval_steps_per_second': 11.429, 'epoch': 0.39}


Model weights saved in MT5_large_A_art/checkpoint-1300/model.safetensors
tokenizer config file saved in MT5_large_A_art/checkpoint-1300/tokenizer_config.json
Special tokens file saved in MT5_large_A_art/checkpoint-1300/special_tokens_map.json
Copy vocab file to MT5_large_A_art/checkpoint-1300/spiece.model
tokenizer config file saved in MT5_large_A_art/tokenizer_config.json
Special tokens file saved in MT5_large_A_art/special_tokens_map.json
Copy vocab file to MT5_large_A_art/spiece.model
Deleting older checkpoint [MT5_large_A_art/checkpoint-1100] due to args.save_total_limit
 42%|████▏     | 1400/3300 [1:18:59<34:26,  1.09s/it]    The following columns in the evaluation set don't have a corresponding argument in `T5ForConditionalGeneration.forward` and have been ignored: corrupt_sent, correct_sent. If corrupt_sent, correct_sent are not expected by `T5ForConditionalGeneration.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2500
  Batch siz

{'loss': 0.2286, 'grad_norm': 0.41315189003944397, 'learning_rate': 2.7305324091070523e-05, 'epoch': 0.42}



  0%|          | 0/157 [00:00<?, ?it/s][A
  2%|▏         | 3/157 [00:00<00:08, 17.59it/s][A
  3%|▎         | 5/157 [00:00<00:10, 13.99it/s][A
  4%|▍         | 7/157 [00:00<00:11, 12.80it/s][A
  6%|▌         | 9/157 [00:00<00:11, 12.34it/s][A
  7%|▋         | 11/157 [00:00<00:12, 12.04it/s][A
  8%|▊         | 13/157 [00:01<00:12, 11.86it/s][A
 10%|▉         | 15/157 [00:01<00:12, 11.75it/s][A
 11%|█         | 17/157 [00:01<00:12, 11.64it/s][A
 12%|█▏        | 19/157 [00:01<00:11, 11.55it/s][A
 13%|█▎        | 21/157 [00:01<00:11, 11.53it/s][A
 15%|█▍        | 23/157 [00:01<00:11, 11.55it/s][A
 16%|█▌        | 25/157 [00:02<00:11, 11.53it/s][A
 17%|█▋        | 27/157 [00:02<00:11, 11.51it/s][A
 18%|█▊        | 29/157 [00:02<00:11, 11.46it/s][A
 20%|█▉        | 31/157 [00:02<00:10, 11.50it/s][A
 21%|██        | 33/157 [00:02<00:10, 11.46it/s][A
 22%|██▏       | 35/157 [00:02<00:10, 11.46it/s][A
 24%|██▎       | 37/157 [00:03<00:10, 11.48it/s][A
 25%|██▍       | 39/157 

{'eval_loss': 0.18422073125839233, 'eval_runtime': 13.6886, 'eval_samples_per_second': 182.634, 'eval_steps_per_second': 11.469, 'epoch': 0.42}


Model weights saved in MT5_large_A_art/checkpoint-1400/model.safetensors
tokenizer config file saved in MT5_large_A_art/checkpoint-1400/tokenizer_config.json
Special tokens file saved in MT5_large_A_art/checkpoint-1400/special_tokens_map.json
Copy vocab file to MT5_large_A_art/checkpoint-1400/spiece.model
Deleting older checkpoint [MT5_large_A_art/checkpoint-1300] due to args.save_total_limit
 45%|████▌     | 1500/3300 [1:25:23<33:04,  1.10s/it]   The following columns in the evaluation set don't have a corresponding argument in `T5ForConditionalGeneration.forward` and have been ignored: corrupt_sent, correct_sent. If corrupt_sent, correct_sent are not expected by `T5ForConditionalGeneration.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2500
  Batch size = 16


{'loss': 0.2201, 'grad_norm': 0.2848554253578186, 'learning_rate': 2.5428546425708613e-05, 'epoch': 0.45}



  0%|          | 0/157 [00:00<?, ?it/s][A
  2%|▏         | 3/157 [00:00<00:08, 17.96it/s][A
  3%|▎         | 5/157 [00:00<00:10, 14.15it/s][A
  4%|▍         | 7/157 [00:00<00:11, 12.81it/s][A
  6%|▌         | 9/157 [00:00<00:12, 12.33it/s][A
  7%|▋         | 11/157 [00:00<00:12, 12.00it/s][A
  8%|▊         | 13/157 [00:01<00:12, 11.85it/s][A
 10%|▉         | 15/157 [00:01<00:12, 11.74it/s][A
 11%|█         | 17/157 [00:01<00:12, 11.65it/s][A
 12%|█▏        | 19/157 [00:01<00:11, 11.62it/s][A
 13%|█▎        | 21/157 [00:01<00:11, 11.51it/s][A
 15%|█▍        | 23/157 [00:01<00:11, 11.46it/s][A
 16%|█▌        | 25/157 [00:02<00:11, 11.51it/s][A
 17%|█▋        | 27/157 [00:02<00:11, 11.53it/s][A
 18%|█▊        | 29/157 [00:02<00:11, 11.50it/s][A
 20%|█▉        | 31/157 [00:02<00:10, 11.53it/s][A
 21%|██        | 33/157 [00:02<00:10, 11.53it/s][A
 22%|██▏       | 35/157 [00:02<00:10, 11.52it/s][A
 24%|██▎       | 37/157 [00:03<00:10, 11.51it/s][A
 25%|██▍       | 39/157 

{'eval_loss': 0.18487440049648285, 'eval_runtime': 13.6623, 'eval_samples_per_second': 182.985, 'eval_steps_per_second': 11.491, 'epoch': 0.45}


 48%|████▊     | 1600/3300 [1:27:26<30:59,  1.09s/it]  The following columns in the evaluation set don't have a corresponding argument in `T5ForConditionalGeneration.forward` and have been ignored: corrupt_sent, correct_sent. If corrupt_sent, correct_sent are not expected by `T5ForConditionalGeneration.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2500
  Batch size = 16


{'loss': 0.2217, 'grad_norm': 0.2832561433315277, 'learning_rate': 2.3481712290961867e-05, 'epoch': 0.48}



  0%|          | 0/157 [00:00<?, ?it/s][A
  2%|▏         | 3/157 [00:00<00:08, 17.77it/s][A
  3%|▎         | 5/157 [00:00<00:10, 14.00it/s][A
  4%|▍         | 7/157 [00:00<00:11, 12.87it/s][A
  6%|▌         | 9/157 [00:00<00:11, 12.38it/s][A
  7%|▋         | 11/157 [00:00<00:12, 11.76it/s][A
  8%|▊         | 13/157 [00:01<00:12, 11.73it/s][A
 10%|▉         | 15/157 [00:01<00:12, 11.67it/s][A
 11%|█         | 17/157 [00:01<00:12, 11.62it/s][A
 12%|█▏        | 19/157 [00:01<00:11, 11.56it/s][A
 13%|█▎        | 21/157 [00:01<00:11, 11.52it/s][A
 15%|█▍        | 23/157 [00:01<00:11, 11.53it/s][A
 16%|█▌        | 25/157 [00:02<00:11, 11.55it/s][A
 17%|█▋        | 27/157 [00:02<00:11, 11.54it/s][A
 18%|█▊        | 29/157 [00:02<00:11, 11.54it/s][A
 20%|█▉        | 31/157 [00:02<00:10, 11.54it/s][A
 21%|██        | 33/157 [00:02<00:10, 11.54it/s][A
 22%|██▏       | 35/157 [00:02<00:10, 11.50it/s][A
 24%|██▎       | 37/157 [00:03<00:10, 11.49it/s][A
 25%|██▍       | 39/157 

{'eval_loss': 0.1844666302204132, 'eval_runtime': 13.6577, 'eval_samples_per_second': 183.047, 'eval_steps_per_second': 11.495, 'epoch': 0.48}


 52%|█████▏    | 1700/3300 [1:29:30<29:10,  1.09s/it]  The following columns in the evaluation set don't have a corresponding argument in `T5ForConditionalGeneration.forward` and have been ignored: corrupt_sent, correct_sent. If corrupt_sent, correct_sent are not expected by `T5ForConditionalGeneration.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2500
  Batch size = 16


{'loss': 0.2195, 'grad_norm': 0.3690177798271179, 'learning_rate': 2.148658428894995e-05, 'epoch': 0.51}



  0%|          | 0/157 [00:00<?, ?it/s][A
  2%|▏         | 3/157 [00:00<00:08, 18.02it/s][A
  3%|▎         | 5/157 [00:00<00:10, 14.06it/s][A
  4%|▍         | 7/157 [00:00<00:11, 12.88it/s][A
  6%|▌         | 9/157 [00:00<00:12, 12.29it/s][A
  7%|▋         | 11/157 [00:00<00:12, 12.06it/s][A
  8%|▊         | 13/157 [00:01<00:12, 11.84it/s][A
 10%|▉         | 15/157 [00:01<00:12, 11.72it/s][A
 11%|█         | 17/157 [00:01<00:11, 11.67it/s][A
 12%|█▏        | 19/157 [00:01<00:11, 11.62it/s][A
 13%|█▎        | 21/157 [00:01<00:11, 11.58it/s][A
 15%|█▍        | 23/157 [00:01<00:11, 11.54it/s][A
 16%|█▌        | 25/157 [00:02<00:11, 11.55it/s][A
 17%|█▋        | 27/157 [00:02<00:11, 11.57it/s][A
 18%|█▊        | 29/157 [00:02<00:11, 11.57it/s][A
 20%|█▉        | 31/157 [00:02<00:10, 11.53it/s][A
 21%|██        | 33/157 [00:02<00:10, 11.46it/s][A
 22%|██▏       | 35/157 [00:02<00:10, 11.44it/s][A
 24%|██▎       | 37/157 [00:03<00:10, 11.40it/s][A
 25%|██▍       | 39/157 

{'eval_loss': 0.18353089690208435, 'eval_runtime': 13.6774, 'eval_samples_per_second': 182.783, 'eval_steps_per_second': 11.479, 'epoch': 0.51}


Model weights saved in MT5_large_A_art/checkpoint-1700/model.safetensors
tokenizer config file saved in MT5_large_A_art/checkpoint-1700/tokenizer_config.json
Special tokens file saved in MT5_large_A_art/checkpoint-1700/special_tokens_map.json
Copy vocab file to MT5_large_A_art/checkpoint-1700/spiece.model
Deleting older checkpoint [MT5_large_A_art/checkpoint-1400] due to args.save_total_limit
 53%|█████▎    | 1735/3300 [1:34:41<28:36,  1.10s/it]   '(MaxRetryError("HTTPSConnectionPool(host='hf-hub-lfs-us-east-1.s3-accelerate.amazonaws.com', port=443): Max retries exceeded with url: /repos/e9/26/e926eae4bb9b182029f1f693d95e00ecd84f67a4f5cbff6ce0858916c563d441/8169a4cc962f222d7bbeb89baa2e74bf356001669cb4d89ec9c02a3f2430c2f1?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIA2JU7TKAQLC2QXPN7%2F20250420%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20250420T151848Z&X-Amz-Expires=86400&X-Amz-Signature=d6fad48a0d0775acc8adc0fcef8c9f860fb7db9d949b349fd49bc

{'loss': 0.2137, 'grad_norm': 0.29071491956710815, 'learning_rate': 1.946546487272248e-05, 'epoch': 0.55}



  0%|          | 0/157 [00:00<?, ?it/s][A
  2%|▏         | 3/157 [00:00<00:08, 17.98it/s][A
  3%|▎         | 5/157 [00:00<00:12, 11.81it/s][A
  4%|▍         | 7/157 [00:00<00:12, 11.79it/s][A
  6%|▌         | 9/157 [00:00<00:13, 11.24it/s][A
  7%|▋         | 11/157 [00:00<00:13, 11.05it/s][A
  8%|▊         | 13/157 [00:01<00:12, 11.18it/s][AHTTP Error 503 thrown while requesting PUT https://hf-hub-lfs-us-east-1.s3-accelerate.amazonaws.com/repos/e9/26/e926eae4bb9b182029f1f693d95e00ecd84f67a4f5cbff6ce0858916c563d441/8169a4cc962f222d7bbeb89baa2e74bf356001669cb4d89ec9c02a3f2430c2f1?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIA2JU7TKAQLC2QXPN7%2F20250420%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20250420T151848Z&X-Amz-Expires=86400&X-Amz-Signature=1d7be8302d2ef87c7b4c778429344812fd93c582a2ce255ba1d0cb4473f80622&X-Amz-SignedHeaders=host&partNumber=66&uploadId=BzuoG_MyEAYyKsNWL_lL5r_xpmlyMH7cAatqhJNVBs4af8vF4FHX3BmMv5y08hrB8gd7B5q.ijjKgA

{'eval_loss': 0.1818058043718338, 'eval_runtime': 14.3931, 'eval_samples_per_second': 173.694, 'eval_steps_per_second': 10.908, 'epoch': 0.55}


Model weights saved in MT5_large_A_art/checkpoint-1800/model.safetensors
'(MaxRetryError("HTTPSConnectionPool(host='hf-hub-lfs-us-east-1.s3-accelerate.amazonaws.com', port=443): Max retries exceeded with url: /repos/e9/26/e926eae4bb9b182029f1f693d95e00ecd84f67a4f5cbff6ce0858916c563d441/8169a4cc962f222d7bbeb89baa2e74bf356001669cb4d89ec9c02a3f2430c2f1?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIA2JU7TKAQLC2QXPN7%2F20250420%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20250420T151848Z&X-Amz-Expires=86400&X-Amz-Signature=3a09280d53014c0a1e0e9032fed8f4c9068167c0daae17a88d01e73e86b5f5f1&X-Amz-SignedHeaders=host&partNumber=79&uploadId=BzuoG_MyEAYyKsNWL_lL5r_xpmlyMH7cAatqhJNVBs4af8vF4FHX3BmMv5y08hrB8gd7B5q.ijjKgAoZSqxv9d9w4bQ0dNurnFLERf1zBER2apiMplHlk2lX58H9fkou&x-id=UploadPart (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:2426)')))"), '(Request ID: ac5c1479-7cc5-4daf-9601-97f2ce463541)')' thrown while requesting 

{'loss': 0.2147, 'grad_norm': 0.24848876893520355, 'learning_rate': 1.744094703924136e-05, 'epoch': 0.58}



  0%|          | 0/157 [00:00<?, ?it/s][A
  2%|▏         | 3/157 [00:00<00:08, 17.77it/s][A
  3%|▎         | 5/157 [00:00<00:10, 14.16it/s][A
  4%|▍         | 7/157 [00:00<00:11, 12.93it/s][A
  6%|▌         | 9/157 [00:00<00:11, 12.36it/s][A
  7%|▋         | 11/157 [00:00<00:12, 12.05it/s][A
  8%|▊         | 13/157 [00:01<00:12, 11.87it/s][A
 10%|▉         | 15/157 [00:01<00:12, 11.71it/s][A
 11%|█         | 17/157 [00:01<00:12, 11.66it/s][A
 12%|█▏        | 19/157 [00:01<00:11, 11.58it/s][A
 13%|█▎        | 21/157 [00:01<00:11, 11.56it/s][A
 15%|█▍        | 23/157 [00:01<00:11, 11.55it/s][A
 16%|█▌        | 25/157 [00:02<00:11, 11.47it/s][A
 17%|█▋        | 27/157 [00:02<00:11, 11.43it/s][A
 18%|█▊        | 29/157 [00:02<00:11, 11.39it/s][A
 20%|█▉        | 31/157 [00:02<00:11, 11.41it/s][A
 21%|██        | 33/157 [00:02<00:10, 11.42it/s][A
 22%|██▏       | 35/157 [00:02<00:10, 11.44it/s][A
 24%|██▎       | 37/157 [00:03<00:10, 11.47it/s][A
 25%|██▍       | 39/157 

{'eval_loss': 0.18217375874519348, 'eval_runtime': 13.7638, 'eval_samples_per_second': 181.637, 'eval_steps_per_second': 11.407, 'epoch': 0.58}


 61%|██████    | 2000/3300 [1:44:23<23:43,  1.10s/it]  The following columns in the evaluation set don't have a corresponding argument in `T5ForConditionalGeneration.forward` and have been ignored: corrupt_sent, correct_sent. If corrupt_sent, correct_sent are not expected by `T5ForConditionalGeneration.forward`,  you can safely ignore this message.


{'loss': 0.2246, 'grad_norm': 0.2913977801799774, 'learning_rate': 1.5435661774530436e-05, 'epoch': 0.61}



***** Running Evaluation *****
  Num examples = 2500
  Batch size = 16

  0%|          | 0/157 [00:00<?, ?it/s][A
  2%|▏         | 3/157 [00:00<00:08, 17.79it/s][A
  3%|▎         | 5/157 [00:00<00:10, 13.88it/s][A
  4%|▍         | 7/157 [00:00<00:11, 12.79it/s][A
  6%|▌         | 9/157 [00:00<00:11, 12.34it/s][A
  7%|▋         | 11/157 [00:00<00:12, 12.04it/s][A
  8%|▊         | 13/157 [00:01<00:12, 11.86it/s][A
 10%|▉         | 15/157 [00:01<00:12, 11.76it/s][A
 11%|█         | 17/157 [00:01<00:11, 11.69it/s][A
 12%|█▏        | 19/157 [00:01<00:11, 11.62it/s][A
 13%|█▎        | 21/157 [00:01<00:11, 11.55it/s][A
 15%|█▍        | 23/157 [00:01<00:11, 11.55it/s][A
 16%|█▌        | 25/157 [00:02<00:11, 11.57it/s][A
 17%|█▋        | 27/157 [00:02<00:11, 11.53it/s][A
 18%|█▊        | 29/157 [00:02<00:11, 11.50it/s][A
 20%|█▉        | 31/157 [00:02<00:10, 11.53it/s][A
 21%|██        | 33/157 [00:02<00:10, 11.53it/s][A
 22%|██▏       | 35/157 [00:02<00:10, 11.49it/s][A
 24%

{'eval_loss': 0.1806247979402542, 'eval_runtime': 13.6744, 'eval_samples_per_second': 182.823, 'eval_steps_per_second': 11.481, 'epoch': 0.61}


Model weights saved in MT5_large_A_art/checkpoint-2000/model.safetensors
tokenizer config file saved in MT5_large_A_art/checkpoint-2000/tokenizer_config.json
Special tokens file saved in MT5_large_A_art/checkpoint-2000/special_tokens_map.json
Copy vocab file to MT5_large_A_art/checkpoint-2000/spiece.model
tokenizer config file saved in MT5_large_A_art/tokenizer_config.json
Special tokens file saved in MT5_large_A_art/special_tokens_map.json
Copy vocab file to MT5_large_A_art/spiece.model
Deleting older checkpoint [MT5_large_A_art/checkpoint-1800] due to args.save_total_limit
 64%|██████▎   | 2100/3300 [1:52:11<21:54,  1.10s/it]    The following columns in the evaluation set don't have a corresponding argument in `T5ForConditionalGeneration.forward` and have been ignored: corrupt_sent, correct_sent. If corrupt_sent, correct_sent are not expected by `T5ForConditionalGeneration.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2500
  Batch siz

{'loss': 0.2151, 'grad_norm': 0.25606128573417664, 'learning_rate': 1.3472025074166033e-05, 'epoch': 0.64}



  0%|          | 0/157 [00:00<?, ?it/s][A
  2%|▏         | 3/157 [00:00<00:08, 17.84it/s][A
  3%|▎         | 5/157 [00:00<00:10, 14.20it/s][A
  4%|▍         | 7/157 [00:00<00:11, 12.92it/s][A
  6%|▌         | 9/157 [00:00<00:11, 12.36it/s][A
  7%|▋         | 11/157 [00:00<00:12, 12.04it/s][A
  8%|▊         | 13/157 [00:01<00:12, 11.87it/s][A
 10%|▉         | 15/157 [00:01<00:12, 11.74it/s][A
 11%|█         | 17/157 [00:01<00:11, 11.67it/s][A
 12%|█▏        | 19/157 [00:01<00:11, 11.66it/s][A
 13%|█▎        | 21/157 [00:01<00:11, 11.62it/s][A
 15%|█▍        | 23/157 [00:01<00:11, 11.61it/s][A
 16%|█▌        | 25/157 [00:02<00:11, 11.56it/s][A
 17%|█▋        | 27/157 [00:02<00:11, 11.55it/s][A
 18%|█▊        | 29/157 [00:02<00:11, 11.51it/s][A
 20%|█▉        | 31/157 [00:02<00:10, 11.52it/s][A
 21%|██        | 33/157 [00:02<00:10, 11.52it/s][A
 22%|██▏       | 35/157 [00:02<00:10, 11.58it/s][A
 24%|██▎       | 37/157 [00:03<00:10, 11.54it/s][A
 25%|██▍       | 39/157 

{'eval_loss': 0.18062323331832886, 'eval_runtime': 13.6414, 'eval_samples_per_second': 183.266, 'eval_steps_per_second': 11.509, 'epoch': 0.64}


Configuration saved in MT5_large_A_art/checkpoint-2100/generation_config.json
Model weights saved in MT5_large_A_art/checkpoint-2100/model.safetensors
tokenizer config file saved in MT5_large_A_art/checkpoint-2100/tokenizer_config.json
Special tokens file saved in MT5_large_A_art/checkpoint-2100/special_tokens_map.json
Copy vocab file to MT5_large_A_art/checkpoint-2100/spiece.model
Deleting older checkpoint [MT5_large_A_art/checkpoint-2000] due to args.save_total_limit
 67%|██████▋   | 2200/3300 [1:58:31<20:11,  1.10s/it]   The following columns in the evaluation set don't have a corresponding argument in `T5ForConditionalGeneration.forward` and have been ignored: corrupt_sent, correct_sent. If corrupt_sent, correct_sent are not expected by `T5ForConditionalGeneration.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2500
  Batch size = 16


{'loss': 0.2179, 'grad_norm': 0.32929399609565735, 'learning_rate': 1.1571987367028998e-05, 'epoch': 0.67}



  0%|          | 0/157 [00:00<?, ?it/s][A
  2%|▏         | 3/157 [00:00<00:08, 17.97it/s][A
  3%|▎         | 5/157 [00:00<00:10, 13.98it/s][A
  4%|▍         | 7/157 [00:00<00:11, 12.86it/s][A
  6%|▌         | 9/157 [00:00<00:11, 12.39it/s][A
  7%|▋         | 11/157 [00:00<00:12, 11.87it/s][A
  8%|▊         | 13/157 [00:01<00:12, 11.70it/s][A
 10%|▉         | 15/157 [00:01<00:12, 11.59it/s][A
 11%|█         | 17/157 [00:01<00:12, 11.46it/s][A
 12%|█▏        | 19/157 [00:01<00:12, 11.48it/s][A
 13%|█▎        | 21/157 [00:01<00:11, 11.46it/s][A
 15%|█▍        | 23/157 [00:01<00:11, 11.46it/s][A
 16%|█▌        | 25/157 [00:02<00:11, 11.44it/s][A
 17%|█▋        | 27/157 [00:02<00:11, 11.51it/s][A
 18%|█▊        | 29/157 [00:02<00:11, 11.48it/s][A
 20%|█▉        | 31/157 [00:02<00:10, 11.46it/s][A
 21%|██        | 33/157 [00:02<00:10, 11.44it/s][A
 22%|██▏       | 35/157 [00:02<00:11, 11.00it/s][A
 24%|██▎       | 37/157 [00:03<00:10, 11.09it/s][A
 25%|██▍       | 39/157 

{'eval_loss': 0.180497407913208, 'eval_runtime': 13.7169, 'eval_samples_per_second': 182.257, 'eval_steps_per_second': 11.446, 'epoch': 0.67}


Model weights saved in MT5_large_A_art/checkpoint-2200/model.safetensors
tokenizer config file saved in MT5_large_A_art/checkpoint-2200/tokenizer_config.json
Special tokens file saved in MT5_large_A_art/checkpoint-2200/special_tokens_map.json
Copy vocab file to MT5_large_A_art/checkpoint-2200/spiece.model
Deleting older checkpoint [MT5_large_A_art/checkpoint-2100] due to args.save_total_limit
 70%|██████▉   | 2300/3300 [2:04:54<18:18,  1.10s/it]   The following columns in the evaluation set don't have a corresponding argument in `T5ForConditionalGeneration.forward` and have been ignored: corrupt_sent, correct_sent. If corrupt_sent, correct_sent are not expected by `T5ForConditionalGeneration.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2500
  Batch size = 16


{'loss': 0.2219, 'grad_norm': 0.3057088255882263, 'learning_rate': 9.756788143374204e-06, 'epoch': 0.7}



  0%|          | 0/157 [00:00<?, ?it/s][A
  2%|▏         | 3/157 [00:00<00:08, 17.90it/s][A
  3%|▎         | 5/157 [00:00<00:10, 14.13it/s][A
  4%|▍         | 7/157 [00:00<00:11, 12.87it/s][A
  6%|▌         | 9/157 [00:00<00:12, 12.29it/s][A
  7%|▋         | 11/157 [00:00<00:12, 12.01it/s][A
  8%|▊         | 13/157 [00:01<00:12, 11.87it/s][A
 10%|▉         | 15/157 [00:01<00:12, 11.76it/s][A
 11%|█         | 17/157 [00:01<00:12, 11.66it/s][A
 12%|█▏        | 19/157 [00:01<00:11, 11.63it/s][A
 13%|█▎        | 21/157 [00:01<00:11, 11.59it/s][A
 15%|█▍        | 23/157 [00:01<00:11, 11.54it/s][A
 16%|█▌        | 25/157 [00:02<00:11, 11.51it/s][A
 17%|█▋        | 27/157 [00:02<00:11, 11.53it/s][A
 18%|█▊        | 29/157 [00:02<00:11, 11.57it/s][A
 20%|█▉        | 31/157 [00:02<00:10, 11.54it/s][A
 21%|██        | 33/157 [00:02<00:10, 11.52it/s][A
 22%|██▏       | 35/157 [00:02<00:10, 11.51it/s][A
 24%|██▎       | 37/157 [00:03<00:10, 11.47it/s][A
 25%|██▍       | 39/157 

{'eval_loss': 0.1805560290813446, 'eval_runtime': 13.6536, 'eval_samples_per_second': 183.101, 'eval_steps_per_second': 11.499, 'epoch': 0.7}


 73%|███████▎  | 2400/3300 [2:06:57<16:26,  1.10s/it]  The following columns in the evaluation set don't have a corresponding argument in `T5ForConditionalGeneration.forward` and have been ignored: corrupt_sent, correct_sent. If corrupt_sent, correct_sent are not expected by `T5ForConditionalGeneration.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2500
  Batch size = 16


{'loss': 0.2126, 'grad_norm': 0.3179866075515747, 'learning_rate': 8.046718530096967e-06, 'epoch': 0.73}



  0%|          | 0/157 [00:00<?, ?it/s][A
  2%|▏         | 3/157 [00:00<00:08, 17.88it/s][A
  3%|▎         | 5/157 [00:00<00:10, 14.10it/s][A
  4%|▍         | 7/157 [00:00<00:11, 12.85it/s][A
  6%|▌         | 9/157 [00:00<00:11, 12.34it/s][A
  7%|▋         | 11/157 [00:00<00:12, 12.08it/s][A
  8%|▊         | 13/157 [00:01<00:12, 11.89it/s][A
 10%|▉         | 15/157 [00:01<00:12, 11.78it/s][A
 11%|█         | 17/157 [00:01<00:11, 11.70it/s][A
 12%|█▏        | 19/157 [00:01<00:11, 11.65it/s][A
 13%|█▎        | 21/157 [00:01<00:11, 11.59it/s][A
 15%|█▍        | 23/157 [00:01<00:11, 11.58it/s][A
 16%|█▌        | 25/157 [00:02<00:11, 11.57it/s][A
 17%|█▋        | 27/157 [00:02<00:11, 11.60it/s][A
 18%|█▊        | 29/157 [00:02<00:11, 11.54it/s][A
 20%|█▉        | 31/157 [00:02<00:10, 11.53it/s][A
 21%|██        | 33/157 [00:02<00:10, 11.54it/s][A
 22%|██▏       | 35/157 [00:02<00:10, 11.50it/s][A
 24%|██▎       | 37/157 [00:03<00:10, 11.48it/s][A
 25%|██▍       | 39/157 

{'eval_loss': 0.18078969419002533, 'eval_runtime': 13.633, 'eval_samples_per_second': 183.379, 'eval_steps_per_second': 11.516, 'epoch': 0.73}


 76%|███████▌  | 2500/3300 [2:09:01<14:39,  1.10s/it]  The following columns in the evaluation set don't have a corresponding argument in `T5ForConditionalGeneration.forward` and have been ignored: corrupt_sent, correct_sent. If corrupt_sent, correct_sent are not expected by `T5ForConditionalGeneration.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2500
  Batch size = 16


{'loss': 0.2149, 'grad_norm': 0.32099252939224243, 'learning_rate': 6.460894467238846e-06, 'epoch': 0.76}



  0%|          | 0/157 [00:00<?, ?it/s][A
  2%|▏         | 3/157 [00:00<00:08, 17.91it/s][A
  3%|▎         | 5/157 [00:00<00:10, 14.20it/s][A
  4%|▍         | 7/157 [00:00<00:11, 12.96it/s][A
  6%|▌         | 9/157 [00:00<00:11, 12.37it/s][A
  7%|▋         | 11/157 [00:00<00:12, 12.03it/s][A
  8%|▊         | 13/157 [00:01<00:12, 11.89it/s][A
 10%|▉         | 15/157 [00:01<00:12, 11.73it/s][A
 11%|█         | 17/157 [00:01<00:12, 11.64it/s][A
 12%|█▏        | 19/157 [00:01<00:11, 11.57it/s][A
 13%|█▎        | 21/157 [00:01<00:11, 11.61it/s][A
 15%|█▍        | 23/157 [00:01<00:11, 11.57it/s][A
 16%|█▌        | 25/157 [00:02<00:11, 11.45it/s][A
 17%|█▋        | 27/157 [00:02<00:11, 11.51it/s][A
 18%|█▊        | 29/157 [00:02<00:11, 11.51it/s][A
 20%|█▉        | 31/157 [00:02<00:10, 11.47it/s][A
 21%|██        | 33/157 [00:02<00:10, 11.45it/s][A
 22%|██▏       | 35/157 [00:02<00:10, 11.52it/s][A
 24%|██▎       | 37/157 [00:03<00:10, 11.53it/s][A
 25%|██▍       | 39/157 

{'eval_loss': 0.18020594120025635, 'eval_runtime': 13.6724, 'eval_samples_per_second': 182.85, 'eval_steps_per_second': 11.483, 'epoch': 0.76}


Model weights saved in MT5_large_A_art/checkpoint-2500/model.safetensors
tokenizer config file saved in MT5_large_A_art/checkpoint-2500/tokenizer_config.json
Special tokens file saved in MT5_large_A_art/checkpoint-2500/special_tokens_map.json
Copy vocab file to MT5_large_A_art/checkpoint-2500/spiece.model
tokenizer config file saved in MT5_large_A_art/tokenizer_config.json
Special tokens file saved in MT5_large_A_art/special_tokens_map.json
Copy vocab file to MT5_large_A_art/spiece.model
Deleting older checkpoint [MT5_large_A_art/checkpoint-2200] due to args.save_total_limit
 78%|███████▊  | 2571/3300 [2:16:18<13:41,  1.13s/it]    '(MaxRetryError("HTTPSConnectionPool(host='hf-hub-lfs-us-east-1.s3-accelerate.amazonaws.com', port=443): Max retries exceeded with url: /repos/e9/26/e926eae4bb9b182029f1f693d95e00ecd84f67a4f5cbff6ce0858916c563d441/6faefeb67578541f8bbe8e8d4a89a19bf207b03b306ae2ad7fa65a6d09b3a3c6?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Crede

{'loss': 0.2137, 'grad_norm': 0.25741299986839294, 'learning_rate': 5.017043021269351e-06, 'epoch': 0.79}



  0%|          | 0/157 [00:00<?, ?it/s][A
  2%|▏         | 3/157 [00:00<00:09, 15.52it/s][A
  3%|▎         | 5/157 [00:00<00:11, 13.30it/s][A
  4%|▍         | 7/157 [00:00<00:13, 11.29it/s][A
  6%|▌         | 9/157 [00:00<00:12, 11.44it/s][AHTTP Error 503 thrown while requesting PUT https://hf-hub-lfs-us-east-1.s3-accelerate.amazonaws.com/repos/e9/26/e926eae4bb9b182029f1f693d95e00ecd84f67a4f5cbff6ce0858916c563d441/6faefeb67578541f8bbe8e8d4a89a19bf207b03b306ae2ad7fa65a6d09b3a3c6?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIA2JU7TKAQLC2QXPN7%2F20250420%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20250420T161636Z&X-Amz-Expires=86400&X-Amz-Signature=c92bb958268d6636ac6c84039a54a0ceb83782dc2fa7fa4ab82c2ea98ceafd63&X-Amz-SignedHeaders=host&partNumber=40&uploadId=n.rXRp67mKklzDPG4SZRwdg9YwH0BZ0PFgCM1zJmpOH3qZZpaDRV1Lyqd6Jg7e.RuItOSiHdJHpeyD6Kz.SP4HbldSef_N7Y5BDRbXIoXzHAbnfn4TT.2d3KFtpWLelI&x-id=UploadPart
Retrying in 1s [Retry 1/5].

  7%|▋  

{'eval_loss': 0.18059282004833221, 'eval_runtime': 14.4536, 'eval_samples_per_second': 172.967, 'eval_steps_per_second': 10.862, 'epoch': 0.79}


 79%|███████▉  | 2601/3300 [2:17:06<1:03:43,  5.47s/it]HTTP Error 503 thrown while requesting PUT https://hf-hub-lfs-us-east-1.s3-accelerate.amazonaws.com/repos/e9/26/e926eae4bb9b182029f1f693d95e00ecd84f67a4f5cbff6ce0858916c563d441/6faefeb67578541f8bbe8e8d4a89a19bf207b03b306ae2ad7fa65a6d09b3a3c6?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIA2JU7TKAQLC2QXPN7%2F20250420%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20250420T161636Z&X-Amz-Expires=86400&X-Amz-Signature=247ab93c5ddc14450745608eb6e46bee5ddce57dceb378fff7047a6b01264688&X-Amz-SignedHeaders=host&partNumber=55&uploadId=n.rXRp67mKklzDPG4SZRwdg9YwH0BZ0PFgCM1zJmpOH3qZZpaDRV1Lyqd6Jg7e.RuItOSiHdJHpeyD6Kz.SP4HbldSef_N7Y5BDRbXIoXzHAbnfn4TT.2d3KFtpWLelI&x-id=UploadPart
Retrying in 1s [Retry 1/5].
 79%|███████▉  | 2603/3300 [2:17:08<37:43,  3.25s/it]  HTTP Error 503 thrown while requesting PUT https://hf-hub-lfs-us-east-1.s3-accelerate.amazonaws.com/repos/e9/26/e926eae4bb9b182029f1f693d95e00ecd8

{'loss': 0.2146, 'grad_norm': 0.31132772564888, 'learning_rate': 3.7313042238314966e-06, 'epoch': 0.82}



  0%|          | 0/157 [00:00<?, ?it/s][A
  1%|▏         | 2/157 [00:00<00:07, 19.64it/s][A
  3%|▎         | 4/157 [00:00<00:11, 13.63it/s][A
  4%|▍         | 6/157 [00:00<00:12, 12.57it/s][A
  5%|▌         | 8/157 [00:00<00:13, 11.14it/s][A
  6%|▋         | 10/157 [00:00<00:13, 11.29it/s][A
  8%|▊         | 12/157 [00:01<00:13, 10.83it/s][A
  9%|▉         | 14/157 [00:01<00:13, 10.85it/s][A
 10%|█         | 16/157 [00:01<00:12, 10.88it/s][A
 11%|█▏        | 18/157 [00:01<00:12, 10.71it/s][A
 13%|█▎        | 20/157 [00:01<00:12, 10.98it/s][A
 14%|█▍        | 22/157 [00:01<00:12, 10.55it/s][A
 15%|█▌        | 24/157 [00:02<00:12, 10.93it/s][A
 17%|█▋        | 26/157 [00:02<00:11, 11.11it/s][A
 18%|█▊        | 28/157 [00:02<00:12, 10.61it/s][A
 19%|█▉        | 30/157 [00:02<00:11, 10.85it/s][A
 20%|██        | 32/157 [00:02<00:11, 10.61it/s][A
 22%|██▏       | 34/157 [00:03<00:11, 10.59it/s][A
 23%|██▎       | 36/157 [00:03<00:11, 10.84it/s][A
 24%|██▍       | 38/157 

{'eval_loss': 0.18028859794139862, 'eval_runtime': 14.9453, 'eval_samples_per_second': 167.276, 'eval_steps_per_second': 10.505, 'epoch': 0.82}


 85%|████████▍ | 2800/3300 [2:21:01<09:05,  1.09s/it]The following columns in the evaluation set don't have a corresponding argument in `T5ForConditionalGeneration.forward` and have been ignored: corrupt_sent, correct_sent. If corrupt_sent, correct_sent are not expected by `T5ForConditionalGeneration.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2500
  Batch size = 16


{'loss': 0.2078, 'grad_norm': 0.2947494089603424, 'learning_rate': 2.6180506510883246e-06, 'epoch': 0.85}



  0%|          | 0/157 [00:00<?, ?it/s][A
  2%|▏         | 3/157 [00:00<00:08, 17.69it/s][A
  3%|▎         | 5/157 [00:00<00:11, 13.78it/s][A
  4%|▍         | 7/157 [00:00<00:11, 12.62it/s][A
  6%|▌         | 9/157 [00:00<00:12, 12.19it/s][A
  7%|▋         | 11/157 [00:00<00:12, 11.96it/s][A
  8%|▊         | 13/157 [00:01<00:12, 11.75it/s][A
 10%|▉         | 15/157 [00:01<00:12, 11.66it/s][A
 11%|█         | 17/157 [00:01<00:12, 11.63it/s][A
 12%|█▏        | 19/157 [00:01<00:11, 11.62it/s][A
 13%|█▎        | 21/157 [00:01<00:11, 11.57it/s][A
 15%|█▍        | 23/157 [00:01<00:11, 11.49it/s][A
 16%|█▌        | 25/157 [00:02<00:11, 11.48it/s][A
 17%|█▋        | 27/157 [00:02<00:11, 11.49it/s][A
 18%|█▊        | 29/157 [00:02<00:11, 11.40it/s][A
 20%|█▉        | 31/157 [00:02<00:11, 11.37it/s][A
 21%|██        | 33/157 [00:02<00:10, 11.37it/s][A
 22%|██▏       | 35/157 [00:02<00:10, 11.31it/s][A
 24%|██▎       | 37/157 [00:03<00:10, 11.29it/s][A
 25%|██▍       | 39/157 

{'eval_loss': 0.18028217554092407, 'eval_runtime': 13.7645, 'eval_samples_per_second': 181.626, 'eval_steps_per_second': 11.406, 'epoch': 0.85}


 88%|████████▊ | 2900/3300 [2:23:05<07:16,  1.09s/it]The following columns in the evaluation set don't have a corresponding argument in `T5ForConditionalGeneration.forward` and have been ignored: corrupt_sent, correct_sent. If corrupt_sent, correct_sent are not expected by `T5ForConditionalGeneration.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2500
  Batch size = 16


{'loss': 0.2084, 'grad_norm': 0.33011287450790405, 'learning_rate': 1.6897267604948877e-06, 'epoch': 0.88}



  0%|          | 0/157 [00:00<?, ?it/s][A
  2%|▏         | 3/157 [00:00<00:08, 17.70it/s][A
  3%|▎         | 5/157 [00:00<00:10, 13.94it/s][A
  4%|▍         | 7/157 [00:00<00:11, 12.89it/s][A
  6%|▌         | 9/157 [00:00<00:11, 12.35it/s][A
  7%|▋         | 11/157 [00:00<00:12, 12.00it/s][A
  8%|▊         | 13/157 [00:01<00:12, 11.79it/s][A
 10%|▉         | 15/157 [00:01<00:12, 11.68it/s][A
 11%|█         | 17/157 [00:01<00:12, 11.66it/s][A
 12%|█▏        | 19/157 [00:01<00:11, 11.63it/s][A
 13%|█▎        | 21/157 [00:01<00:11, 11.60it/s][A
 15%|█▍        | 23/157 [00:01<00:11, 11.54it/s][A
 16%|█▌        | 25/157 [00:02<00:11, 11.52it/s][A
 17%|█▋        | 27/157 [00:02<00:11, 11.48it/s][A
 18%|█▊        | 29/157 [00:02<00:11, 11.50it/s][A
 20%|█▉        | 31/157 [00:02<00:10, 11.53it/s][A
 21%|██        | 33/157 [00:02<00:10, 11.48it/s][A
 22%|██▏       | 35/157 [00:02<00:10, 11.51it/s][A
 24%|██▎       | 37/157 [00:03<00:10, 11.42it/s][A
 25%|██▍       | 39/157 

{'eval_loss': 0.18053436279296875, 'eval_runtime': 13.6986, 'eval_samples_per_second': 182.5, 'eval_steps_per_second': 11.461, 'epoch': 0.88}


 91%|█████████ | 3000/3300 [2:25:07<05:26,  1.09s/it]The following columns in the evaluation set don't have a corresponding argument in `T5ForConditionalGeneration.forward` and have been ignored: corrupt_sent, correct_sent. If corrupt_sent, correct_sent are not expected by `T5ForConditionalGeneration.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2500
  Batch size = 16


{'loss': 0.2153, 'grad_norm': 0.3253365755081177, 'learning_rate': 9.56709780962763e-07, 'epoch': 0.91}



  0%|          | 0/157 [00:00<?, ?it/s][A
  2%|▏         | 3/157 [00:00<00:08, 17.79it/s][A
  3%|▎         | 5/157 [00:00<00:10, 13.97it/s][A
  4%|▍         | 7/157 [00:00<00:11, 12.87it/s][A
  6%|▌         | 9/157 [00:00<00:11, 12.35it/s][A
  7%|▋         | 11/157 [00:00<00:12, 12.05it/s][A
  8%|▊         | 13/157 [00:01<00:12, 11.89it/s][A
 10%|▉         | 15/157 [00:01<00:12, 11.77it/s][A
 11%|█         | 17/157 [00:01<00:11, 11.67it/s][A
 12%|█▏        | 19/157 [00:01<00:11, 11.62it/s][A
 13%|█▎        | 21/157 [00:01<00:11, 11.56it/s][A
 15%|█▍        | 23/157 [00:01<00:11, 11.54it/s][A
 16%|█▌        | 25/157 [00:02<00:11, 11.53it/s][A
 17%|█▋        | 27/157 [00:02<00:11, 11.52it/s][A
 18%|█▊        | 29/157 [00:02<00:11, 11.52it/s][A
 20%|█▉        | 31/157 [00:02<00:10, 11.52it/s][A
 21%|██        | 33/157 [00:02<00:10, 11.51it/s][A
 22%|██▏       | 35/157 [00:02<00:10, 11.52it/s][A
 24%|██▎       | 37/157 [00:03<00:10, 11.41it/s][A
 25%|██▍       | 39/157 

{'eval_loss': 0.18012502789497375, 'eval_runtime': 13.6592, 'eval_samples_per_second': 183.027, 'eval_steps_per_second': 11.494, 'epoch': 0.91}


Model weights saved in MT5_large_A_art/checkpoint-3000/model.safetensors
tokenizer config file saved in MT5_large_A_art/checkpoint-3000/tokenizer_config.json
Special tokens file saved in MT5_large_A_art/checkpoint-3000/special_tokens_map.json
Copy vocab file to MT5_large_A_art/checkpoint-3000/spiece.model
tokenizer config file saved in MT5_large_A_art/tokenizer_config.json
Special tokens file saved in MT5_large_A_art/special_tokens_map.json
Copy vocab file to MT5_large_A_art/spiece.model
Deleting older checkpoint [MT5_large_A_art/checkpoint-2500] due to args.save_total_limit
 94%|█████████▍| 3100/3300 [2:32:53<03:38,  1.09s/it]   The following columns in the evaluation set don't have a corresponding argument in `T5ForConditionalGeneration.forward` and have been ignored: corrupt_sent, correct_sent. If corrupt_sent, correct_sent are not expected by `T5ForConditionalGeneration.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2500
  Batch size

{'loss': 0.2134, 'grad_norm': 0.30028149485588074, 'learning_rate': 4.271937114504698e-07, 'epoch': 0.94}



  0%|          | 0/157 [00:00<?, ?it/s][A
  2%|▏         | 3/157 [00:00<00:08, 17.68it/s][A
  3%|▎         | 5/157 [00:00<00:10, 13.89it/s][A
  4%|▍         | 7/157 [00:00<00:11, 12.85it/s][A
  6%|▌         | 9/157 [00:00<00:12, 12.32it/s][A
  7%|▋         | 11/157 [00:00<00:12, 12.01it/s][A
  8%|▊         | 13/157 [00:01<00:12, 11.82it/s][A
 10%|▉         | 15/157 [00:01<00:12, 11.72it/s][A
 11%|█         | 17/157 [00:01<00:12, 11.61it/s][A
 12%|█▏        | 19/157 [00:01<00:11, 11.56it/s][A
 13%|█▎        | 21/157 [00:01<00:11, 11.54it/s][A
 15%|█▍        | 23/157 [00:01<00:11, 11.56it/s][A
 16%|█▌        | 25/157 [00:02<00:11, 11.54it/s][A
 17%|█▋        | 27/157 [00:02<00:11, 11.48it/s][A
 18%|█▊        | 29/157 [00:02<00:11, 11.52it/s][A
 20%|█▉        | 31/157 [00:02<00:10, 11.52it/s][A
 21%|██        | 33/157 [00:02<00:10, 11.48it/s][A
 22%|██▏       | 35/157 [00:02<00:10, 11.51it/s][A
 24%|██▎       | 37/157 [00:03<00:10, 11.50it/s][A
 25%|██▍       | 39/157 

{'eval_loss': 0.17994807660579681, 'eval_runtime': 13.6837, 'eval_samples_per_second': 182.699, 'eval_steps_per_second': 11.474, 'epoch': 0.94}


Saving model checkpoint to MT5_large_A_art/checkpoint-3100
Configuration saved in MT5_large_A_art/checkpoint-3100/config.json
Configuration saved in MT5_large_A_art/checkpoint-3100/generation_config.json
Model weights saved in MT5_large_A_art/checkpoint-3100/model.safetensors
tokenizer config file saved in MT5_large_A_art/checkpoint-3100/tokenizer_config.json
Special tokens file saved in MT5_large_A_art/checkpoint-3100/special_tokens_map.json
Copy vocab file to MT5_large_A_art/checkpoint-3100/spiece.model
Deleting older checkpoint [MT5_large_A_art/checkpoint-3000] due to args.save_total_limit
 94%|█████████▍| 3117/3300 [2:37:44<04:10,  1.37s/it]  

In [28]:
trainer.push_to_hub('mt5_A_v1')

Saving model checkpoint to MT5_large_A_art
Configuration saved in MT5_large_A_art/config.json
Configuration saved in MT5_large_A_art/generation_config.json
Model weights saved in MT5_large_A_art/model.safetensors
tokenizer config file saved in MT5_large_A_art/tokenizer_config.json
Special tokens file saved in MT5_large_A_art/special_tokens_map.json
Copy vocab file to MT5_large_A_art/spiece.model
Dropping the following result as it does not have all the necessary fields:
{'task': {'name': 'Sequence-to-sequence Language Modeling', 'type': 'text2text-generation'}}
events.out.tfevents.1745157618.g21-b5c2fce8-3b68-327d-ad5b-a53ce6085585.26688.0:   0%|          | 0.00/21.7k [00:00<?, ?B/s]

Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s][A[A
model.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s][A


events.out.tfevents.1745171965.g21-b5c2fce8-3b68-327d-ad5b-a53ce6085585.26688.1:   0%|          | 0.00/359 [00:00<?, ?B/s][A[A[A
model.safetensors:   0%|          | 8.19k/

CommitInfo(commit_url='https://huggingface.co/mika5883/MT5_large_A_art/commit/400b001f40225087e576886769859bffdae7bdea', commit_message='mt5_A_v1', commit_description='', oid='400b001f40225087e576886769859bffdae7bdea', pr_url=None, repo_url=RepoUrl('https://huggingface.co/mika5883/MT5_large_A_art', endpoint='https://huggingface.co', repo_type='model', repo_id='mika5883/MT5_large_A_art'), pr_revision=None, pr_num=None)

In [27]:
trainer.evaluate(test)

The following columns in the evaluation set don't have a corresponding argument in `T5ForConditionalGeneration.forward` and have been ignored: corrupt_sent, correct_sent. If corrupt_sent, correct_sent are not expected by `T5ForConditionalGeneration.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 16
100%|██████████| 313/313 [00:27<00:00, 11.49it/s]


{'eval_loss': 0.200588196516037,
 'eval_runtime': 27.3217,
 'eval_samples_per_second': 183.004,
 'eval_steps_per_second': 11.456,
 'epoch': 0.9996970614965162}