# Training Abstract2Title

In [1]:
import nltk
from nltk.tokenize import sent_tokenize
import numpy as np
# import wandb
from datasets import load_from_disk, load_metric
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, \
                         DataCollatorForSeq2Seq, Seq2SeqTrainingArguments, Seq2SeqTrainer

In [2]:
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\E\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [3]:
# # replace with your weights and biases username otherwise comment this
# wandb.init(project="abstract-to-title", entity="nerdimite")

## Preprocess Data

In [4]:
# Initialize T5-base tokenizer
tokenizer = AutoTokenizer.from_pretrained('t5-base')

Downloading: 100%|██████████| 1.17k/1.17k [00:00<00:00, 1.38MB/s]
Downloading: 100%|██████████| 773k/773k [00:19<00:00, 40.7kB/s] 
Downloading: 100%|██████████| 1.32M/1.32M [00:39<00:00, 34.9kB/s]


In [6]:
# Load the processed data
dataset = load_from_disk('../dataset/arxiv_AI_dataset')

In [7]:
MAX_SOURCE_LEN = 512
MAX_TARGET_LEN = 128

In [8]:
def preprocess_data(example):
    
    model_inputs = tokenizer(example['abstract'], max_length=MAX_SOURCE_LEN, padding=True, truncation=True)

    with tokenizer.as_target_tokenizer():
        labels = tokenizer(example['title'], max_length=MAX_TARGET_LEN, padding=True, truncation=True)

    # Replace all pad token ids in the labels by -100 to ignore padding in the loss
    labels["input_ids"] = [
        [(l if l != tokenizer.pad_token_id else -100) for l in label] for label in labels["input_ids"]
    ]

    model_inputs['labels'] = labels["input_ids"]

    return model_inputs

In [9]:
# Apply preprocess_data() to the whole dataset
processed_dataset = dataset.map(
    preprocess_data,
    batched=True,
    remove_columns=['abstract', 'title'],
    desc="Running tokenizer on dataset",
)

processed_dataset

Running tokenizer on dataset: 100%|██████████| 37/37 [00:08<00:00,  4.32ba/s]
Running tokenizer on dataset: 100%|██████████| 3/3 [00:00<00:00,  4.77ba/s]
Running tokenizer on dataset: 100%|██████████| 3/3 [00:00<00:00,  6.15ba/s]


DatasetDict({
    train: Dataset({
        features: ['attention_mask', 'input_ids', 'labels'],
        num_rows: 36074
    })
    test: Dataset({
        features: ['attention_mask', 'input_ids', 'labels'],
        num_rows: 2005
    })
    val: Dataset({
        features: ['attention_mask', 'input_ids', 'labels'],
        num_rows: 2004
    })
})

## Training Parameters

In [10]:
batch_size = 8
num_epochs = 5
learning_rate = 5.6e-5
weight_decay = 0.01
log_every = 50
eval_every = 1000
lr_scheduler_type = "linear"

In [11]:
# Define training arguments
training_args = Seq2SeqTrainingArguments(
    output_dir="model-t5-base",
    evaluation_strategy="steps",
    eval_steps=eval_every,
    learning_rate=learning_rate,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    weight_decay=weight_decay,
    save_steps=500,
    save_total_limit=3,
    num_train_epochs=num_epochs,
    predict_with_generate=True,
    logging_steps=log_every,
    group_by_length=True,
    lr_scheduler_type=lr_scheduler_type,
    report_to="wandb",
    resume_from_checkpoint=True,
)

## Train

In [12]:
# Initialize T5-base model
model = AutoModelForSeq2SeqLM.from_pretrained('t5-base')

Downloading: 100%|██████████| 850M/850M [00:13<00:00, 66.9MB/s] 


In [14]:
# Define ROGUE metrics on evaluation data
metric = load_metric("rouge")

def compute_metrics(eval_pred):
    predictions, labels = eval_pred

    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    
    # Replace -100 in the labels as we can't decode them
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
    
    # ROUGE expects a newline after each sentence
    decoded_preds = ["\n".join(sent_tokenize(pred.strip())) for pred in decoded_preds]
    decoded_labels = ["\n".join(sent_tokenize(label.strip())) for label in decoded_labels]
    
    # Compute ROUGE scores and get the median scores
    result = metric.compute(
        predictions=decoded_preds, references=decoded_labels, use_stemmer=True
    )
    result = {key: value.mid.fmeasure * 100 for key, value in result.items()}

    return {k: round(v, 4) for k, v in result.items()}

In [15]:
# Dynamic padding in batch using a data collator
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

In [16]:
# Define the trainer
trainer = Seq2SeqTrainer(
    model,
    training_args,
    train_dataset=processed_dataset["train"],
    eval_dataset=processed_dataset["val"],
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

In [17]:
%%time
trainer.train()

***** Running training *****
  Num examples = 36074
  Num Epochs = 5
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 22550
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mlianaling[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.10 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


  0%|          | 50/22550 [00:27<2:36:32,  2.40it/s]

{'loss': 2.5518, 'learning_rate': 5.5875831485587586e-05, 'epoch': 0.01}


  0%|          | 100/22550 [00:51<3:05:51,  2.01it/s]

{'loss': 2.2225, 'learning_rate': 5.5751662971175166e-05, 'epoch': 0.02}


  1%|          | 150/22550 [01:16<2:42:23,  2.30it/s]

{'loss': 2.0629, 'learning_rate': 5.562749445676275e-05, 'epoch': 0.03}


  1%|          | 200/22550 [01:41<3:05:15,  2.01it/s]

{'loss': 2.1519, 'learning_rate': 5.5503325942350327e-05, 'epoch': 0.04}


  1%|          | 250/22550 [02:05<2:42:00,  2.29it/s]

{'loss': 1.9639, 'learning_rate': 5.5379157427937913e-05, 'epoch': 0.06}


  1%|▏         | 300/22550 [02:30<3:05:17,  2.00it/s]

{'loss': 1.9917, 'learning_rate': 5.52549889135255e-05, 'epoch': 0.07}


  2%|▏         | 350/22550 [02:54<2:30:04,  2.47it/s]

{'loss': 1.9675, 'learning_rate': 5.513082039911308e-05, 'epoch': 0.08}


  2%|▏         | 400/22550 [03:17<2:50:16,  2.17it/s]

{'loss': 2.0197, 'learning_rate': 5.500665188470067e-05, 'epoch': 0.09}


  2%|▏         | 450/22550 [03:40<2:27:03,  2.50it/s]

{'loss': 2.0085, 'learning_rate': 5.488248337028825e-05, 'epoch': 0.1}


  2%|▏         | 500/22550 [04:03<2:49:45,  2.16it/s]Saving model checkpoint to model-t5-base\checkpoint-500
Configuration saved in model-t5-base\checkpoint-500\config.json


{'loss': 2.0359, 'learning_rate': 5.475831485587583e-05, 'epoch': 0.11}


Model weights saved in model-t5-base\checkpoint-500\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-500\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-500\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-500\spiece.model
  2%|▏         | 550/22550 [04:29<2:28:43,  2.47it/s] 

{'loss': 2.0193, 'learning_rate': 5.4634146341463415e-05, 'epoch': 0.12}


  3%|▎         | 600/22550 [04:52<2:49:12,  2.16it/s]

{'loss': 2.0158, 'learning_rate': 5.4509977827050995e-05, 'epoch': 0.13}


  3%|▎         | 650/22550 [05:15<2:25:51,  2.50it/s]

{'loss': 2.0576, 'learning_rate': 5.438580931263858e-05, 'epoch': 0.14}


  3%|▎         | 700/22550 [05:38<2:48:41,  2.16it/s]

{'loss': 1.9207, 'learning_rate': 5.426164079822616e-05, 'epoch': 0.16}


  3%|▎         | 750/22550 [06:01<2:25:11,  2.50it/s]

{'loss': 1.956, 'learning_rate': 5.413747228381375e-05, 'epoch': 0.17}


  4%|▎         | 800/22550 [06:24<2:49:59,  2.13it/s]

{'loss': 1.9272, 'learning_rate': 5.401330376940133e-05, 'epoch': 0.18}


  4%|▍         | 850/22550 [06:47<2:24:33,  2.50it/s]

{'loss': 1.8742, 'learning_rate': 5.388913525498891e-05, 'epoch': 0.19}


  4%|▍         | 900/22550 [07:10<2:47:18,  2.16it/s]

{'loss': 1.9921, 'learning_rate': 5.3764966740576496e-05, 'epoch': 0.2}


  4%|▍         | 950/22550 [07:33<2:25:09,  2.48it/s]

{'loss': 1.9838, 'learning_rate': 5.364079822616408e-05, 'epoch': 0.21}


  4%|▍         | 1000/22550 [07:56<2:45:15,  2.17it/s]***** Running Evaluation *****
  Num examples = 2004
  Batch size = 8


{'loss': 1.9156, 'learning_rate': 5.351662971175166e-05, 'epoch': 0.22}


                                                      
  4%|▍         | 1000/22550 [10:13<2:45:15,  2.17it/s]Saving model checkpoint to model-t5-base\checkpoint-1000
Configuration saved in model-t5-base\checkpoint-1000\config.json


{'eval_loss': 1.7559678554534912, 'eval_rouge1': 44.9326, 'eval_rouge2': 25.1582, 'eval_rougeL': 39.9372, 'eval_rougeLsum': 39.9411, 'eval_runtime': 136.3143, 'eval_samples_per_second': 14.701, 'eval_steps_per_second': 1.841, 'epoch': 0.22}


Model weights saved in model-t5-base\checkpoint-1000\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-1000\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-1000\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-1000\spiece.model
  5%|▍         | 1050/22550 [10:39<2:22:15,  2.52it/s]  

{'loss': 1.9542, 'learning_rate': 5.339246119733925e-05, 'epoch': 0.23}


  5%|▍         | 1100/22550 [11:02<2:46:27,  2.15it/s]

{'loss': 1.888, 'learning_rate': 5.326829268292682e-05, 'epoch': 0.24}


  5%|▌         | 1150/22550 [11:25<2:24:53,  2.46it/s]

{'loss': 1.9554, 'learning_rate': 5.314412416851441e-05, 'epoch': 0.25}


  5%|▌         | 1200/22550 [11:48<2:46:03,  2.14it/s]

{'loss': 2.0085, 'learning_rate': 5.3019955654102e-05, 'epoch': 0.27}


  6%|▌         | 1250/22550 [12:11<2:24:01,  2.46it/s]

{'loss': 1.9091, 'learning_rate': 5.289578713968958e-05, 'epoch': 0.28}


  6%|▌         | 1300/22550 [12:34<2:44:02,  2.16it/s]

{'loss': 2.0034, 'learning_rate': 5.2771618625277164e-05, 'epoch': 0.29}


  6%|▌         | 1350/22550 [12:58<2:22:14,  2.48it/s]

{'loss': 1.954, 'learning_rate': 5.2647450110864744e-05, 'epoch': 0.3}


  6%|▌         | 1400/22550 [13:21<2:42:49,  2.17it/s]

{'loss': 1.8586, 'learning_rate': 5.2523281596452325e-05, 'epoch': 0.31}


  6%|▋         | 1450/22550 [13:44<2:19:33,  2.52it/s]

{'loss': 1.8859, 'learning_rate': 5.239911308203991e-05, 'epoch': 0.32}


  7%|▋         | 1500/22550 [14:07<2:42:11,  2.16it/s]Saving model checkpoint to model-t5-base\checkpoint-1500
Configuration saved in model-t5-base\checkpoint-1500\config.json


{'loss': 1.895, 'learning_rate': 5.227494456762749e-05, 'epoch': 0.33}


Model weights saved in model-t5-base\checkpoint-1500\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-1500\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-1500\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-1500\spiece.model
  7%|▋         | 1550/22550 [14:33<2:19:18,  2.51it/s]

{'loss': 1.9811, 'learning_rate': 5.215077605321508e-05, 'epoch': 0.34}


  7%|▋         | 1600/22550 [14:56<2:41:56,  2.16it/s]

{'loss': 1.9155, 'learning_rate': 5.202660753880266e-05, 'epoch': 0.35}


  7%|▋         | 1650/22550 [15:19<2:20:46,  2.47it/s]

{'loss': 1.8897, 'learning_rate': 5.1902439024390246e-05, 'epoch': 0.37}


  8%|▊         | 1700/22550 [15:42<2:41:28,  2.15it/s]

{'loss': 1.8841, 'learning_rate': 5.1778270509977826e-05, 'epoch': 0.38}


  8%|▊         | 1750/22550 [16:05<2:17:29,  2.52it/s]

{'loss': 1.8604, 'learning_rate': 5.1654101995565406e-05, 'epoch': 0.39}


  8%|▊         | 1800/22550 [16:28<2:38:47,  2.18it/s]

{'loss': 1.9257, 'learning_rate': 5.152993348115299e-05, 'epoch': 0.4}


  8%|▊         | 1850/22550 [16:51<2:19:47,  2.47it/s]

{'loss': 1.8557, 'learning_rate': 5.140576496674057e-05, 'epoch': 0.41}


  8%|▊         | 1900/22550 [17:15<2:39:22,  2.16it/s]

{'loss': 1.8832, 'learning_rate': 5.128159645232816e-05, 'epoch': 0.42}


  9%|▊         | 1950/22550 [17:37<2:16:05,  2.52it/s]

{'loss': 1.9125, 'learning_rate': 5.115742793791575e-05, 'epoch': 0.43}


  9%|▉         | 2000/22550 [18:01<2:37:48,  2.17it/s]***** Running Evaluation *****
  Num examples = 2004
  Batch size = 8


{'loss': 1.8438, 'learning_rate': 5.103325942350332e-05, 'epoch': 0.44}


                                                      
  9%|▉         | 2000/22550 [20:17<2:37:48,  2.17it/s]Saving model checkpoint to model-t5-base\checkpoint-2000
Configuration saved in model-t5-base\checkpoint-2000\config.json


{'eval_loss': 1.710827350616455, 'eval_rouge1': 45.9922, 'eval_rouge2': 25.7992, 'eval_rougeL': 40.7286, 'eval_rougeLsum': 40.7414, 'eval_runtime': 136.6794, 'eval_samples_per_second': 14.662, 'eval_steps_per_second': 1.836, 'epoch': 0.44}


Model weights saved in model-t5-base\checkpoint-2000\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-2000\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-2000\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-2000\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-500] due to args.save_total_limit
  9%|▉         | 2050/22550 [20:44<2:19:09,  2.46it/s]  

{'loss': 1.9169, 'learning_rate': 5.090909090909091e-05, 'epoch': 0.45}


  9%|▉         | 2100/22550 [21:07<2:36:23,  2.18it/s]

{'loss': 1.8942, 'learning_rate': 5.0784922394678494e-05, 'epoch': 0.47}


 10%|▉         | 2150/22550 [21:29<2:15:50,  2.50it/s]

{'loss': 1.8536, 'learning_rate': 5.0660753880266074e-05, 'epoch': 0.48}


 10%|▉         | 2200/22550 [21:53<2:37:03,  2.16it/s]

{'loss': 1.8894, 'learning_rate': 5.053658536585366e-05, 'epoch': 0.49}


 10%|▉         | 2250/22550 [22:16<2:16:13,  2.48it/s]

{'loss': 1.8732, 'learning_rate': 5.041241685144124e-05, 'epoch': 0.5}


 10%|█         | 2300/22550 [22:39<2:36:00,  2.16it/s]

{'loss': 1.8614, 'learning_rate': 5.028824833702882e-05, 'epoch': 0.51}


 10%|█         | 2350/22550 [23:02<2:15:27,  2.49it/s]

{'loss': 1.8556, 'learning_rate': 5.016407982261641e-05, 'epoch': 0.52}


 11%|█         | 2400/22550 [23:25<2:35:08,  2.16it/s]

{'loss': 1.91, 'learning_rate': 5.003991130820399e-05, 'epoch': 0.53}


 11%|█         | 2450/22550 [23:48<2:14:40,  2.49it/s]

{'loss': 1.9198, 'learning_rate': 4.9915742793791575e-05, 'epoch': 0.54}


 11%|█         | 2500/22550 [24:11<2:33:23,  2.18it/s]Saving model checkpoint to model-t5-base\checkpoint-2500
Configuration saved in model-t5-base\checkpoint-2500\config.json


{'loss': 1.9915, 'learning_rate': 4.9791574279379156e-05, 'epoch': 0.55}


Model weights saved in model-t5-base\checkpoint-2500\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-2500\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-2500\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-2500\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-1000] due to args.save_total_limit
 11%|█▏        | 2550/22550 [24:37<2:13:20,  2.50it/s]

{'loss': 1.8357, 'learning_rate': 4.966740576496674e-05, 'epoch': 0.57}


 12%|█▏        | 2600/22550 [25:00<2:33:42,  2.16it/s]

{'loss': 1.8699, 'learning_rate': 4.954323725055432e-05, 'epoch': 0.58}


 12%|█▏        | 2650/22550 [25:23<2:13:05,  2.49it/s]

{'loss': 1.9316, 'learning_rate': 4.94190687361419e-05, 'epoch': 0.59}


 12%|█▏        | 2700/22550 [25:47<2:34:09,  2.15it/s]

{'loss': 1.8742, 'learning_rate': 4.929490022172949e-05, 'epoch': 0.6}


 12%|█▏        | 2750/22550 [26:10<2:12:26,  2.49it/s]

{'loss': 1.8719, 'learning_rate': 4.917073170731707e-05, 'epoch': 0.61}


 12%|█▏        | 2800/22550 [26:33<2:31:11,  2.18it/s]

{'loss': 1.7992, 'learning_rate': 4.904656319290466e-05, 'epoch': 0.62}


 13%|█▎        | 2850/22550 [26:56<2:11:55,  2.49it/s]

{'loss': 1.8251, 'learning_rate': 4.8922394678492244e-05, 'epoch': 0.63}


 13%|█▎        | 2900/22550 [27:19<2:31:46,  2.16it/s]

{'loss': 1.8998, 'learning_rate': 4.879822616407982e-05, 'epoch': 0.64}


 13%|█▎        | 2950/22550 [27:42<2:12:04,  2.47it/s]

{'loss': 1.8849, 'learning_rate': 4.8674057649667404e-05, 'epoch': 0.65}


 13%|█▎        | 3000/22550 [28:05<2:32:40,  2.13it/s]***** Running Evaluation *****
  Num examples = 2004
  Batch size = 8


{'loss': 1.8939, 'learning_rate': 4.8549889135254984e-05, 'epoch': 0.67}


                                                      
 13%|█▎        | 3000/22550 [30:23<2:32:40,  2.13it/s]Saving model checkpoint to model-t5-base\checkpoint-3000
Configuration saved in model-t5-base\checkpoint-3000\config.json


{'eval_loss': 1.6880685091018677, 'eval_rouge1': 46.3145, 'eval_rouge2': 26.2967, 'eval_rougeL': 41.2322, 'eval_rougeLsum': 41.2704, 'eval_runtime': 137.7969, 'eval_samples_per_second': 14.543, 'eval_steps_per_second': 1.822, 'epoch': 0.67}


Model weights saved in model-t5-base\checkpoint-3000\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-3000\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-3000\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-3000\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-1500] due to args.save_total_limit
 14%|█▎        | 3050/22550 [30:50<2:16:01,  2.39it/s]  

{'loss': 1.8524, 'learning_rate': 4.842572062084257e-05, 'epoch': 0.68}


 14%|█▎        | 3100/22550 [31:13<2:30:19,  2.16it/s]

{'loss': 1.8737, 'learning_rate': 4.830155210643016e-05, 'epoch': 0.69}


 14%|█▍        | 3150/22550 [31:36<2:09:25,  2.50it/s]

{'loss': 1.8555, 'learning_rate': 4.817738359201774e-05, 'epoch': 0.7}


 14%|█▍        | 3200/22550 [31:59<2:29:29,  2.16it/s]

{'loss': 1.8752, 'learning_rate': 4.805321507760532e-05, 'epoch': 0.71}


 14%|█▍        | 3250/22550 [32:22<2:10:39,  2.46it/s]

{'loss': 1.8443, 'learning_rate': 4.7929046563192905e-05, 'epoch': 0.72}


 15%|█▍        | 3300/22550 [32:45<2:28:33,  2.16it/s]

{'loss': 1.8771, 'learning_rate': 4.7804878048780485e-05, 'epoch': 0.73}


 15%|█▍        | 3350/22550 [33:08<2:07:25,  2.51it/s]

{'loss': 1.7984, 'learning_rate': 4.768070953436807e-05, 'epoch': 0.74}


 15%|█▌        | 3400/22550 [33:32<2:28:37,  2.15it/s]

{'loss': 1.792, 'learning_rate': 4.755654101995565e-05, 'epoch': 0.75}


 15%|█▌        | 3450/22550 [33:54<2:06:58,  2.51it/s]

{'loss': 1.8169, 'learning_rate': 4.743237250554324e-05, 'epoch': 0.76}


 16%|█▌        | 3500/22550 [34:17<2:28:04,  2.14it/s]Saving model checkpoint to model-t5-base\checkpoint-3500
Configuration saved in model-t5-base\checkpoint-3500\config.json


{'loss': 1.8258, 'learning_rate': 4.730820399113082e-05, 'epoch': 0.78}


Model weights saved in model-t5-base\checkpoint-3500\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-3500\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-3500\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-3500\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-2000] due to args.save_total_limit
 16%|█▌        | 3550/22550 [34:44<2:05:57,  2.51it/s]

{'loss': 1.7932, 'learning_rate': 4.71840354767184e-05, 'epoch': 0.79}


 16%|█▌        | 3600/22550 [35:07<2:26:05,  2.16it/s]

{'loss': 1.8644, 'learning_rate': 4.7059866962305986e-05, 'epoch': 0.8}


 16%|█▌        | 3650/22550 [35:30<2:06:05,  2.50it/s]

{'loss': 1.87, 'learning_rate': 4.693569844789357e-05, 'epoch': 0.81}


 16%|█▋        | 3700/22550 [35:53<2:24:33,  2.17it/s]

{'loss': 1.811, 'learning_rate': 4.6811529933481154e-05, 'epoch': 0.82}


 17%|█▋        | 3750/22550 [36:16<2:06:00,  2.49it/s]

{'loss': 1.8355, 'learning_rate': 4.668736141906874e-05, 'epoch': 0.83}


 17%|█▋        | 3800/22550 [36:39<2:25:22,  2.15it/s]

{'loss': 1.8544, 'learning_rate': 4.656319290465632e-05, 'epoch': 0.84}


 17%|█▋        | 3850/22550 [37:02<2:05:18,  2.49it/s]

{'loss': 1.9026, 'learning_rate': 4.64390243902439e-05, 'epoch': 0.85}


 17%|█▋        | 3900/22550 [37:25<2:23:09,  2.17it/s]

{'loss': 1.7872, 'learning_rate': 4.631485587583148e-05, 'epoch': 0.86}


 18%|█▊        | 3950/22550 [37:48<2:04:31,  2.49it/s]

{'loss': 1.7585, 'learning_rate': 4.619068736141907e-05, 'epoch': 0.88}


 18%|█▊        | 4000/22550 [38:11<2:23:31,  2.15it/s]***** Running Evaluation *****
  Num examples = 2004
  Batch size = 8


{'loss': 1.8286, 'learning_rate': 4.6066518847006655e-05, 'epoch': 0.89}


                                                      
 18%|█▊        | 4000/22550 [40:27<2:23:31,  2.15it/s]Saving model checkpoint to model-t5-base\checkpoint-4000
Configuration saved in model-t5-base\checkpoint-4000\config.json


{'eval_loss': 1.668215274810791, 'eval_rouge1': 46.2815, 'eval_rouge2': 25.9741, 'eval_rougeL': 41.0376, 'eval_rougeLsum': 41.0662, 'eval_runtime': 135.9305, 'eval_samples_per_second': 14.743, 'eval_steps_per_second': 1.847, 'epoch': 0.89}


Model weights saved in model-t5-base\checkpoint-4000\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-4000\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-4000\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-4000\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-2500] due to args.save_total_limit
 18%|█▊        | 4050/22550 [40:54<2:01:47,  2.53it/s]  

{'loss': 1.7893, 'learning_rate': 4.5942350332594235e-05, 'epoch': 0.9}


 18%|█▊        | 4100/22550 [41:17<2:23:02,  2.15it/s]

{'loss': 1.8411, 'learning_rate': 4.581818181818182e-05, 'epoch': 0.91}


 18%|█▊        | 4150/22550 [41:40<2:04:07,  2.47it/s]

{'loss': 1.8863, 'learning_rate': 4.5694013303769395e-05, 'epoch': 0.92}


 19%|█▊        | 4200/22550 [42:03<2:20:38,  2.17it/s]

{'loss': 1.8697, 'learning_rate': 4.556984478935698e-05, 'epoch': 0.93}


 19%|█▉        | 4250/22550 [42:26<2:03:09,  2.48it/s]

{'loss': 1.8189, 'learning_rate': 4.544567627494457e-05, 'epoch': 0.94}


 19%|█▉        | 4300/22550 [42:49<2:21:01,  2.16it/s]

{'loss': 1.842, 'learning_rate': 4.532150776053215e-05, 'epoch': 0.95}


 19%|█▉        | 4350/22550 [43:12<2:01:44,  2.49it/s]

{'loss': 1.7831, 'learning_rate': 4.5197339246119736e-05, 'epoch': 0.96}


 20%|█▉        | 4400/22550 [43:35<2:19:48,  2.16it/s]

{'loss': 1.77, 'learning_rate': 4.5073170731707316e-05, 'epoch': 0.98}


 20%|█▉        | 4450/22550 [43:58<2:01:04,  2.49it/s]

{'loss': 1.8467, 'learning_rate': 4.4949002217294896e-05, 'epoch': 0.99}


 20%|█▉        | 4500/22550 [44:22<2:22:01,  2.12it/s]Saving model checkpoint to model-t5-base\checkpoint-4500
Configuration saved in model-t5-base\checkpoint-4500\config.json


{'loss': 1.8908, 'learning_rate': 4.482483370288248e-05, 'epoch': 1.0}


Model weights saved in model-t5-base\checkpoint-4500\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-4500\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-4500\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-4500\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-3000] due to args.save_total_limit
 20%|██        | 4550/22550 [44:48<2:14:25,  2.23it/s]

{'loss': 1.7662, 'learning_rate': 4.4700665188470063e-05, 'epoch': 1.01}


 20%|██        | 4600/22550 [45:11<2:33:09,  1.95it/s]

{'loss': 1.6615, 'learning_rate': 4.457649667405765e-05, 'epoch': 1.02}


 21%|██        | 4650/22550 [45:34<2:14:13,  2.22it/s]

{'loss': 1.6803, 'learning_rate': 4.445232815964523e-05, 'epoch': 1.03}


 21%|██        | 4700/22550 [45:58<2:32:13,  1.95it/s]

{'loss': 1.7612, 'learning_rate': 4.432815964523282e-05, 'epoch': 1.04}


 21%|██        | 4750/22550 [46:21<2:13:23,  2.22it/s]

{'loss': 1.6751, 'learning_rate': 4.42039911308204e-05, 'epoch': 1.05}


 21%|██▏       | 4800/22550 [46:44<2:31:16,  1.96it/s]

{'loss': 1.7161, 'learning_rate': 4.407982261640798e-05, 'epoch': 1.06}


 22%|██▏       | 4850/22550 [47:07<2:13:50,  2.20it/s]

{'loss': 1.7345, 'learning_rate': 4.3955654101995565e-05, 'epoch': 1.08}


 22%|██▏       | 4900/22550 [47:30<2:30:05,  1.96it/s]

{'loss': 1.6226, 'learning_rate': 4.383148558758315e-05, 'epoch': 1.09}


 22%|██▏       | 4950/22550 [47:53<2:12:14,  2.22it/s]

{'loss': 1.6952, 'learning_rate': 4.370731707317073e-05, 'epoch': 1.1}


 22%|██▏       | 5000/22550 [48:16<2:29:46,  1.95it/s]***** Running Evaluation *****
  Num examples = 2004
  Batch size = 8


{'loss': 1.7042, 'learning_rate': 4.358314855875832e-05, 'epoch': 1.11}


                                                      
 22%|██▏       | 5000/22550 [50:33<2:29:46,  1.95it/s]Saving model checkpoint to model-t5-base\checkpoint-5000
Configuration saved in model-t5-base\checkpoint-5000\config.json


{'eval_loss': 1.6590263843536377, 'eval_rouge1': 46.494, 'eval_rouge2': 26.1736, 'eval_rougeL': 41.1725, 'eval_rougeLsum': 41.201, 'eval_runtime': 136.7395, 'eval_samples_per_second': 14.656, 'eval_steps_per_second': 1.836, 'epoch': 1.11}


Model weights saved in model-t5-base\checkpoint-5000\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-5000\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-5000\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-5000\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-3500] due to args.save_total_limit
 22%|██▏       | 5050/22550 [50:59<2:09:49,  2.25it/s]  

{'loss': 1.6769, 'learning_rate': 4.345898004434589e-05, 'epoch': 1.12}


 23%|██▎       | 5100/22550 [51:22<2:27:35,  1.97it/s]

{'loss': 1.6314, 'learning_rate': 4.333481152993348e-05, 'epoch': 1.13}


 23%|██▎       | 5150/22550 [51:45<2:10:38,  2.22it/s]

{'loss': 1.7412, 'learning_rate': 4.3210643015521066e-05, 'epoch': 1.14}


 23%|██▎       | 5200/22550 [52:08<2:27:35,  1.96it/s]

{'loss': 1.7852, 'learning_rate': 4.3086474501108646e-05, 'epoch': 1.15}


 23%|██▎       | 5250/22550 [52:31<2:10:14,  2.21it/s]

{'loss': 1.7298, 'learning_rate': 4.296230598669623e-05, 'epoch': 1.16}


 24%|██▎       | 5300/22550 [52:55<2:27:07,  1.95it/s]

{'loss': 1.7088, 'learning_rate': 4.283813747228381e-05, 'epoch': 1.18}


 24%|██▎       | 5350/22550 [53:18<2:08:26,  2.23it/s]

{'loss': 1.6426, 'learning_rate': 4.271396895787139e-05, 'epoch': 1.19}


 24%|██▍       | 5400/22550 [53:41<2:26:32,  1.95it/s]

{'loss': 1.6613, 'learning_rate': 4.258980044345898e-05, 'epoch': 1.2}


 24%|██▍       | 5450/22550 [54:04<2:07:40,  2.23it/s]

{'loss': 1.5968, 'learning_rate': 4.246563192904656e-05, 'epoch': 1.21}


 24%|██▍       | 5500/22550 [54:27<2:24:13,  1.97it/s]Saving model checkpoint to model-t5-base\checkpoint-5500
Configuration saved in model-t5-base\checkpoint-5500\config.json


{'loss': 1.6332, 'learning_rate': 4.234146341463415e-05, 'epoch': 1.22}


Model weights saved in model-t5-base\checkpoint-5500\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-5500\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-5500\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-5500\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-4000] due to args.save_total_limit
 25%|██▍       | 5550/22550 [54:54<2:06:18,  2.24it/s]

{'loss': 1.674, 'learning_rate': 4.221729490022173e-05, 'epoch': 1.23}


 25%|██▍       | 5600/22550 [55:17<2:23:20,  1.97it/s]

{'loss': 1.7302, 'learning_rate': 4.2093126385809314e-05, 'epoch': 1.24}


 25%|██▌       | 5650/22550 [55:39<2:07:30,  2.21it/s]

{'loss': 1.7551, 'learning_rate': 4.1968957871396894e-05, 'epoch': 1.25}


 25%|██▌       | 5700/22550 [56:03<2:24:14,  1.95it/s]

{'loss': 1.7379, 'learning_rate': 4.1844789356984475e-05, 'epoch': 1.26}


 25%|██▌       | 5750/22550 [56:26<2:05:59,  2.22it/s]

{'loss': 1.7215, 'learning_rate': 4.172062084257206e-05, 'epoch': 1.27}


 26%|██▌       | 5800/22550 [56:49<2:22:16,  1.96it/s]

{'loss': 1.713, 'learning_rate': 4.159645232815964e-05, 'epoch': 1.29}


 26%|██▌       | 5850/22550 [57:12<2:04:58,  2.23it/s]

{'loss': 1.6835, 'learning_rate': 4.147228381374723e-05, 'epoch': 1.3}


 26%|██▌       | 5900/22550 [57:35<2:20:43,  1.97it/s]

{'loss': 1.6927, 'learning_rate': 4.1348115299334815e-05, 'epoch': 1.31}


 26%|██▋       | 5950/22550 [57:58<2:04:21,  2.22it/s]

{'loss': 1.7246, 'learning_rate': 4.122394678492239e-05, 'epoch': 1.32}


 27%|██▋       | 6000/22550 [58:21<2:20:18,  1.97it/s]***** Running Evaluation *****
  Num examples = 2004
  Batch size = 8


{'loss': 1.7281, 'learning_rate': 4.1099778270509976e-05, 'epoch': 1.33}


                                                      
 27%|██▋       | 6000/22550 [1:00:38<2:20:18,  1.97it/s]Saving model checkpoint to model-t5-base\checkpoint-6000
Configuration saved in model-t5-base\checkpoint-6000\config.json


{'eval_loss': 1.6463996171951294, 'eval_rouge1': 46.5243, 'eval_rouge2': 26.1726, 'eval_rougeL': 40.955, 'eval_rougeLsum': 41.0088, 'eval_runtime': 136.6092, 'eval_samples_per_second': 14.67, 'eval_steps_per_second': 1.837, 'epoch': 1.33}


Model weights saved in model-t5-base\checkpoint-6000\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-6000\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-6000\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-6000\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-4500] due to args.save_total_limit
 27%|██▋       | 6050/22550 [1:01:04<2:04:09,  2.21it/s]  

{'loss': 1.718, 'learning_rate': 4.097560975609756e-05, 'epoch': 1.34}


 27%|██▋       | 6100/22550 [1:01:27<2:19:58,  1.96it/s]

{'loss': 1.7338, 'learning_rate': 4.085144124168514e-05, 'epoch': 1.35}


 27%|██▋       | 6150/22550 [1:01:50<2:01:51,  2.24it/s]

{'loss': 1.7252, 'learning_rate': 4.072727272727273e-05, 'epoch': 1.36}


 27%|██▋       | 6200/22550 [1:02:13<2:28:16,  1.84it/s]

{'loss': 1.7705, 'learning_rate': 4.060310421286031e-05, 'epoch': 1.37}


 28%|██▊       | 6250/22550 [1:02:37<2:04:34,  2.18it/s]

{'loss': 1.5981, 'learning_rate': 4.047893569844789e-05, 'epoch': 1.39}


 28%|██▊       | 6300/22550 [1:03:01<2:19:23,  1.94it/s]

{'loss': 1.6304, 'learning_rate': 4.035476718403548e-05, 'epoch': 1.4}


 28%|██▊       | 6350/22550 [1:03:24<2:02:05,  2.21it/s]

{'loss': 1.6725, 'learning_rate': 4.023059866962306e-05, 'epoch': 1.41}


 28%|██▊       | 6400/22550 [1:03:47<2:19:13,  1.93it/s]

{'loss': 1.7491, 'learning_rate': 4.0106430155210644e-05, 'epoch': 1.42}


 29%|██▊       | 6450/22550 [1:04:10<2:00:49,  2.22it/s]

{'loss': 1.6317, 'learning_rate': 3.9982261640798224e-05, 'epoch': 1.43}


 29%|██▉       | 6500/22550 [1:04:33<2:17:40,  1.94it/s]Saving model checkpoint to model-t5-base\checkpoint-6500
Configuration saved in model-t5-base\checkpoint-6500\config.json


{'loss': 1.6407, 'learning_rate': 3.985809312638581e-05, 'epoch': 1.44}


Model weights saved in model-t5-base\checkpoint-6500\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-6500\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-6500\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-6500\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-5000] due to args.save_total_limit
 29%|██▉       | 6550/22550 [1:05:00<1:58:56,  2.24it/s]

{'loss': 1.6631, 'learning_rate': 3.97339246119734e-05, 'epoch': 1.45}


 29%|██▉       | 6600/22550 [1:05:23<2:15:27,  1.96it/s]

{'loss': 1.7673, 'learning_rate': 3.960975609756097e-05, 'epoch': 1.46}


 29%|██▉       | 6650/22550 [1:05:46<1:59:02,  2.23it/s]

{'loss': 1.6841, 'learning_rate': 3.948558758314856e-05, 'epoch': 1.47}


 30%|██▉       | 6700/22550 [1:06:09<2:14:29,  1.96it/s]

{'loss': 1.6935, 'learning_rate': 3.936141906873614e-05, 'epoch': 1.49}


 30%|██▉       | 6750/22550 [1:06:32<1:58:01,  2.23it/s]

{'loss': 1.7921, 'learning_rate': 3.9237250554323725e-05, 'epoch': 1.5}


 30%|███       | 6800/22550 [1:06:55<2:14:46,  1.95it/s]

{'loss': 1.6727, 'learning_rate': 3.911308203991131e-05, 'epoch': 1.51}


 30%|███       | 6850/22550 [1:07:18<1:56:58,  2.24it/s]

{'loss': 1.6477, 'learning_rate': 3.898891352549889e-05, 'epoch': 1.52}


 31%|███       | 6900/22550 [1:07:41<2:14:20,  1.94it/s]

{'loss': 1.6218, 'learning_rate': 3.886474501108647e-05, 'epoch': 1.53}


 31%|███       | 6950/22550 [1:08:04<1:55:25,  2.25it/s]

{'loss': 1.7492, 'learning_rate': 3.874057649667405e-05, 'epoch': 1.54}


 31%|███       | 7000/22550 [1:08:28<2:11:52,  1.97it/s]***** Running Evaluation *****
  Num examples = 2004
  Batch size = 8


{'loss': 1.691, 'learning_rate': 3.861640798226164e-05, 'epoch': 1.55}


                                                        
 31%|███       | 7000/22550 [1:10:44<2:11:52,  1.97it/s]Saving model checkpoint to model-t5-base\checkpoint-7000
Configuration saved in model-t5-base\checkpoint-7000\config.json


{'eval_loss': 1.6380325555801392, 'eval_rouge1': 47.3611, 'eval_rouge2': 27.0415, 'eval_rougeL': 42.1201, 'eval_rougeLsum': 42.149, 'eval_runtime': 136.8765, 'eval_samples_per_second': 14.641, 'eval_steps_per_second': 1.834, 'epoch': 1.55}


Model weights saved in model-t5-base\checkpoint-7000\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-7000\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-7000\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-7000\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-5500] due to args.save_total_limit
 31%|███▏      | 7050/22550 [1:11:11<1:54:55,  2.25it/s]  

{'loss': 1.7039, 'learning_rate': 3.8492239467849227e-05, 'epoch': 1.56}


 31%|███▏      | 7100/22550 [1:11:34<2:10:22,  1.97it/s]

{'loss': 1.6393, 'learning_rate': 3.836807095343681e-05, 'epoch': 1.57}


 32%|███▏      | 7150/22550 [1:11:57<1:55:01,  2.23it/s]

{'loss': 1.6154, 'learning_rate': 3.8243902439024394e-05, 'epoch': 1.59}


 32%|███▏      | 7200/22550 [1:12:20<2:10:25,  1.96it/s]

{'loss': 1.654, 'learning_rate': 3.8119733924611974e-05, 'epoch': 1.6}


 32%|███▏      | 7250/22550 [1:12:43<1:54:14,  2.23it/s]

{'loss': 1.6885, 'learning_rate': 3.7995565410199554e-05, 'epoch': 1.61}


 32%|███▏      | 7300/22550 [1:13:06<2:10:14,  1.95it/s]

{'loss': 1.6839, 'learning_rate': 3.787139689578714e-05, 'epoch': 1.62}


 33%|███▎      | 7350/22550 [1:13:29<1:53:54,  2.22it/s]

{'loss': 1.7161, 'learning_rate': 3.774722838137472e-05, 'epoch': 1.63}


 33%|███▎      | 7400/22550 [1:13:52<2:08:27,  1.97it/s]

{'loss': 1.704, 'learning_rate': 3.762305986696231e-05, 'epoch': 1.64}


 33%|███▎      | 7450/22550 [1:14:15<1:53:14,  2.22it/s]

{'loss': 1.6977, 'learning_rate': 3.749889135254989e-05, 'epoch': 1.65}


 33%|███▎      | 7500/22550 [1:14:38<2:07:45,  1.96it/s]Saving model checkpoint to model-t5-base\checkpoint-7500
Configuration saved in model-t5-base\checkpoint-7500\config.json


{'loss': 1.6783, 'learning_rate': 3.737472283813747e-05, 'epoch': 1.66}


Model weights saved in model-t5-base\checkpoint-7500\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-7500\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-7500\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-7500\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-6000] due to args.save_total_limit
 33%|███▎      | 7550/22550 [1:15:05<1:52:17,  2.23it/s]

{'loss': 1.723, 'learning_rate': 3.7250554323725055e-05, 'epoch': 1.67}


 34%|███▎      | 7600/22550 [1:15:28<2:08:03,  1.95it/s]

{'loss': 1.6366, 'learning_rate': 3.7126385809312635e-05, 'epoch': 1.69}


 34%|███▍      | 7650/22550 [1:15:51<1:52:14,  2.21it/s]

{'loss': 1.6378, 'learning_rate': 3.700221729490022e-05, 'epoch': 1.7}


 34%|███▍      | 7700/22550 [1:16:14<2:06:16,  1.96it/s]

{'loss': 1.6691, 'learning_rate': 3.687804878048781e-05, 'epoch': 1.71}


 34%|███▍      | 7750/22550 [1:16:37<1:50:26,  2.23it/s]

{'loss': 1.649, 'learning_rate': 3.675388026607539e-05, 'epoch': 1.72}


 35%|███▍      | 7800/22550 [1:17:00<2:04:37,  1.97it/s]

{'loss': 1.6831, 'learning_rate': 3.662971175166297e-05, 'epoch': 1.73}


 35%|███▍      | 7850/22550 [1:17:23<1:49:55,  2.23it/s]

{'loss': 1.6824, 'learning_rate': 3.650554323725055e-05, 'epoch': 1.74}


 35%|███▌      | 7900/22550 [1:17:46<2:04:51,  1.96it/s]

{'loss': 1.7211, 'learning_rate': 3.6381374722838136e-05, 'epoch': 1.75}


 35%|███▌      | 7950/22550 [1:18:10<1:49:40,  2.22it/s]

{'loss': 1.7348, 'learning_rate': 3.6257206208425723e-05, 'epoch': 1.76}


 35%|███▌      | 8000/22550 [1:18:33<2:03:05,  1.97it/s]***** Running Evaluation *****
  Num examples = 2004
  Batch size = 8


{'loss': 1.7049, 'learning_rate': 3.6133037694013304e-05, 'epoch': 1.77}


                                                        
 35%|███▌      | 8000/22550 [1:20:48<2:03:05,  1.97it/s]Saving model checkpoint to model-t5-base\checkpoint-8000
Configuration saved in model-t5-base\checkpoint-8000\config.json


{'eval_loss': 1.6297885179519653, 'eval_rouge1': 47.0982, 'eval_rouge2': 26.9455, 'eval_rougeL': 42.03, 'eval_rougeLsum': 42.0527, 'eval_runtime': 135.765, 'eval_samples_per_second': 14.761, 'eval_steps_per_second': 1.849, 'epoch': 1.77}


Model weights saved in model-t5-base\checkpoint-8000\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-8000\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-8000\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-8000\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-6500] due to args.save_total_limit
 36%|███▌      | 8050/22550 [1:21:15<1:47:46,  2.24it/s]  

{'loss': 1.7167, 'learning_rate': 3.600886917960089e-05, 'epoch': 1.78}


 36%|███▌      | 8100/22550 [1:21:38<2:03:01,  1.96it/s]

{'loss': 1.6357, 'learning_rate': 3.5884700665188464e-05, 'epoch': 1.8}


 36%|███▌      | 8150/22550 [1:22:01<1:47:18,  2.24it/s]

{'loss': 1.6551, 'learning_rate': 3.576053215077605e-05, 'epoch': 1.81}


 36%|███▋      | 8200/22550 [1:22:24<2:01:12,  1.97it/s]

{'loss': 1.7479, 'learning_rate': 3.563636363636364e-05, 'epoch': 1.82}


 37%|███▋      | 8250/22550 [1:22:47<1:47:32,  2.22it/s]

{'loss': 1.7257, 'learning_rate': 3.551219512195122e-05, 'epoch': 1.83}


 37%|███▋      | 8300/22550 [1:23:10<2:00:56,  1.96it/s]

{'loss': 1.6433, 'learning_rate': 3.5388026607538805e-05, 'epoch': 1.84}


 37%|███▋      | 8350/22550 [1:23:33<1:46:57,  2.21it/s]

{'loss': 1.6766, 'learning_rate': 3.5263858093126385e-05, 'epoch': 1.85}


 37%|███▋      | 8400/22550 [1:23:56<2:00:30,  1.96it/s]

{'loss': 1.6781, 'learning_rate': 3.5139689578713965e-05, 'epoch': 1.86}


 37%|███▋      | 8450/22550 [1:24:19<1:45:22,  2.23it/s]

{'loss': 1.7272, 'learning_rate': 3.501552106430155e-05, 'epoch': 1.87}


 38%|███▊      | 8500/22550 [1:24:42<1:59:32,  1.96it/s]Saving model checkpoint to model-t5-base\checkpoint-8500
Configuration saved in model-t5-base\checkpoint-8500\config.json


{'loss': 1.634, 'learning_rate': 3.489135254988913e-05, 'epoch': 1.88}


Model weights saved in model-t5-base\checkpoint-8500\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-8500\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-8500\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-8500\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-7000] due to args.save_total_limit
 38%|███▊      | 8550/22550 [1:25:09<1:45:30,  2.21it/s]

{'loss': 1.6884, 'learning_rate': 3.476718403547672e-05, 'epoch': 1.9}


 38%|███▊      | 8600/22550 [1:25:32<1:58:04,  1.97it/s]

{'loss': 1.6177, 'learning_rate': 3.46430155210643e-05, 'epoch': 1.91}


 38%|███▊      | 8650/22550 [1:25:55<1:44:21,  2.22it/s]

{'loss': 1.6681, 'learning_rate': 3.4518847006651886e-05, 'epoch': 1.92}


 39%|███▊      | 8700/22550 [1:26:18<1:57:56,  1.96it/s]

{'loss': 1.7311, 'learning_rate': 3.4394678492239466e-05, 'epoch': 1.93}


 39%|███▉      | 8750/22550 [1:26:41<1:43:14,  2.23it/s]

{'loss': 1.6907, 'learning_rate': 3.4270509977827046e-05, 'epoch': 1.94}


 39%|███▉      | 8800/22550 [1:27:04<1:56:50,  1.96it/s]

{'loss': 1.6981, 'learning_rate': 3.414634146341463e-05, 'epoch': 1.95}


 39%|███▉      | 8850/22550 [1:27:27<1:43:02,  2.22it/s]

{'loss': 1.83, 'learning_rate': 3.402217294900222e-05, 'epoch': 1.96}


 39%|███▉      | 8900/22550 [1:27:50<1:55:23,  1.97it/s]

{'loss': 1.6507, 'learning_rate': 3.38980044345898e-05, 'epoch': 1.97}


 40%|███▉      | 8950/22550 [1:28:13<1:41:48,  2.23it/s]

{'loss': 1.6885, 'learning_rate': 3.377383592017739e-05, 'epoch': 1.98}


 40%|███▉      | 9000/22550 [1:28:36<1:56:21,  1.94it/s]***** Running Evaluation *****
  Num examples = 2004
  Batch size = 8


{'loss': 1.6441, 'learning_rate': 3.364966740576496e-05, 'epoch': 2.0}


                                                        
 40%|███▉      | 9000/22550 [1:30:52<1:56:21,  1.94it/s]Saving model checkpoint to model-t5-base\checkpoint-9000
Configuration saved in model-t5-base\checkpoint-9000\config.json


{'eval_loss': 1.6224262714385986, 'eval_rouge1': 47.0211, 'eval_rouge2': 26.7549, 'eval_rougeL': 41.6723, 'eval_rougeLsum': 41.7085, 'eval_runtime': 136.4862, 'eval_samples_per_second': 14.683, 'eval_steps_per_second': 1.839, 'epoch': 2.0}


Model weights saved in model-t5-base\checkpoint-9000\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-9000\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-9000\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-9000\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-7500] due to args.save_total_limit
 40%|████      | 9050/22550 [1:31:19<1:45:40,  2.13it/s]  

{'loss': 1.5764, 'learning_rate': 3.352549889135255e-05, 'epoch': 2.01}


 40%|████      | 9100/22550 [1:31:42<1:59:55,  1.87it/s]

{'loss': 1.5218, 'learning_rate': 3.3401330376940135e-05, 'epoch': 2.02}


 41%|████      | 9150/22550 [1:32:05<1:43:46,  2.15it/s]

{'loss': 1.5571, 'learning_rate': 3.3277161862527715e-05, 'epoch': 2.03}


 41%|████      | 9200/22550 [1:32:28<1:58:23,  1.88it/s]

{'loss': 1.5399, 'learning_rate': 3.31529933481153e-05, 'epoch': 2.04}


 41%|████      | 9250/22550 [1:32:51<1:44:38,  2.12it/s]

{'loss': 1.5994, 'learning_rate': 3.302882483370288e-05, 'epoch': 2.05}


 41%|████      | 9300/22550 [1:33:14<1:59:28,  1.85it/s]

{'loss': 1.5728, 'learning_rate': 3.290465631929046e-05, 'epoch': 2.06}


 41%|████▏     | 9350/22550 [1:33:37<1:46:24,  2.07it/s]

{'loss': 1.5813, 'learning_rate': 3.278048780487805e-05, 'epoch': 2.07}


 42%|████▏     | 9400/22550 [1:34:01<1:58:10,  1.85it/s]

{'loss': 1.62, 'learning_rate': 3.265631929046563e-05, 'epoch': 2.08}


 42%|████▏     | 9450/22550 [1:34:24<1:43:48,  2.10it/s]

{'loss': 1.5506, 'learning_rate': 3.2532150776053216e-05, 'epoch': 2.1}


 42%|████▏     | 9500/22550 [1:34:47<1:55:55,  1.88it/s]Saving model checkpoint to model-t5-base\checkpoint-9500
Configuration saved in model-t5-base\checkpoint-9500\config.json


{'loss': 1.5444, 'learning_rate': 3.2407982261640796e-05, 'epoch': 2.11}


Model weights saved in model-t5-base\checkpoint-9500\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-9500\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-9500\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-9500\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-8000] due to args.save_total_limit
 42%|████▏     | 9550/22550 [1:35:13<1:42:12,  2.12it/s]

{'loss': 1.5664, 'learning_rate': 3.228381374722838e-05, 'epoch': 2.12}


 43%|████▎     | 9600/22550 [1:35:36<1:55:06,  1.87it/s]

{'loss': 1.5757, 'learning_rate': 3.215964523281596e-05, 'epoch': 2.13}


 43%|████▎     | 9650/22550 [1:35:59<1:41:35,  2.12it/s]

{'loss': 1.5965, 'learning_rate': 3.203547671840354e-05, 'epoch': 2.14}


 43%|████▎     | 9700/22550 [1:36:22<1:52:26,  1.90it/s]

{'loss': 1.566, 'learning_rate': 3.191130820399113e-05, 'epoch': 2.15}


 43%|████▎     | 9750/22550 [1:36:45<1:39:36,  2.14it/s]

{'loss': 1.613, 'learning_rate': 3.178713968957871e-05, 'epoch': 2.16}


 43%|████▎     | 9800/22550 [1:37:08<1:52:16,  1.89it/s]

{'loss': 1.5403, 'learning_rate': 3.16629711751663e-05, 'epoch': 2.17}


 44%|████▎     | 9850/22550 [1:37:31<1:40:21,  2.11it/s]

{'loss': 1.551, 'learning_rate': 3.1538802660753884e-05, 'epoch': 2.18}


 44%|████▍     | 9900/22550 [1:37:55<1:48:49,  1.94it/s]

{'loss': 1.6057, 'learning_rate': 3.1414634146341464e-05, 'epoch': 2.2}


 44%|████▍     | 9950/22550 [1:38:17<1:35:08,  2.21it/s]

{'loss': 1.5886, 'learning_rate': 3.1290465631929044e-05, 'epoch': 2.21}


 44%|████▍     | 10000/22550 [1:38:39<1:46:40,  1.96it/s]***** Running Evaluation *****
  Num examples = 2004
  Batch size = 8


{'loss': 1.5881, 'learning_rate': 3.116629711751663e-05, 'epoch': 2.22}


                                                         
 44%|████▍     | 10000/22550 [1:40:52<1:46:40,  1.96it/s]Saving model checkpoint to model-t5-base\checkpoint-10000
Configuration saved in model-t5-base\checkpoint-10000\config.json


{'eval_loss': 1.6250555515289307, 'eval_rouge1': 47.279, 'eval_rouge2': 26.9646, 'eval_rougeL': 41.7966, 'eval_rougeLsum': 41.8672, 'eval_runtime': 133.2792, 'eval_samples_per_second': 15.036, 'eval_steps_per_second': 1.883, 'epoch': 2.22}


Model weights saved in model-t5-base\checkpoint-10000\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-10000\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-10000\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-10000\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-8500] due to args.save_total_limit
 45%|████▍     | 10050/22550 [1:41:18<1:34:06,  2.21it/s]  

{'loss': 1.514, 'learning_rate': 3.104212860310421e-05, 'epoch': 2.23}


 45%|████▍     | 10100/22550 [1:41:40<1:47:54,  1.92it/s]

{'loss': 1.5486, 'learning_rate': 3.09179600886918e-05, 'epoch': 2.24}


 45%|████▌     | 10150/22550 [1:42:02<1:33:17,  2.22it/s]

{'loss': 1.5446, 'learning_rate': 3.079379157427938e-05, 'epoch': 2.25}


 45%|████▌     | 10200/22550 [1:42:25<1:46:34,  1.93it/s]

{'loss': 1.5571, 'learning_rate': 3.0669623059866965e-05, 'epoch': 2.26}


 45%|████▌     | 10250/22550 [1:42:47<1:35:10,  2.15it/s]

{'loss': 1.5599, 'learning_rate': 3.0545454545454546e-05, 'epoch': 2.27}


 46%|████▌     | 10300/22550 [1:43:09<1:44:58,  1.94it/s]

{'loss': 1.5816, 'learning_rate': 3.0421286031042126e-05, 'epoch': 2.28}


 46%|████▌     | 10350/22550 [1:43:31<1:32:25,  2.20it/s]

{'loss': 1.6138, 'learning_rate': 3.0297117516629713e-05, 'epoch': 2.29}


 46%|████▌     | 10400/22550 [1:43:54<1:44:29,  1.94it/s]

{'loss': 1.5758, 'learning_rate': 3.0172949002217296e-05, 'epoch': 2.31}


 46%|████▋     | 10450/22550 [1:44:16<1:32:05,  2.19it/s]

{'loss': 1.4833, 'learning_rate': 3.004878048780488e-05, 'epoch': 2.32}


 47%|████▋     | 10500/22550 [1:44:38<1:42:19,  1.96it/s]Saving model checkpoint to model-t5-base\checkpoint-10500
Configuration saved in model-t5-base\checkpoint-10500\config.json


{'loss': 1.5377, 'learning_rate': 2.9924611973392463e-05, 'epoch': 2.33}


Model weights saved in model-t5-base\checkpoint-10500\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-10500\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-10500\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-10500\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-9000] due to args.save_total_limit
 47%|████▋     | 10550/22550 [1:45:04<1:32:30,  2.16it/s]

{'loss': 1.5437, 'learning_rate': 2.980044345898004e-05, 'epoch': 2.34}


 47%|████▋     | 10600/22550 [1:45:26<1:43:32,  1.92it/s]

{'loss': 1.624, 'learning_rate': 2.9676274944567627e-05, 'epoch': 2.35}


 47%|████▋     | 10650/22550 [1:45:48<1:30:34,  2.19it/s]

{'loss': 1.6675, 'learning_rate': 2.955210643015521e-05, 'epoch': 2.36}


 47%|████▋     | 10700/22550 [1:46:10<1:42:54,  1.92it/s]

{'loss': 1.5678, 'learning_rate': 2.9427937915742794e-05, 'epoch': 2.37}


 48%|████▊     | 10750/22550 [1:46:33<1:31:29,  2.15it/s]

{'loss': 1.6415, 'learning_rate': 2.9303769401330378e-05, 'epoch': 2.38}


 48%|████▊     | 10800/22550 [1:46:55<1:42:19,  1.91it/s]

{'loss': 1.5871, 'learning_rate': 2.917960088691796e-05, 'epoch': 2.39}


 48%|████▊     | 10850/22550 [1:47:17<1:30:39,  2.15it/s]

{'loss': 1.5914, 'learning_rate': 2.905543237250554e-05, 'epoch': 2.41}


 48%|████▊     | 10900/22550 [1:47:40<1:41:57,  1.90it/s]

{'loss': 1.5502, 'learning_rate': 2.8931263858093125e-05, 'epoch': 2.42}


 49%|████▊     | 10950/22550 [1:48:02<1:26:04,  2.25it/s]

{'loss': 1.5734, 'learning_rate': 2.8807095343680708e-05, 'epoch': 2.43}


 49%|████▉     | 11000/22550 [1:48:24<1:39:14,  1.94it/s]***** Running Evaluation *****
  Num examples = 2004
  Batch size = 8


{'loss': 1.5734, 'learning_rate': 2.8682926829268292e-05, 'epoch': 2.44}


                                                         
 49%|████▉     | 11000/22550 [1:50:37<1:39:14,  1.94it/s]Saving model checkpoint to model-t5-base\checkpoint-11000
Configuration saved in model-t5-base\checkpoint-11000\config.json


{'eval_loss': 1.6229438781738281, 'eval_rouge1': 47.7269, 'eval_rouge2': 27.7926, 'eval_rougeL': 42.6433, 'eval_rougeLsum': 42.665, 'eval_runtime': 133.0843, 'eval_samples_per_second': 15.058, 'eval_steps_per_second': 1.886, 'epoch': 2.44}


Model weights saved in model-t5-base\checkpoint-11000\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-11000\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-11000\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-11000\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-9500] due to args.save_total_limit
 49%|████▉     | 11050/22550 [1:51:03<1:26:56,  2.20it/s]  

{'loss': 1.5946, 'learning_rate': 2.8558758314855875e-05, 'epoch': 2.45}


 49%|████▉     | 11100/22550 [1:51:25<1:38:15,  1.94it/s]

{'loss': 1.5711, 'learning_rate': 2.8434589800443462e-05, 'epoch': 2.46}


 49%|████▉     | 11150/22550 [1:51:47<1:26:48,  2.19it/s]

{'loss': 1.6034, 'learning_rate': 2.831042128603104e-05, 'epoch': 2.47}


 50%|████▉     | 11200/22550 [1:52:09<1:37:36,  1.94it/s]

{'loss': 1.5759, 'learning_rate': 2.8186252771618623e-05, 'epoch': 2.48}


 50%|████▉     | 11250/22550 [1:52:31<1:25:06,  2.21it/s]

{'loss': 1.5391, 'learning_rate': 2.8062084257206206e-05, 'epoch': 2.49}


 50%|█████     | 11300/22550 [1:52:54<1:35:40,  1.96it/s]

{'loss': 1.5413, 'learning_rate': 2.7937915742793793e-05, 'epoch': 2.51}


 50%|█████     | 11350/22550 [1:53:16<1:25:48,  2.18it/s]

{'loss': 1.5605, 'learning_rate': 2.7813747228381377e-05, 'epoch': 2.52}


 51%|█████     | 11400/22550 [1:53:39<1:35:28,  1.95it/s]

{'loss': 1.5555, 'learning_rate': 2.7689578713968957e-05, 'epoch': 2.53}


 51%|█████     | 11450/22550 [1:54:01<1:22:03,  2.25it/s]

{'loss': 1.4703, 'learning_rate': 2.756541019955654e-05, 'epoch': 2.54}


 51%|█████     | 11500/22550 [1:54:23<1:35:23,  1.93it/s]Saving model checkpoint to model-t5-base\checkpoint-11500
Configuration saved in model-t5-base\checkpoint-11500\config.json


{'loss': 1.5991, 'learning_rate': 2.7441241685144124e-05, 'epoch': 2.55}


Model weights saved in model-t5-base\checkpoint-11500\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-11500\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-11500\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-11500\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-10000] due to args.save_total_limit
 51%|█████     | 11550/22550 [1:54:48<1:23:27,  2.20it/s]

{'loss': 1.5539, 'learning_rate': 2.7317073170731707e-05, 'epoch': 2.56}


 51%|█████▏    | 11600/22550 [1:55:11<1:33:19,  1.96it/s]

{'loss': 1.608, 'learning_rate': 2.719290465631929e-05, 'epoch': 2.57}


 52%|█████▏    | 11650/22550 [1:55:33<1:22:04,  2.21it/s]

{'loss': 1.5396, 'learning_rate': 2.7068736141906874e-05, 'epoch': 2.58}


 52%|█████▏    | 11700/22550 [1:55:55<1:32:12,  1.96it/s]

{'loss': 1.5498, 'learning_rate': 2.6944567627494455e-05, 'epoch': 2.59}


 52%|█████▏    | 11750/22550 [1:56:17<1:19:17,  2.27it/s]

{'loss': 1.6315, 'learning_rate': 2.682039911308204e-05, 'epoch': 2.61}


 52%|█████▏    | 11800/22550 [1:56:39<1:32:58,  1.93it/s]

{'loss': 1.518, 'learning_rate': 2.6696230598669625e-05, 'epoch': 2.62}


 53%|█████▎    | 11850/22550 [1:57:02<1:21:34,  2.19it/s]

{'loss': 1.5282, 'learning_rate': 2.6572062084257205e-05, 'epoch': 2.63}


 53%|█████▎    | 11900/22550 [1:57:24<1:33:06,  1.91it/s]

{'loss': 1.54, 'learning_rate': 2.644789356984479e-05, 'epoch': 2.64}


 53%|█████▎    | 11950/22550 [1:57:47<1:24:04,  2.10it/s]

{'loss': 1.5851, 'learning_rate': 2.6323725055432372e-05, 'epoch': 2.65}


 53%|█████▎    | 12000/22550 [1:58:11<1:30:10,  1.95it/s]***** Running Evaluation *****
  Num examples = 2004
  Batch size = 8


{'loss': 1.637, 'learning_rate': 2.6199556541019956e-05, 'epoch': 2.66}


                                                         
 53%|█████▎    | 12000/22550 [2:00:23<1:30:10,  1.95it/s]Saving model checkpoint to model-t5-base\checkpoint-12000
Configuration saved in model-t5-base\checkpoint-12000\config.json


{'eval_loss': 1.6123322248458862, 'eval_rouge1': 47.5314, 'eval_rouge2': 27.1266, 'eval_rougeL': 42.2511, 'eval_rougeLsum': 42.2927, 'eval_runtime': 132.1687, 'eval_samples_per_second': 15.162, 'eval_steps_per_second': 1.899, 'epoch': 2.66}


Model weights saved in model-t5-base\checkpoint-12000\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-12000\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-12000\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-12000\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-10500] due to args.save_total_limit
 53%|█████▎    | 12050/22550 [2:00:48<1:18:39,  2.22it/s]  

{'loss': 1.5343, 'learning_rate': 2.607538802660754e-05, 'epoch': 2.67}


 54%|█████▎    | 12100/22550 [2:01:10<1:27:02,  2.00it/s]

{'loss': 1.5989, 'learning_rate': 2.5951219512195123e-05, 'epoch': 2.68}


 54%|█████▍    | 12150/22550 [2:01:32<1:16:07,  2.28it/s]

{'loss': 1.5548, 'learning_rate': 2.5827050997782703e-05, 'epoch': 2.69}


 54%|█████▍    | 12200/22550 [2:01:54<1:27:29,  1.97it/s]

{'loss': 1.6118, 'learning_rate': 2.5702882483370286e-05, 'epoch': 2.71}


 54%|█████▍    | 12250/22550 [2:02:16<1:18:38,  2.18it/s]

{'loss': 1.5714, 'learning_rate': 2.5578713968957873e-05, 'epoch': 2.72}


 55%|█████▍    | 12300/22550 [2:02:38<1:26:37,  1.97it/s]

{'loss': 1.6125, 'learning_rate': 2.5454545454545454e-05, 'epoch': 2.73}


 55%|█████▍    | 12350/22550 [2:03:00<1:14:56,  2.27it/s]

{'loss': 1.5353, 'learning_rate': 2.5330376940133037e-05, 'epoch': 2.74}


 55%|█████▍    | 12400/22550 [2:03:22<1:24:28,  2.00it/s]

{'loss': 1.6069, 'learning_rate': 2.520620842572062e-05, 'epoch': 2.75}


 55%|█████▌    | 12450/22550 [2:03:44<1:13:50,  2.28it/s]

{'loss': 1.6046, 'learning_rate': 2.5082039911308204e-05, 'epoch': 2.76}


 55%|█████▌    | 12500/22550 [2:04:06<1:24:24,  1.98it/s]Saving model checkpoint to model-t5-base\checkpoint-12500
Configuration saved in model-t5-base\checkpoint-12500\config.json


{'loss': 1.5942, 'learning_rate': 2.4957871396895788e-05, 'epoch': 2.77}


Model weights saved in model-t5-base\checkpoint-12500\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-12500\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-12500\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-12500\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-11000] due to args.save_total_limit
 56%|█████▌    | 12550/22550 [2:04:31<1:15:54,  2.20it/s]

{'loss': 1.5504, 'learning_rate': 2.483370288248337e-05, 'epoch': 2.78}


 56%|█████▌    | 12600/22550 [2:04:53<1:23:13,  1.99it/s]

{'loss': 1.5815, 'learning_rate': 2.470953436807095e-05, 'epoch': 2.79}


 56%|█████▌    | 12650/22550 [2:05:15<1:13:12,  2.25it/s]

{'loss': 1.6016, 'learning_rate': 2.4585365853658535e-05, 'epoch': 2.8}


 56%|█████▋    | 12700/22550 [2:05:37<1:22:10,  2.00it/s]

{'loss': 1.5642, 'learning_rate': 2.4461197339246122e-05, 'epoch': 2.82}


 57%|█████▋    | 12750/22550 [2:05:59<1:12:10,  2.26it/s]

{'loss': 1.6679, 'learning_rate': 2.4337028824833702e-05, 'epoch': 2.83}


 57%|█████▋    | 12800/22550 [2:06:21<1:22:47,  1.96it/s]

{'loss': 1.5538, 'learning_rate': 2.4212860310421285e-05, 'epoch': 2.84}


 57%|█████▋    | 12850/22550 [2:06:43<1:13:15,  2.21it/s]

{'loss': 1.5602, 'learning_rate': 2.408869179600887e-05, 'epoch': 2.85}


 57%|█████▋    | 12900/22550 [2:07:05<1:21:19,  1.98it/s]

{'loss': 1.5998, 'learning_rate': 2.3964523281596453e-05, 'epoch': 2.86}


 57%|█████▋    | 12950/22550 [2:07:27<1:12:09,  2.22it/s]

{'loss': 1.5738, 'learning_rate': 2.3840354767184036e-05, 'epoch': 2.87}


 58%|█████▊    | 13000/22550 [2:07:49<1:20:43,  1.97it/s]***** Running Evaluation *****
  Num examples = 2004
  Batch size = 8


{'loss': 1.6084, 'learning_rate': 2.371618625277162e-05, 'epoch': 2.88}


                                                         
 58%|█████▊    | 13000/22550 [2:10:01<1:20:43,  1.97it/s]Saving model checkpoint to model-t5-base\checkpoint-13000
Configuration saved in model-t5-base\checkpoint-13000\config.json


{'eval_loss': 1.6066685914993286, 'eval_rouge1': 47.5485, 'eval_rouge2': 27.2871, 'eval_rougeL': 42.4197, 'eval_rougeLsum': 42.4106, 'eval_runtime': 132.1444, 'eval_samples_per_second': 15.165, 'eval_steps_per_second': 1.899, 'epoch': 2.88}


Model weights saved in model-t5-base\checkpoint-13000\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-13000\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-13000\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-13000\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-11500] due to args.save_total_limit
 58%|█████▊    | 13050/22550 [2:10:26<1:09:36,  2.27it/s]  

{'loss': 1.5498, 'learning_rate': 2.35920177383592e-05, 'epoch': 2.89}


 58%|█████▊    | 13100/22550 [2:10:48<1:21:00,  1.94it/s]

{'loss': 1.5866, 'learning_rate': 2.3467849223946783e-05, 'epoch': 2.9}


 58%|█████▊    | 13150/22550 [2:11:10<1:12:07,  2.17it/s]

{'loss': 1.6041, 'learning_rate': 2.334368070953437e-05, 'epoch': 2.92}


 59%|█████▊    | 13200/22550 [2:11:33<1:19:26,  1.96it/s]

{'loss': 1.5967, 'learning_rate': 2.321951219512195e-05, 'epoch': 2.93}


 59%|█████▉    | 13250/22550 [2:11:55<1:10:32,  2.20it/s]

{'loss': 1.5991, 'learning_rate': 2.3095343680709534e-05, 'epoch': 2.94}


 59%|█████▉    | 13300/22550 [2:12:17<1:20:02,  1.93it/s]

{'loss': 1.582, 'learning_rate': 2.2971175166297117e-05, 'epoch': 2.95}


 59%|█████▉    | 13350/22550 [2:12:39<1:07:58,  2.26it/s]

{'loss': 1.633, 'learning_rate': 2.2847006651884698e-05, 'epoch': 2.96}


 59%|█████▉    | 13400/22550 [2:13:01<1:16:21,  2.00it/s]

{'loss': 1.5371, 'learning_rate': 2.2722838137472285e-05, 'epoch': 2.97}


 60%|█████▉    | 13450/22550 [2:13:22<1:08:23,  2.22it/s]

{'loss': 1.6988, 'learning_rate': 2.2598669623059868e-05, 'epoch': 2.98}


 60%|█████▉    | 13500/22550 [2:13:45<1:17:11,  1.95it/s]Saving model checkpoint to model-t5-base\checkpoint-13500
Configuration saved in model-t5-base\checkpoint-13500\config.json


{'loss': 1.5629, 'learning_rate': 2.2474501108647448e-05, 'epoch': 2.99}


Model weights saved in model-t5-base\checkpoint-13500\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-13500\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-13500\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-13500\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-12000] due to args.save_total_limit
 60%|██████    | 13550/22550 [2:14:10<1:06:17,  2.26it/s]

{'loss': 1.5121, 'learning_rate': 2.2350332594235032e-05, 'epoch': 3.0}


 60%|██████    | 13600/22550 [2:14:32<1:16:32,  1.95it/s]

{'loss': 1.4537, 'learning_rate': 2.2226164079822615e-05, 'epoch': 3.02}


 61%|██████    | 13650/22550 [2:14:54<1:07:28,  2.20it/s]

{'loss': 1.4581, 'learning_rate': 2.21019955654102e-05, 'epoch': 3.03}


 61%|██████    | 13700/22550 [2:15:16<1:14:41,  1.97it/s]

{'loss': 1.476, 'learning_rate': 2.1977827050997782e-05, 'epoch': 3.04}


 61%|██████    | 13750/22550 [2:15:38<1:06:43,  2.20it/s]

{'loss': 1.4814, 'learning_rate': 2.1853658536585366e-05, 'epoch': 3.05}


 61%|██████    | 13800/22550 [2:16:00<1:14:36,  1.95it/s]

{'loss': 1.4684, 'learning_rate': 2.1729490022172946e-05, 'epoch': 3.06}


 61%|██████▏   | 13850/22550 [2:16:21<1:06:02,  2.20it/s]

{'loss': 1.5202, 'learning_rate': 2.1605321507760533e-05, 'epoch': 3.07}


 62%|██████▏   | 13900/22550 [2:16:44<1:14:04,  1.95it/s]

{'loss': 1.5546, 'learning_rate': 2.1481152993348116e-05, 'epoch': 3.08}


 62%|██████▏   | 13950/22550 [2:17:06<1:05:53,  2.18it/s]

{'loss': 1.4831, 'learning_rate': 2.1356984478935697e-05, 'epoch': 3.09}


 62%|██████▏   | 14000/22550 [2:17:28<1:11:50,  1.98it/s]***** Running Evaluation *****
  Num examples = 2004
  Batch size = 8


{'loss': 1.4323, 'learning_rate': 2.123281596452328e-05, 'epoch': 3.1}


                                                         
 62%|██████▏   | 14000/22550 [2:19:40<1:11:50,  1.98it/s]Saving model checkpoint to model-t5-base\checkpoint-14000
Configuration saved in model-t5-base\checkpoint-14000\config.json


{'eval_loss': 1.6205980777740479, 'eval_rouge1': 47.7426, 'eval_rouge2': 27.2515, 'eval_rougeL': 42.25, 'eval_rougeLsum': 42.284, 'eval_runtime': 132.2813, 'eval_samples_per_second': 15.15, 'eval_steps_per_second': 1.897, 'epoch': 3.1}


Model weights saved in model-t5-base\checkpoint-14000\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-14000\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-14000\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-14000\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-12500] due to args.save_total_limit
 62%|██████▏   | 14050/22550 [2:20:05<1:02:39,  2.26it/s] 

{'loss': 1.4753, 'learning_rate': 2.1108647450110864e-05, 'epoch': 3.12}


 63%|██████▎   | 14100/22550 [2:20:27<1:11:03,  1.98it/s]

{'loss': 1.501, 'learning_rate': 2.0984478935698447e-05, 'epoch': 3.13}


 63%|██████▎   | 14150/22550 [2:20:49<1:02:57,  2.22it/s]

{'loss': 1.4526, 'learning_rate': 2.086031042128603e-05, 'epoch': 3.14}


 63%|██████▎   | 14200/22550 [2:21:11<1:11:52,  1.94it/s]

{'loss': 1.5066, 'learning_rate': 2.0736141906873614e-05, 'epoch': 3.15}


 63%|██████▎   | 14250/22550 [2:21:33<1:04:10,  2.16it/s]

{'loss': 1.4499, 'learning_rate': 2.0611973392461194e-05, 'epoch': 3.16}


 63%|██████▎   | 14300/22550 [2:21:55<1:10:10,  1.96it/s]

{'loss': 1.526, 'learning_rate': 2.048780487804878e-05, 'epoch': 3.17}


 64%|██████▎   | 14350/22550 [2:22:17<1:00:38,  2.25it/s]

{'loss': 1.4643, 'learning_rate': 2.0363636363636365e-05, 'epoch': 3.18}


 64%|██████▍   | 14400/22550 [2:22:39<1:08:44,  1.98it/s]

{'loss': 1.5591, 'learning_rate': 2.0239467849223945e-05, 'epoch': 3.19}


 64%|██████▍   | 14450/22550 [2:23:01<1:00:25,  2.23it/s]

{'loss': 1.547, 'learning_rate': 2.011529933481153e-05, 'epoch': 3.2}


 64%|██████▍   | 14500/22550 [2:23:23<1:09:19,  1.94it/s]Saving model checkpoint to model-t5-base\checkpoint-14500
Configuration saved in model-t5-base\checkpoint-14500\config.json


{'loss': 1.5163, 'learning_rate': 1.9991130820399112e-05, 'epoch': 3.22}


Model weights saved in model-t5-base\checkpoint-14500\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-14500\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-14500\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-14500\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-13000] due to args.save_total_limit
 65%|██████▍   | 14550/22550 [2:23:48<59:48,  2.23it/s]  

{'loss': 1.5095, 'learning_rate': 1.98669623059867e-05, 'epoch': 3.23}


 65%|██████▍   | 14600/22550 [2:24:11<1:06:31,  1.99it/s]

{'loss': 1.5067, 'learning_rate': 1.974279379157428e-05, 'epoch': 3.24}


 65%|██████▍   | 14650/22550 [2:24:32<59:35,  2.21it/s]  

{'loss': 1.5444, 'learning_rate': 1.9618625277161863e-05, 'epoch': 3.25}


 65%|██████▌   | 14700/22550 [2:24:54<1:05:50,  1.99it/s]

{'loss': 1.498, 'learning_rate': 1.9494456762749446e-05, 'epoch': 3.26}


 65%|██████▌   | 14750/22550 [2:25:16<59:09,  2.20it/s]  

{'loss': 1.5385, 'learning_rate': 1.9370288248337026e-05, 'epoch': 3.27}


 66%|██████▌   | 14800/22550 [2:25:38<1:05:00,  1.99it/s]

{'loss': 1.5147, 'learning_rate': 1.9246119733924613e-05, 'epoch': 3.28}


 66%|██████▌   | 14850/22550 [2:26:00<58:18,  2.20it/s]  

{'loss': 1.5128, 'learning_rate': 1.9121951219512197e-05, 'epoch': 3.29}


 66%|██████▌   | 14900/22550 [2:26:23<1:04:48,  1.97it/s]

{'loss': 1.5633, 'learning_rate': 1.8997782705099777e-05, 'epoch': 3.3}


 66%|██████▋   | 14950/22550 [2:26:44<56:40,  2.23it/s]  

{'loss': 1.5613, 'learning_rate': 1.887361419068736e-05, 'epoch': 3.31}


 67%|██████▋   | 15000/22550 [2:27:06<1:03:07,  1.99it/s]***** Running Evaluation *****
  Num examples = 2004
  Batch size = 8


{'loss': 1.4957, 'learning_rate': 1.8749445676274944e-05, 'epoch': 3.33}


                                                         
 67%|██████▋   | 15000/22550 [2:29:18<1:03:07,  1.99it/s]Saving model checkpoint to model-t5-base\checkpoint-15000
Configuration saved in model-t5-base\checkpoint-15000\config.json


{'eval_loss': 1.6118144989013672, 'eval_rouge1': 47.3849, 'eval_rouge2': 26.8466, 'eval_rougeL': 42.1647, 'eval_rougeLsum': 42.1834, 'eval_runtime': 132.0309, 'eval_samples_per_second': 15.178, 'eval_steps_per_second': 1.901, 'epoch': 3.33}


Model weights saved in model-t5-base\checkpoint-15000\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-15000\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-15000\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-15000\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-13500] due to args.save_total_limit
 67%|██████▋   | 15050/22550 [2:29:44<56:55,  2.20it/s]   

{'loss': 1.4917, 'learning_rate': 1.8625277161862528e-05, 'epoch': 3.34}


 67%|██████▋   | 15100/22550 [2:30:06<1:03:14,  1.96it/s]

{'loss': 1.5164, 'learning_rate': 1.850110864745011e-05, 'epoch': 3.35}


 67%|██████▋   | 15150/22550 [2:30:27<55:48,  2.21it/s]  

{'loss': 1.4878, 'learning_rate': 1.8376940133037695e-05, 'epoch': 3.36}


 67%|██████▋   | 15200/22550 [2:30:49<1:02:24,  1.96it/s]

{'loss': 1.4621, 'learning_rate': 1.8252771618625275e-05, 'epoch': 3.37}


 68%|██████▊   | 15250/22550 [2:31:11<53:04,  2.29it/s]  

{'loss': 1.5241, 'learning_rate': 1.8128603104212862e-05, 'epoch': 3.38}


 68%|██████▊   | 15300/22550 [2:31:33<1:00:43,  1.99it/s]

{'loss': 1.4417, 'learning_rate': 1.8004434589800445e-05, 'epoch': 3.39}


 68%|██████▊   | 15350/22550 [2:31:55<53:54,  2.23it/s]  

{'loss': 1.4742, 'learning_rate': 1.7880266075388025e-05, 'epoch': 3.4}


 68%|██████▊   | 15400/22550 [2:32:17<1:00:16,  1.98it/s]

{'loss': 1.5019, 'learning_rate': 1.775609756097561e-05, 'epoch': 3.41}


 69%|██████▊   | 15450/22550 [2:32:39<52:46,  2.24it/s]  

{'loss': 1.5219, 'learning_rate': 1.7631929046563192e-05, 'epoch': 3.43}


 69%|██████▊   | 15500/22550 [2:33:01<59:57,  1.96it/s]Saving model checkpoint to model-t5-base\checkpoint-15500
Configuration saved in model-t5-base\checkpoint-15500\config.json


{'loss': 1.4944, 'learning_rate': 1.7507760532150776e-05, 'epoch': 3.44}


Model weights saved in model-t5-base\checkpoint-15500\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-15500\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-15500\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-15500\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-14000] due to args.save_total_limit
 69%|██████▉   | 15550/22550 [2:33:26<52:27,  2.22it/s]  

{'loss': 1.5083, 'learning_rate': 1.738359201773836e-05, 'epoch': 3.45}


 69%|██████▉   | 15600/22550 [2:33:48<58:43,  1.97it/s]

{'loss': 1.5368, 'learning_rate': 1.7259423503325943e-05, 'epoch': 3.46}


 69%|██████▉   | 15650/22550 [2:34:10<52:04,  2.21it/s]

{'loss': 1.5485, 'learning_rate': 1.7135254988913523e-05, 'epoch': 3.47}


 70%|██████▉   | 15700/22550 [2:34:32<57:58,  1.97it/s]

{'loss': 1.4667, 'learning_rate': 1.701108647450111e-05, 'epoch': 3.48}


 70%|██████▉   | 15750/22550 [2:34:54<50:17,  2.25it/s]

{'loss': 1.554, 'learning_rate': 1.6886917960088694e-05, 'epoch': 3.49}


 70%|███████   | 15800/22550 [2:35:16<56:13,  2.00it/s]

{'loss': 1.4614, 'learning_rate': 1.6762749445676274e-05, 'epoch': 3.5}


 70%|███████   | 15850/22550 [2:35:38<50:35,  2.21it/s]

{'loss': 1.4653, 'learning_rate': 1.6638580931263857e-05, 'epoch': 3.51}


 71%|███████   | 15900/22550 [2:36:00<55:44,  1.99it/s]

{'loss': 1.4817, 'learning_rate': 1.651441241685144e-05, 'epoch': 3.53}


 71%|███████   | 15950/22550 [2:36:22<48:43,  2.26it/s]

{'loss': 1.478, 'learning_rate': 1.6390243902439024e-05, 'epoch': 3.54}


 71%|███████   | 16000/22550 [2:36:44<55:52,  1.95it/s]***** Running Evaluation *****
  Num examples = 2004
  Batch size = 8


{'loss': 1.488, 'learning_rate': 1.6266075388026608e-05, 'epoch': 3.55}


                                                       
 71%|███████   | 16000/22550 [2:38:56<55:52,  1.95it/s]Saving model checkpoint to model-t5-base\checkpoint-16000
Configuration saved in model-t5-base\checkpoint-16000\config.json


{'eval_loss': 1.6109421253204346, 'eval_rouge1': 47.8742, 'eval_rouge2': 27.4848, 'eval_rougeL': 42.5575, 'eval_rougeLsum': 42.5976, 'eval_runtime': 132.1961, 'eval_samples_per_second': 15.159, 'eval_steps_per_second': 1.899, 'epoch': 3.55}


Model weights saved in model-t5-base\checkpoint-16000\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-16000\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-16000\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-16000\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-14500] due to args.save_total_limit
 71%|███████   | 16050/22550 [2:39:21<47:19,  2.29it/s]   

{'loss': 1.4537, 'learning_rate': 1.614190687361419e-05, 'epoch': 3.56}


 71%|███████▏  | 16100/22550 [2:39:43<53:30,  2.01it/s]

{'loss': 1.5073, 'learning_rate': 1.601773835920177e-05, 'epoch': 3.57}


 72%|███████▏  | 16150/22550 [2:40:05<47:04,  2.27it/s]

{'loss': 1.4894, 'learning_rate': 1.5893569844789355e-05, 'epoch': 3.58}


 72%|███████▏  | 16200/22550 [2:40:27<53:59,  1.96it/s]

{'loss': 1.4422, 'learning_rate': 1.5769401330376942e-05, 'epoch': 3.59}


 72%|███████▏  | 16250/22550 [2:40:49<47:03,  2.23it/s]

{'loss': 1.4624, 'learning_rate': 1.5645232815964522e-05, 'epoch': 3.6}


 72%|███████▏  | 16300/22550 [2:41:11<53:35,  1.94it/s]

{'loss': 1.4785, 'learning_rate': 1.5521064301552106e-05, 'epoch': 3.61}


 73%|███████▎  | 16350/22550 [2:41:33<46:20,  2.23it/s]

{'loss': 1.5031, 'learning_rate': 1.539689578713969e-05, 'epoch': 3.63}


 73%|███████▎  | 16400/22550 [2:41:55<51:57,  1.97it/s]

{'loss': 1.4406, 'learning_rate': 1.5272727272727273e-05, 'epoch': 3.64}


 73%|███████▎  | 16450/22550 [2:42:17<45:07,  2.25it/s]

{'loss': 1.471, 'learning_rate': 1.5148558758314856e-05, 'epoch': 3.65}


 73%|███████▎  | 16500/22550 [2:42:39<51:38,  1.95it/s]Saving model checkpoint to model-t5-base\checkpoint-16500
Configuration saved in model-t5-base\checkpoint-16500\config.json


{'loss': 1.5215, 'learning_rate': 1.502439024390244e-05, 'epoch': 3.66}


Model weights saved in model-t5-base\checkpoint-16500\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-16500\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-16500\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-16500\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-15000] due to args.save_total_limit
 73%|███████▎  | 16550/22550 [2:43:04<44:00,  2.27it/s]  

{'loss': 1.5081, 'learning_rate': 1.490022172949002e-05, 'epoch': 3.67}


 74%|███████▎  | 16600/22550 [2:43:26<48:51,  2.03it/s]

{'loss': 1.4895, 'learning_rate': 1.4776053215077605e-05, 'epoch': 3.68}


 74%|███████▍  | 16650/22550 [2:43:48<44:08,  2.23it/s]

{'loss': 1.5473, 'learning_rate': 1.4651884700665189e-05, 'epoch': 3.69}


 74%|███████▍  | 16700/22550 [2:44:10<49:54,  1.95it/s]

{'loss': 1.5226, 'learning_rate': 1.452771618625277e-05, 'epoch': 3.7}


 74%|███████▍  | 16750/22550 [2:44:32<43:20,  2.23it/s]

{'loss': 1.4264, 'learning_rate': 1.4403547671840354e-05, 'epoch': 3.71}


 75%|███████▍  | 16800/22550 [2:44:54<48:50,  1.96it/s]

{'loss': 1.4646, 'learning_rate': 1.4279379157427938e-05, 'epoch': 3.73}


 75%|███████▍  | 16850/22550 [2:45:16<41:57,  2.26it/s]

{'loss': 1.4377, 'learning_rate': 1.415521064301552e-05, 'epoch': 3.74}


 75%|███████▍  | 16900/22550 [2:45:38<47:50,  1.97it/s]

{'loss': 1.4834, 'learning_rate': 1.4031042128603103e-05, 'epoch': 3.75}


 75%|███████▌  | 16950/22550 [2:46:00<41:07,  2.27it/s]

{'loss': 1.4562, 'learning_rate': 1.3906873614190688e-05, 'epoch': 3.76}


 75%|███████▌  | 17000/22550 [2:46:22<46:33,  1.99it/s]***** Running Evaluation *****
  Num examples = 2004
  Batch size = 8


{'loss': 1.5263, 'learning_rate': 1.378270509977827e-05, 'epoch': 3.77}


                                                       
 75%|███████▌  | 17000/22550 [2:48:34<46:33,  1.99it/s]Saving model checkpoint to model-t5-base\checkpoint-17000
Configuration saved in model-t5-base\checkpoint-17000\config.json


{'eval_loss': 1.6099307537078857, 'eval_rouge1': 47.5866, 'eval_rouge2': 27.0153, 'eval_rougeL': 42.1857, 'eval_rougeLsum': 42.1908, 'eval_runtime': 132.1643, 'eval_samples_per_second': 15.163, 'eval_steps_per_second': 1.899, 'epoch': 3.77}


Model weights saved in model-t5-base\checkpoint-17000\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-17000\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-17000\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-17000\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-15500] due to args.save_total_limit
 76%|███████▌  | 17050/22550 [2:48:59<39:54,  2.30it/s]   

{'loss': 1.5407, 'learning_rate': 1.3658536585365854e-05, 'epoch': 3.78}


 76%|███████▌  | 17100/22550 [2:49:21<45:20,  2.00it/s]

{'loss': 1.4913, 'learning_rate': 1.3534368070953437e-05, 'epoch': 3.79}


 76%|███████▌  | 17150/22550 [2:49:43<41:22,  2.17it/s]

{'loss': 1.5257, 'learning_rate': 1.341019955654102e-05, 'epoch': 3.8}


 76%|███████▋  | 17200/22550 [2:50:05<45:12,  1.97it/s]

{'loss': 1.5032, 'learning_rate': 1.3286031042128603e-05, 'epoch': 3.81}


 76%|███████▋  | 17250/22550 [2:50:27<39:51,  2.22it/s]

{'loss': 1.4837, 'learning_rate': 1.3161862527716186e-05, 'epoch': 3.82}


 77%|███████▋  | 17300/22550 [2:50:49<44:37,  1.96it/s]

{'loss': 1.4891, 'learning_rate': 1.303769401330377e-05, 'epoch': 3.84}


 77%|███████▋  | 17350/22550 [2:51:11<39:48,  2.18it/s]

{'loss': 1.4956, 'learning_rate': 1.2913525498891351e-05, 'epoch': 3.85}


 77%|███████▋  | 17400/22550 [2:51:33<43:02,  1.99it/s]

{'loss': 1.5167, 'learning_rate': 1.2789356984478937e-05, 'epoch': 3.86}


 77%|███████▋  | 17450/22550 [2:51:55<38:33,  2.20it/s]

{'loss': 1.4703, 'learning_rate': 1.2665188470066519e-05, 'epoch': 3.87}


 78%|███████▊  | 17500/22550 [2:52:17<42:10,  2.00it/s]Saving model checkpoint to model-t5-base\checkpoint-17500
Configuration saved in model-t5-base\checkpoint-17500\config.json


{'loss': 1.4285, 'learning_rate': 1.2541019955654102e-05, 'epoch': 3.88}


Model weights saved in model-t5-base\checkpoint-17500\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-17500\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-17500\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-17500\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-16000] due to args.save_total_limit
 78%|███████▊  | 17550/22550 [2:52:42<36:36,  2.28it/s]  

{'loss': 1.524, 'learning_rate': 1.2416851441241686e-05, 'epoch': 3.89}


 78%|███████▊  | 17600/22550 [2:53:04<42:23,  1.95it/s]

{'loss': 1.4458, 'learning_rate': 1.2292682926829267e-05, 'epoch': 3.9}


 78%|███████▊  | 17650/22550 [2:53:26<36:09,  2.26it/s]

{'loss': 1.4884, 'learning_rate': 1.2168514412416851e-05, 'epoch': 3.91}


 78%|███████▊  | 17700/22550 [2:53:48<41:29,  1.95it/s]

{'loss': 1.5182, 'learning_rate': 1.2044345898004435e-05, 'epoch': 3.92}


 79%|███████▊  | 17750/22550 [2:54:10<35:40,  2.24it/s]

{'loss': 1.4368, 'learning_rate': 1.1920177383592018e-05, 'epoch': 3.94}


 79%|███████▉  | 17800/22550 [2:54:32<40:13,  1.97it/s]

{'loss': 1.5322, 'learning_rate': 1.17960088691796e-05, 'epoch': 3.95}


 79%|███████▉  | 17850/22550 [2:54:54<35:39,  2.20it/s]

{'loss': 1.483, 'learning_rate': 1.1671840354767185e-05, 'epoch': 3.96}


 79%|███████▉  | 17900/22550 [2:55:16<38:37,  2.01it/s]

{'loss': 1.554, 'learning_rate': 1.1547671840354767e-05, 'epoch': 3.97}


 80%|███████▉  | 17950/22550 [2:55:38<35:17,  2.17it/s]

{'loss': 1.5303, 'learning_rate': 1.1423503325942349e-05, 'epoch': 3.98}


 80%|███████▉  | 18000/22550 [2:56:00<38:25,  1.97it/s]***** Running Evaluation *****
  Num examples = 2004
  Batch size = 8


{'loss': 1.5411, 'learning_rate': 1.1299334811529934e-05, 'epoch': 3.99}


                                                       
 80%|███████▉  | 18000/22550 [2:58:12<38:25,  1.97it/s]Saving model checkpoint to model-t5-base\checkpoint-18000
Configuration saved in model-t5-base\checkpoint-18000\config.json


{'eval_loss': 1.6069130897521973, 'eval_rouge1': 47.8659, 'eval_rouge2': 27.4457, 'eval_rougeL': 42.5113, 'eval_rougeLsum': 42.543, 'eval_runtime': 132.3127, 'eval_samples_per_second': 15.146, 'eval_steps_per_second': 1.897, 'epoch': 3.99}


Model weights saved in model-t5-base\checkpoint-18000\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-18000\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-18000\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-18000\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-16500] due to args.save_total_limit
 80%|████████  | 18050/22550 [2:58:37<33:05,  2.27it/s]   

{'loss': 1.5045, 'learning_rate': 1.1175166297117516e-05, 'epoch': 4.0}


 80%|████████  | 18100/22550 [2:58:59<37:01,  2.00it/s]

{'loss': 1.4606, 'learning_rate': 1.10509977827051e-05, 'epoch': 4.01}


 80%|████████  | 18150/22550 [2:59:20<32:39,  2.24it/s]

{'loss': 1.435, 'learning_rate': 1.0926829268292683e-05, 'epoch': 4.02}


 81%|████████  | 18200/22550 [2:59:42<35:39,  2.03it/s]

{'loss': 1.3457, 'learning_rate': 1.0802660753880266e-05, 'epoch': 4.04}


 81%|████████  | 18250/22550 [3:00:04<31:49,  2.25it/s]

{'loss': 1.4604, 'learning_rate': 1.0678492239467848e-05, 'epoch': 4.05}


 81%|████████  | 18300/22550 [3:00:26<36:00,  1.97it/s]

{'loss': 1.4409, 'learning_rate': 1.0554323725055432e-05, 'epoch': 4.06}


 81%|████████▏ | 18350/22550 [3:00:48<31:26,  2.23it/s]

{'loss': 1.4178, 'learning_rate': 1.0430155210643015e-05, 'epoch': 4.07}


 82%|████████▏ | 18400/22550 [3:01:10<35:10,  1.97it/s]

{'loss': 1.4159, 'learning_rate': 1.0305986696230597e-05, 'epoch': 4.08}


 82%|████████▏ | 18450/22550 [3:01:32<30:19,  2.25it/s]

{'loss': 1.4707, 'learning_rate': 1.0181818181818182e-05, 'epoch': 4.09}


 82%|████████▏ | 18500/22550 [3:01:54<34:22,  1.96it/s]Saving model checkpoint to model-t5-base\checkpoint-18500
Configuration saved in model-t5-base\checkpoint-18500\config.json


{'loss': 1.4518, 'learning_rate': 1.0057649667405764e-05, 'epoch': 4.1}


Model weights saved in model-t5-base\checkpoint-18500\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-18500\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-18500\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-18500\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-17000] due to args.save_total_limit
 82%|████████▏ | 18550/22550 [3:02:19<30:29,  2.19it/s]  

{'loss': 1.3716, 'learning_rate': 9.93348115299335e-06, 'epoch': 4.11}


 82%|████████▏ | 18600/22550 [3:02:41<33:30,  1.96it/s]

{'loss': 1.4757, 'learning_rate': 9.809312638580931e-06, 'epoch': 4.12}


 83%|████████▎ | 18650/22550 [3:03:03<28:52,  2.25it/s]

{'loss': 1.522, 'learning_rate': 9.685144124168513e-06, 'epoch': 4.14}


 83%|████████▎ | 18700/22550 [3:03:25<32:16,  1.99it/s]

{'loss': 1.4833, 'learning_rate': 9.560975609756098e-06, 'epoch': 4.15}


 83%|████████▎ | 18750/22550 [3:03:47<28:28,  2.22it/s]

{'loss': 1.4582, 'learning_rate': 9.43680709534368e-06, 'epoch': 4.16}


 83%|████████▎ | 18800/22550 [3:04:09<31:35,  1.98it/s]

{'loss': 1.4337, 'learning_rate': 9.312638580931264e-06, 'epoch': 4.17}


 84%|████████▎ | 18850/22550 [3:04:31<27:30,  2.24it/s]

{'loss': 1.4488, 'learning_rate': 9.188470066518847e-06, 'epoch': 4.18}


 84%|████████▍ | 18900/22550 [3:04:53<31:16,  1.94it/s]

{'loss': 1.455, 'learning_rate': 9.064301552106431e-06, 'epoch': 4.19}


 84%|████████▍ | 18950/22550 [3:05:15<26:59,  2.22it/s]

{'loss': 1.3936, 'learning_rate': 8.940133037694013e-06, 'epoch': 4.2}


 84%|████████▍ | 19000/22550 [3:05:37<29:51,  1.98it/s]***** Running Evaluation *****
  Num examples = 2004
  Batch size = 8


{'loss': 1.3489, 'learning_rate': 8.815964523281596e-06, 'epoch': 4.21}


                                                       
 84%|████████▍ | 19000/22550 [3:07:49<29:51,  1.98it/s]Saving model checkpoint to model-t5-base\checkpoint-19000
Configuration saved in model-t5-base\checkpoint-19000\config.json


{'eval_loss': 1.6194851398468018, 'eval_rouge1': 47.7717, 'eval_rouge2': 27.3407, 'eval_rougeL': 42.3645, 'eval_rougeLsum': 42.39, 'eval_runtime': 132.6122, 'eval_samples_per_second': 15.112, 'eval_steps_per_second': 1.893, 'epoch': 4.21}


Model weights saved in model-t5-base\checkpoint-19000\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-19000\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-19000\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-19000\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-17500] due to args.save_total_limit
 84%|████████▍ | 19050/22550 [3:08:15<26:01,  2.24it/s]   

{'loss': 1.4366, 'learning_rate': 8.69179600886918e-06, 'epoch': 4.22}


 85%|████████▍ | 19100/22550 [3:08:37<29:14,  1.97it/s]

{'loss': 1.4703, 'learning_rate': 8.567627494456762e-06, 'epoch': 4.24}


 85%|████████▍ | 19150/22550 [3:08:58<25:00,  2.27it/s]

{'loss': 1.3996, 'learning_rate': 8.443458980044347e-06, 'epoch': 4.25}


 85%|████████▌ | 19200/22550 [3:09:20<28:21,  1.97it/s]

{'loss': 1.4334, 'learning_rate': 8.319290465631929e-06, 'epoch': 4.26}


 85%|████████▌ | 19250/22550 [3:09:42<24:45,  2.22it/s]

{'loss': 1.4617, 'learning_rate': 8.195121951219512e-06, 'epoch': 4.27}


 86%|████████▌ | 19300/22550 [3:10:04<27:19,  1.98it/s]

{'loss': 1.4512, 'learning_rate': 8.070953436807096e-06, 'epoch': 4.28}


 86%|████████▌ | 19350/22550 [3:10:26<23:49,  2.24it/s]

{'loss': 1.4395, 'learning_rate': 7.946784922394678e-06, 'epoch': 4.29}


 86%|████████▌ | 19400/22550 [3:10:48<26:29,  1.98it/s]

{'loss': 1.4296, 'learning_rate': 7.822616407982261e-06, 'epoch': 4.3}


 86%|████████▋ | 19450/22550 [3:11:10<23:15,  2.22it/s]

{'loss': 1.4556, 'learning_rate': 7.698447893569845e-06, 'epoch': 4.31}


 86%|████████▋ | 19500/22550 [3:11:32<25:29,  1.99it/s]Saving model checkpoint to model-t5-base\checkpoint-19500
Configuration saved in model-t5-base\checkpoint-19500\config.json


{'loss': 1.5385, 'learning_rate': 7.574279379157428e-06, 'epoch': 4.32}


Model weights saved in model-t5-base\checkpoint-19500\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-19500\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-19500\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-19500\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-18000] due to args.save_total_limit
 87%|████████▋ | 19550/22550 [3:11:57<22:08,  2.26it/s]  

{'loss': 1.4611, 'learning_rate': 7.45011086474501e-06, 'epoch': 4.33}


 87%|████████▋ | 19600/22550 [3:12:19<25:01,  1.96it/s]

{'loss': 1.4716, 'learning_rate': 7.325942350332594e-06, 'epoch': 4.35}


 87%|████████▋ | 19650/22550 [3:12:41<21:34,  2.24it/s]

{'loss': 1.4634, 'learning_rate': 7.201773835920177e-06, 'epoch': 4.36}


 87%|████████▋ | 19700/22550 [3:13:03<24:12,  1.96it/s]

{'loss': 1.4074, 'learning_rate': 7.07760532150776e-06, 'epoch': 4.37}


 88%|████████▊ | 19750/22550 [3:13:25<20:42,  2.25it/s]

{'loss': 1.461, 'learning_rate': 6.953436807095344e-06, 'epoch': 4.38}


 88%|████████▊ | 19800/22550 [3:13:47<23:41,  1.93it/s]

{'loss': 1.3824, 'learning_rate': 6.829268292682927e-06, 'epoch': 4.39}


 88%|████████▊ | 19850/22550 [3:14:09<19:55,  2.26it/s]

{'loss': 1.4358, 'learning_rate': 6.70509977827051e-06, 'epoch': 4.4}


 88%|████████▊ | 19900/22550 [3:14:31<22:28,  1.96it/s]

{'loss': 1.5085, 'learning_rate': 6.580931263858093e-06, 'epoch': 4.41}


 88%|████████▊ | 19950/22550 [3:14:53<19:34,  2.21it/s]

{'loss': 1.3906, 'learning_rate': 6.456762749445676e-06, 'epoch': 4.42}


 89%|████████▊ | 20000/22550 [3:15:15<21:41,  1.96it/s]***** Running Evaluation *****
  Num examples = 2004
  Batch size = 8


{'loss': 1.4863, 'learning_rate': 6.332594235033259e-06, 'epoch': 4.43}


                                                       
 89%|████████▊ | 20000/22550 [3:17:28<21:41,  1.96it/s]Saving model checkpoint to model-t5-base\checkpoint-20000
Configuration saved in model-t5-base\checkpoint-20000\config.json


{'eval_loss': 1.6156609058380127, 'eval_rouge1': 47.685, 'eval_rouge2': 26.9536, 'eval_rougeL': 42.333, 'eval_rougeLsum': 42.3204, 'eval_runtime': 132.517, 'eval_samples_per_second': 15.123, 'eval_steps_per_second': 1.894, 'epoch': 4.43}


Model weights saved in model-t5-base\checkpoint-20000\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-20000\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-20000\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-20000\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-18500] due to args.save_total_limit
 89%|████████▉ | 20050/22550 [3:17:53<18:37,  2.24it/s]   

{'loss': 1.4331, 'learning_rate': 6.208425720620843e-06, 'epoch': 4.45}


 89%|████████▉ | 20100/22550 [3:18:15<20:35,  1.98it/s]

{'loss': 1.4116, 'learning_rate': 6.0842572062084255e-06, 'epoch': 4.46}


 89%|████████▉ | 20150/22550 [3:18:36<17:34,  2.28it/s]

{'loss': 1.3967, 'learning_rate': 5.960088691796009e-06, 'epoch': 4.47}


 90%|████████▉ | 20200/22550 [3:18:58<19:26,  2.01it/s]

{'loss': 1.4079, 'learning_rate': 5.8359201773835926e-06, 'epoch': 4.48}


 90%|████████▉ | 20250/22550 [3:19:20<17:10,  2.23it/s]

{'loss': 1.4326, 'learning_rate': 5.711751662971174e-06, 'epoch': 4.49}


 90%|█████████ | 20300/22550 [3:19:42<18:50,  1.99it/s]

{'loss': 1.4376, 'learning_rate': 5.587583148558758e-06, 'epoch': 4.5}


 90%|█████████ | 20350/22550 [3:20:04<16:23,  2.24it/s]

{'loss': 1.4289, 'learning_rate': 5.4634146341463415e-06, 'epoch': 4.51}


 90%|█████████ | 20400/22550 [3:20:26<17:59,  1.99it/s]

{'loss': 1.5134, 'learning_rate': 5.339246119733924e-06, 'epoch': 4.52}


 91%|█████████ | 20450/22550 [3:20:48<15:26,  2.27it/s]

{'loss': 1.4529, 'learning_rate': 5.215077605321508e-06, 'epoch': 4.53}


 91%|█████████ | 20500/22550 [3:21:09<17:08,  1.99it/s]Saving model checkpoint to model-t5-base\checkpoint-20500
Configuration saved in model-t5-base\checkpoint-20500\config.json


{'loss': 1.4432, 'learning_rate': 5.090909090909091e-06, 'epoch': 4.55}


Model weights saved in model-t5-base\checkpoint-20500\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-20500\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-20500\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-20500\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-19000] due to args.save_total_limit
 91%|█████████ | 20550/22550 [3:21:35<14:50,  2.25it/s]

{'loss': 1.5065, 'learning_rate': 4.966740576496675e-06, 'epoch': 4.56}


 91%|█████████▏| 20600/22550 [3:21:57<16:38,  1.95it/s]

{'loss': 1.3789, 'learning_rate': 4.842572062084257e-06, 'epoch': 4.57}


 92%|█████████▏| 20650/22550 [3:22:19<14:07,  2.24it/s]

{'loss': 1.4841, 'learning_rate': 4.71840354767184e-06, 'epoch': 4.58}


 92%|█████████▏| 20700/22550 [3:22:41<15:39,  1.97it/s]

{'loss': 1.435, 'learning_rate': 4.594235033259424e-06, 'epoch': 4.59}


 92%|█████████▏| 20750/22550 [3:23:03<13:31,  2.22it/s]

{'loss': 1.4356, 'learning_rate': 4.470066518847006e-06, 'epoch': 4.6}


 92%|█████████▏| 20800/22550 [3:23:25<14:56,  1.95it/s]

{'loss': 1.411, 'learning_rate': 4.34589800443459e-06, 'epoch': 4.61}


 92%|█████████▏| 20850/22550 [3:23:46<12:34,  2.25it/s]

{'loss': 1.4525, 'learning_rate': 4.221729490022173e-06, 'epoch': 4.62}


 93%|█████████▎| 20900/22550 [3:24:08<14:14,  1.93it/s]

{'loss': 1.4581, 'learning_rate': 4.097560975609756e-06, 'epoch': 4.63}


 93%|█████████▎| 20950/22550 [3:24:30<12:08,  2.20it/s]

{'loss': 1.357, 'learning_rate': 3.973392461197339e-06, 'epoch': 4.65}


 93%|█████████▎| 21000/22550 [3:24:52<13:07,  1.97it/s]***** Running Evaluation *****
  Num examples = 2004
  Batch size = 8


{'loss': 1.4614, 'learning_rate': 3.849223946784922e-06, 'epoch': 4.66}


                                                       
 93%|█████████▎| 21000/22550 [3:27:05<13:07,  1.97it/s]Saving model checkpoint to model-t5-base\checkpoint-21000
Configuration saved in model-t5-base\checkpoint-21000\config.json


{'eval_loss': 1.6162070035934448, 'eval_rouge1': 47.7095, 'eval_rouge2': 27.0597, 'eval_rougeL': 42.4422, 'eval_rougeLsum': 42.4241, 'eval_runtime': 132.4129, 'eval_samples_per_second': 15.134, 'eval_steps_per_second': 1.896, 'epoch': 4.66}


Model weights saved in model-t5-base\checkpoint-21000\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-21000\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-21000\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-21000\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-19500] due to args.save_total_limit
 93%|█████████▎| 21050/22550 [3:27:30<11:02,  2.26it/s]   

{'loss': 1.4651, 'learning_rate': 3.725055432372505e-06, 'epoch': 4.67}


 94%|█████████▎| 21100/22550 [3:27:52<12:18,  1.96it/s]

{'loss': 1.471, 'learning_rate': 3.6008869179600885e-06, 'epoch': 4.68}


 94%|█████████▍| 21150/22550 [3:28:14<10:14,  2.28it/s]

{'loss': 1.4334, 'learning_rate': 3.476718403547672e-06, 'epoch': 4.69}


 94%|█████████▍| 21200/22550 [3:28:36<11:25,  1.97it/s]

{'loss': 1.458, 'learning_rate': 3.352549889135255e-06, 'epoch': 4.7}


 94%|█████████▍| 21250/22550 [3:28:58<09:35,  2.26it/s]

{'loss': 1.4303, 'learning_rate': 3.228381374722838e-06, 'epoch': 4.71}


 94%|█████████▍| 21300/22550 [3:29:20<10:33,  1.97it/s]

{'loss': 1.5048, 'learning_rate': 3.1042128603104214e-06, 'epoch': 4.72}


 95%|█████████▍| 21350/22550 [3:29:42<08:57,  2.23it/s]

{'loss': 1.4475, 'learning_rate': 2.9800443458980045e-06, 'epoch': 4.73}


 95%|█████████▍| 21400/22550 [3:30:04<09:30,  2.02it/s]

{'loss': 1.4554, 'learning_rate': 2.855875831485587e-06, 'epoch': 4.75}


 95%|█████████▌| 21450/22550 [3:30:25<08:10,  2.24it/s]

{'loss': 1.5026, 'learning_rate': 2.7317073170731707e-06, 'epoch': 4.76}


 95%|█████████▌| 21500/22550 [3:30:48<08:54,  1.96it/s]Saving model checkpoint to model-t5-base\checkpoint-21500
Configuration saved in model-t5-base\checkpoint-21500\config.json


{'loss': 1.3796, 'learning_rate': 2.607538802660754e-06, 'epoch': 4.77}


Model weights saved in model-t5-base\checkpoint-21500\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-21500\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-21500\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-21500\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-20000] due to args.save_total_limit
 96%|█████████▌| 21550/22550 [3:31:13<07:23,  2.26it/s]

{'loss': 1.3946, 'learning_rate': 2.4833702882483374e-06, 'epoch': 4.78}


 96%|█████████▌| 21600/22550 [3:31:35<07:52,  2.01it/s]

{'loss': 1.4716, 'learning_rate': 2.35920177383592e-06, 'epoch': 4.79}


 96%|█████████▌| 21650/22550 [3:31:56<06:42,  2.23it/s]

{'loss': 1.4308, 'learning_rate': 2.235033259423503e-06, 'epoch': 4.8}


 96%|█████████▌| 21700/22550 [3:32:18<07:04,  2.00it/s]

{'loss': 1.4607, 'learning_rate': 2.1108647450110867e-06, 'epoch': 4.81}


 96%|█████████▋| 21750/22550 [3:32:40<05:46,  2.31it/s]

{'loss': 1.4957, 'learning_rate': 1.9866962305986694e-06, 'epoch': 4.82}


 97%|█████████▋| 21800/22550 [3:33:02<06:26,  1.94it/s]

{'loss': 1.4522, 'learning_rate': 1.8625277161862525e-06, 'epoch': 4.83}


 97%|█████████▋| 21850/22550 [3:33:24<05:14,  2.22it/s]

{'loss': 1.4858, 'learning_rate': 1.738359201773836e-06, 'epoch': 4.84}


 97%|█████████▋| 21900/22550 [3:33:46<05:26,  1.99it/s]

{'loss': 1.54, 'learning_rate': 1.614190687361419e-06, 'epoch': 4.86}


 97%|█████████▋| 21950/22550 [3:34:08<04:30,  2.22it/s]

{'loss': 1.3895, 'learning_rate': 1.4900221729490023e-06, 'epoch': 4.87}


 98%|█████████▊| 22000/22550 [3:34:30<04:32,  2.02it/s]***** Running Evaluation *****
  Num examples = 2004
  Batch size = 8


{'loss': 1.3943, 'learning_rate': 1.3658536585365854e-06, 'epoch': 4.88}


                                                       
 98%|█████████▊| 22000/22550 [3:36:42<04:32,  2.02it/s]Saving model checkpoint to model-t5-base\checkpoint-22000
Configuration saved in model-t5-base\checkpoint-22000\config.json


{'eval_loss': 1.6126153469085693, 'eval_rouge1': 47.5439, 'eval_rouge2': 26.964, 'eval_rougeL': 42.1825, 'eval_rougeLsum': 42.1968, 'eval_runtime': 132.338, 'eval_samples_per_second': 15.143, 'eval_steps_per_second': 1.897, 'epoch': 4.88}


Model weights saved in model-t5-base\checkpoint-22000\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-22000\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-22000\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-22000\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-20500] due to args.save_total_limit
 98%|█████████▊| 22050/22550 [3:37:07<03:37,  2.30it/s]  

{'loss': 1.3957, 'learning_rate': 1.2416851441241687e-06, 'epoch': 4.89}


 98%|█████████▊| 22100/22550 [3:37:29<03:44,  2.00it/s]

{'loss': 1.4521, 'learning_rate': 1.1175166297117516e-06, 'epoch': 4.9}


 98%|█████████▊| 22150/22550 [3:37:51<02:55,  2.28it/s]

{'loss': 1.447, 'learning_rate': 9.933481152993347e-07, 'epoch': 4.91}


 98%|█████████▊| 22200/22550 [3:38:13<02:55,  2.00it/s]

{'loss': 1.3873, 'learning_rate': 8.69179600886918e-07, 'epoch': 4.92}


 99%|█████████▊| 22250/22550 [3:38:35<02:12,  2.26it/s]

{'loss': 1.4257, 'learning_rate': 7.450110864745011e-07, 'epoch': 4.93}


 99%|█████████▉| 22300/22550 [3:38:57<02:07,  1.96it/s]

{'loss': 1.4345, 'learning_rate': 6.208425720620843e-07, 'epoch': 4.94}


 99%|█████████▉| 22350/22550 [3:39:19<01:27,  2.28it/s]

{'loss': 1.4788, 'learning_rate': 4.966740576496673e-07, 'epoch': 4.96}


 99%|█████████▉| 22400/22550 [3:39:41<01:14,  2.01it/s]

{'loss': 1.4822, 'learning_rate': 3.7250554323725056e-07, 'epoch': 4.97}


100%|█████████▉| 22450/22550 [3:40:03<00:44,  2.23it/s]

{'loss': 1.4351, 'learning_rate': 2.483370288248337e-07, 'epoch': 4.98}


100%|█████████▉| 22500/22550 [3:40:25<00:25,  2.00it/s]Saving model checkpoint to model-t5-base\checkpoint-22500
Configuration saved in model-t5-base\checkpoint-22500\config.json


{'loss': 1.4022, 'learning_rate': 1.2416851441241684e-07, 'epoch': 4.99}


Model weights saved in model-t5-base\checkpoint-22500\pytorch_model.bin
tokenizer config file saved in model-t5-base\checkpoint-22500\tokenizer_config.json
Special tokens file saved in model-t5-base\checkpoint-22500\special_tokens_map.json
Copy vocab file to model-t5-base\checkpoint-22500\spiece.model
Deleting older checkpoint [model-t5-base\checkpoint-21000] due to args.save_total_limit
100%|██████████| 22550/22550 [3:40:50<00:00,  2.89it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


100%|██████████| 22550/22550 [3:40:50<00:00,  1.70it/s]

{'loss': 1.4761, 'learning_rate': 0.0, 'epoch': 5.0}
{'train_runtime': 13259.9791, 'train_samples_per_second': 13.603, 'train_steps_per_second': 1.701, 'train_loss': 1.62230531476818, 'epoch': 5.0}
Wall time: 3h 41min 13s





TrainOutput(global_step=22550, training_loss=1.62230531476818, metrics={'train_runtime': 13259.9791, 'train_samples_per_second': 13.603, 'train_steps_per_second': 1.701, 'train_loss': 1.62230531476818, 'epoch': 5.0})

## Evaluate

In [19]:
model = AutoModelForSeq2SeqLM.from_pretrained('model-t5-base/checkpoint-22500/').to('cuda')
tokenizer = AutoTokenizer.from_pretrained('model-t5-base/checkpoint-22500/')

loading configuration file model-t5-base/checkpoint-22500/config.json
Model config T5Config {
  "_name_or_path": "model-t5-base/checkpoint-22500/",
  "architectures": [
    "T5ForConditionalGeneration"
  ],
  "d_ff": 3072,
  "d_kv": 64,
  "d_model": 768,
  "decoder_start_token_id": 0,
  "dropout_rate": 0.1,
  "eos_token_id": 1,
  "feed_forward_proj": "relu",
  "initializer_factor": 1.0,
  "is_encoder_decoder": true,
  "layer_norm_epsilon": 1e-06,
  "model_type": "t5",
  "n_positions": 512,
  "num_decoder_layers": 12,
  "num_heads": 12,
  "num_layers": 12,
  "output_past": true,
  "pad_token_id": 0,
  "relative_attention_num_buckets": 32,
  "task_specific_params": {
    "summarization": {
      "early_stopping": true,
      "length_penalty": 2.0,
      "max_length": 200,
      "min_length": 30,
      "no_repeat_ngram_size": 3,
      "num_beams": 4,
      "prefix": "summarize: "
    },
    "translation_en_to_de": {
      "early_stopping": true,
      "max_length": 300,
      "num_beams":

In [20]:
%%time
trainer.evaluate(eval_dataset=processed_dataset['test'])

***** Running Evaluation *****
  Num examples = 2005
  Batch size = 8
100%|██████████| 251/251 [02:21<00:00,  1.78it/s]

Wall time: 2min 21s





{'eval_loss': 1.6426174640655518,
 'eval_rouge1': 47.9784,
 'eval_rouge2': 27.3502,
 'eval_rougeL': 42.4659,
 'eval_rougeLsum': 42.5108,
 'eval_runtime': 141.6856,
 'eval_samples_per_second': 14.151,
 'eval_steps_per_second': 1.772,
 'epoch': 5.0}

## Predict

In [21]:
temperature = 0.9
num_beams = 4
max_gen_length = 128

In [22]:
abstract = """In this paper, we question if self-supervised learning provides
new properties to Vision Transformer (ViT) [19] that
stand out compared to convolutional networks (convnets).
Beyond the fact that adapting self-supervised methods to this
architecture works particularly well, we make the following
observations: first, self-supervised ViT features contain
explicit information about the semantic segmentation of an
image, which does not emerge as clearly with supervised
ViTs, nor with convnets. Second, these features are also excellent
k-NN classifiers, reaching 78.3% top-1 on ImageNet
with a small ViT. Our study also underlines the importance of
momentum encoder [33], multi-crop training [10], and the
use of small patches with ViTs. We implement our findings
into a simple self-supervised method, called DINO, which
we interpret as a form of self-distillation with no labels.
We show the synergy between DINO and ViTs by achieving
80.1% top-1 on ImageNet in linear evaluation with ViT-Base"""
# abstract = dataset['test'][0]['abstract']
inputs = tokenizer([abstract], max_length=512, return_tensors='pt')

title_ids = model.generate(
    inputs['input_ids'].to('cuda'), 
    num_beams=num_beams, 
    temperature=temperature, 
    max_length=max_gen_length, 
    early_stopping=True
)
title = tokenizer.decode(title_ids[0].tolist(), skip_special_tokens=True, clean_up_tokenization_spaces=False)
print(title)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Self-Supervised Vision Transformers


In [26]:
abstract = " Reception of AI paintings was positive when human subjects were unaware of the painter’s identity [24, 26] When people were told certain paintings were created by AI (attributed artist identity = AI), they rated the paintings significantly lower than other people who thought the artist was human . Therefore, we want to replicate the experiment and find out how attributed artist identity might affect the judgement on AI art ."

inputs = tokenizer([abstract], max_length=512, return_tensors='pt')

title_ids = model.generate(
    inputs['input_ids'].to('cuda'), 
    num_beams=num_beams, 
    temperature=temperature, 
    max_length=max_gen_length, 
    early_stopping=True
)
title = tokenizer.decode(title_ids[0].tolist(), skip_special_tokens=True, clean_up_tokenization_spaces=False)
print(title)

Identifying AI Artists: The Impact of Attributed Artist Identity
