# Finetuning Ranker

Gratidão aos colegas Thiago Soares Laitz e Hugo (hugo@maritaca.ai) pelo apoio e código base fornecidos

# Installs

pip install bitsandbytes

pip install accelerate -U

pip install sentencepiece

!pip install transformers[torch]

!pip install datasets

## Neptune rastro

In [None]:
import neptune.new as neptune 

In [None]:
neptune.__version__

'0.16.18'

In [None]:
import getpass


In [None]:
os.environ['NEPTUNE_ALLOW_SELF_SIGNED_CERTIFICATE'] = 'TRUE'
os.environ['NEPTUNE_PROJECT'] = 'marcusborela/IA386DD'
os.environ['NEPTUNE_API_TOKEN'] = getpass.getpass('Informe NEPTUNE_API_TOKEN')

tag_contexto_rastro = 'INDIR_PTT5'
neptune_version = 0

In [None]:
def converte_optimizer_state_dict(parm_optimizer)-> dict:
  """
    Recebe um objeto "parm_optimizer" que é do tipo "torch.optim.Optimizer" e retorna um dicionário 
    com informações sobre o otimizador.

    O dicionário de retorno é gerado a partir do estado do otimizador que é extraído da propriedade
    "state_dict()" do objeto "parm_optimizer", seu primeiro grupo de parâmetros do otimizador.
  """
  # return str(hparam['optimizer'])
  return parm_optimizer.state_dict()['param_groups'][0]
if neptune_version == 0:
  import neptune.new as neptune  
  class NeptuneRastroRun():
      se_geracao_rastro = True 
      neptune_project = ""
      tag_contexto_rastro = ""
      neptune_api_token = ""

      def __init__(self, parm_params:dict,  parm_lista_tag:list = None):
        # print(f"NeptuneRastroRun.init: se_geracao_rastro {self.__class__.se_geracao_rastro} parm_params `{parm_params} ")
        if self.__class__.se_geracao_rastro:      
          self.run_neptune = neptune.init(project=self.__class__.neptune_project, api_token=self.__class__.neptune_api_token, capture_hardware_metrics=True)
          self.run_neptune['sys/name'] = self.__class__.tag_contexto_rastro
          vparams = copy.deepcopy(parm_params)
          if "optimizer" in vparams:
            vparams["optimizer"] = converte_optimizer_state_dict(vparams["optimizer"])
          if 'criterion'  in vparams:
            vparams["criterion"] = str(vparams["criterion"])
          if 'scheduler'  in vparams:
            vparams["scheduler"] = str(type(vparams["scheduler"]))
          if 'device' in vparams:
            vparams['device'] = str(vparams["device"])
          self.device = vparams["device"]
          for tag in parm_lista_tag:
            self.run_neptune['sys/tags'].add(tag)
          self.run_neptune['parameters'] = vparams
          self.tmpDir = tempfile.mkdtemp()

      @property
      def run():
        return self.run_neptune

      @classmethod
      def ativa_geracao_rastro(cls):
        cls.se_geracao_rastro = True      

      @classmethod
      def def_contexto(cls):
        cls.se_geracao_rastro = True      

      @classmethod
      def desativa_geracao_rastro(cls):
        cls.se_geracao_rastro = False      

      @classmethod
      def retorna_status_geracao_rastro(cls):
        return cls.se_geracao_rastro      

      @classmethod
      def retorna_tag_contexto_rastro(cls):
        return cls.tag_contexto_rastro 

      @classmethod
      def inicia_contexto(cls, neptune_project, tag_contexto_rastro, neptune_api_token):
        assert '.' not in tag_contexto_rastro, "NeptuneRastroRun.init(): tag_contexto_rastro não pode possuir ponto, pois será usado para gravar nome de arquivo"      
        cls.neptune_api_token = neptune_api_token
        cls.tag_contexto_rastro = tag_contexto_rastro
        cls.neptune_project = neptune_project

      def salva_metrica(self, parm_metricas={}):
        #print(f"NeptuneRastroRun.salva_metrica: se_geracao_rastro {self.__class__.se_geracao_rastro} parm_metricas:{parm_metricas} ")
        if self.__class__.se_geracao_rastro:
          for metrica, valor in parm_metricas.items(): 
            self.run_neptune[metrica].log(valor)
  
      def gera_grafico_modelo(self, loader_train, model):
        if self.__class__.se_geracao_rastro: 
          # efetuar um forward 
          """
          se dataloader devolver x e y:
          """
          x_, y_ = next(iter(loader_train))
          x_ = x_.to(self.device)
          outputs = model(x_)
          """
          # se dataloader devolver dict:
          dados_ = next(iter(loader_train))
          outputs = model(dados_['x'].to(self.device))
          #outputs = model(x_['input_ids'].to(self.device), x_['attention_mask'].to(self.device))
          """
          nome_arquivo = os.path.join(self.tmpDir, "modelo "+ self.__class__.tag_contexto_rastro + time.strftime("%Y-%b-%d %H:%M:%S"))
          make_dot(outputs, params=dict(model.named_parameters()), show_attrs=True, show_saved=True).render(nome_arquivo, format="png")
          self.run_neptune["parameters/model_graph"].upload(nome_arquivo+'.png')
          self.run_neptune['parameters/model'] = re.sub('<bound method Module.state_dict of ', '',str(model.state_dict))      



      def stop(self):
        if self.__class__.se_geracao_rastro:         
          self.run_neptune.stop()

if neptune_version == 1:
  import neptune
  class NeptuneRastroRun():
      """
        Classe para geração de rastro de experimento utilizando a ferramenta Neptune.

        Busca implementar o rastro proposto em [Rastro-DM: Mineração de Dados com Rastro](https://revista.tcu.gov.br/ojs/index.php/RTCU/article/view/1664),
        autores Marcus Vinícius Borela de Castro e Remis Balaniuk, com o apoio da [solução Neptune](https://app.neptune.ai/)

        Attributes:
        -----------
        se_geracao_rastro : bool
            Indica se deve ser gerado rastro de experimento. 
        neptune_project : str
            Nome do projeto criado no Neptune. 
        tag_contexto_rastro : str
            Nome da tag utilizada para identificar o experimento.
        neptune_api_token : str
            Token utilizado para autenticação na API do Neptune. 
        run_neptune : object
            Objeto que representa o experimento no Neptune.
        device : str
            Dispositivo utilizado para o treinamento do modelo.
        tmpDir : str
          Diretório temporário utilizado para salvar gráfico do modelo.          
      """
      se_geracao_rastro = True 
      neptune_project = ""
      tag_contexto_rastro = ""
      neptune_api_token = ""

      def __init__(self, parm_params:dict,  parm_lista_tag:list = None):
        """
          Método construtor da classe NeptuneRastroRun.
          
          Args:
          - parm_params: dicionário contendo os parâmetros do modelo.
          - parm_lista_tag: lista contendo tags adicionais para o experimento.
        """      
        # print(f"NeptuneRastroRun.init: se_geracao_rastro {self.__class__.se_geracao_rastro} parm_params `{parm_params} ")
        if self.__class__.se_geracao_rastro:      
          self.run_neptune = neptune.init_run(project=self.__class__.neptune_project, api_token=self.__class__.neptune_api_token, capture_hardware_metrics=True)
          self.run_neptune['sys/name'] = self.__class__.tag_contexto_rastro
          vparams = copy.deepcopy(parm_params)
          if "optimizer" in vparams:
            vparams["optimizer"] = converte_optimizer_state_dict(vparams["optimizer"])
          if 'criterion'  in vparams:
            vparams["criterion"] = str(vparams["criterion"])
          if 'scheduler'  in vparams:
            vparams["scheduler"] = str(type(vparams["scheduler"]))
          if 'device' in vparams:
            vparams['device'] = str(vparams["device"])
          self.device = vparams["device"]
          for tag in parm_lista_tag:
            self.run_neptune['sys/tags'].add(tag)
          self.run_neptune['parameters'] = vparams
          # self.tmpDir = tempfile.mkdtemp()

      @property
      def run():
        """
        Retorna a instância do objeto run_neptune.
        """      
        return self.run_neptune

      @classmethod
      def ativa_geracao_rastro(cls):
        """
        Ativa a geração de rastro.
        """      
        cls.se_geracao_rastro = True      

      @classmethod
      def def_contexto(cls):
        """
        Define o contexto para a geração de rastro.
        """      
        cls.se_geracao_rastro = True      

      @classmethod
      def desativa_geracao_rastro(cls):
        """
        Desativa a geração de rastro.
        """      
        cls.se_geracao_rastro = False      

      @classmethod
      def retorna_status_geracao_rastro(cls):
        """
          Retorna o status da geração de rastro.
          
          Returns:
          - True se a geração de rastro está ativada, False caso contrário.
        """      
        return cls.se_geracao_rastro      

      @classmethod
      def retorna_tag_contexto_rastro(cls):
        """
          Retorna a tag do contexto de rastro.
        """      
        return cls.tag_contexto_rastro 

      @classmethod
      def inicia_contexto(cls, neptune_project, tag_contexto_rastro, neptune_api_token):
        """
        Inicia o contexto de execução no Neptune.

        Args:
            neptune_project (str): Nome do projeto no Neptune.
            tag_contexto_rastro (str): Tag que identifica o contexto de execução no Neptune.
            neptune_api_token (str): Token de acesso à API do Neptune.

        Raises:
            AssertionError: Caso a tag_contexto_rastro possua um ponto (.), 
              o que pode gerar erros na gravação de arquivo.
        """      
        assert '.' not in tag_contexto_rastro, "NeptuneRastroRun.init(): tag_contexto_rastro não pode possuir ponto, pois será usado para gravar nome de arquivo"      
        cls.neptune_api_token = neptune_api_token
        cls.tag_contexto_rastro = tag_contexto_rastro
        cls.neptune_project = neptune_project

      def salva_metrica(self, parm_metricas={}):
        """
          Salva as métricas no Neptune Run caso a geração de rastro esteja ativa.

          Parameters
          ----------
          parm_metricas: dict
              Dicionário contendo as métricas a serem salvas. As chaves devem ser os nomes das métricas e os valores devem ser
              os valores das métricas.
        """
        #print(f"NeptuneRastroRun.salva_metrica: se_geracao_rastro {self.__class__.se_geracao_rastro} parm_metricas:{parm_metricas} ")
        if self.__class__.se_geracao_rastro:
          for metrica, valor in parm_metricas.items(): 
            self.run_neptune[metrica].append(valor)
  
      def gera_grafico_modelo(self, loader_train, model):
        """
          Gera um gráfico do modelo e o envia para o Neptune. 
          Para gerar o gráfico, um forward pass é realizado em um batch de exemplos 
          de treino e o resultado é renderizado como um gráfico de nós conectados. 
          O gráfico é salvo em um arquivo .png e enviado para o Neptune como um arquivo anexo.

          Args:
              loader_train (torch.utils.data.DataLoader): DataLoader do conjunto de treinamento.
              model (torch.nn.Module): Modelo a ser visualizado.
          
          Pendente:
            Evolui para usar from io import StringIO (buffer = io.StringIO()) ao invés de tempdir 
        """    
        return

        """
        falta ajustar make_dot
        if self.__class__.se_geracao_rastro: 
          # efetuar um forward 
          batch = next(iter(loader_train))
          # falta generalizar linha abaixo. Criar função que recebe modelo e batch como parâmetro?
          outputs = model(input_ids=batch['input_ids'].to(hparam['device']), attention_mask=batch['attention_mask'].to(hparam['device']), token_type_ids=batch['token_type_ids'].to(hparam['device']), labels=batch['labels'].to(hparam['device']))
          nome_arquivo = os.path.join(self.tmpDir, "modelo "+ self.__class__.tag_contexto_rastro + time.strftime("%Y-%b-%d %H:%M:%S"))
          make_dot(outputs, params=dict(model.named_parameters()), show_attrs=True, show_saved=True).render(nome_arquivo, format="png")
          self.run_neptune["parameters/model_graph"].upload(nome_arquivo+'.png')
          self.run_neptune['parameters/model'] = re.sub('<bound method Module.state_dict of ', '',str(model.state_dict))      
        """


      def stop(self):
        """
          Para a execução do objeto Neptune. Todos os experimentos do Neptune são sincronizados com o servidor, e nenhum outro 
          experimento poderá ser adicionado a este objeto após a chamada a este método.
        """
        if self.__class__.se_geracao_rastro:         
          self.run_neptune.stop()

### Definindo parâmetros para o rastro


NeptuneRastroRun.inicia_contexto(os.environ['NEPTUNE_PROJECT'], tag_contexto_rastro,  os.environ['NEPTUNE_API_TOKEN'])
#NeptuneRastroRun.desativa_geracao_rastro()

In [None]:
from transformers.integrations import NeptuneCallback

# Infra

## Paths

In [1]:
PATH_LOCAL_DATA = '../..'

In [5]:
PATH_TRAIN_MODEL_LOCAL = f"{PATH_LOCAL_DATA}/model/train/ptt5-base"

In [6]:
# path_data = '/content/drive/MyDrive/treinamento/202301_IA368DD/indir/data/train_data_juris_tcu_index_bm25.csv'

# PATH_TRAIN_DATA_ZIP = f"{PATH_LOCAL_DATA}/data/train_data_juris_tcu_index.zip"
PATH_TRAIN_DATA = f"{PATH_LOCAL_DATA}/data/train_juris_tcu_index/train_data_juris_tcu_index.csv"

In [7]:
os.path.exists(PATH_TRAIN_MODEL_LOCAL)

True

In [8]:
os.path.exists(PATH_TRAIN_DATA)

True

## Função de verificação de memória

In [9]:
from psutil import virtual_memory

In [10]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Wed Jul  5 19:28:08 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.116.03   Driver Version: 525.116.03   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:02:00.0 Off |                  N/A |
|  0%   48C    P8    26W / 370W |     58MiB / 24576MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [11]:
def mostra_memoria(lista_mem=['cpu']):
  """
  Esta função exibe informações de memória da CPU e/ou GPU, conforme parâmetros fornecidos.

  Parâmetros:
  -----------
  lista_mem : list, opcional
      Lista com strings 'cpu' e/ou 'gpu'.
      'cpu' - exibe informações de memória da CPU.
      'gpu' - exibe informações de memória da GPU (se disponível).
      O valor padrão é ['cpu'].

  Saída:
  -------
  A função não retorna nada, apenas exibe as informações na tela.

  Exemplo de uso:
  ---------------
  Para exibir informações de memória da CPU:
      mostra_memoria(['cpu'])

  Para exibir informações de memória da CPU e GPU:
      mostra_memoria(['cpu', 'gpu'])

  Autor: Marcus Vinícius Borela de Castro

  """
  if 'cpu' in lista_mem:
    vm = virtual_memory()
    ram={}
    ram['total']=round(vm.total / 1e9,2)
    ram['available']=round(virtual_memory().available / 1e9,2)
    # ram['percent']=round(virtual_memory().percent / 1e9,2)
    ram['used']=round(virtual_memory().used / 1e9,2)
    ram['free']=round(virtual_memory().free / 1e9,2)
    ram['active']=round(virtual_memory().active / 1e9,2)
    ram['inactive']=round(virtual_memory().inactive / 1e9,2)
    ram['buffers']=round(virtual_memory().buffers / 1e9,2)
    ram['cached']=round(virtual_memory().cached/1e9 ,2)
    print(f"Your runtime RAM in gb: \n total {ram['total']}\n available {ram['available']}\n used {ram['used']}\n free {ram['free']}\n cached {ram['cached']}\n buffers {ram['buffers']}")
    print('/nGPU')
    gpu_info = !nvidia-smi
  if 'gpu' in lista_mem:
    gpu_info = '\n'.join(gpu_info)
    if gpu_info.find('failed') >= 0:
      print('Not connected to a GPU')
    else:
      print(gpu_info)


In [12]:
mostra_memoria(['cpu','gpu'])

Your runtime RAM in gb: 
 total 67.35
 available 55.33
 used 10.97
 free 19.13
 cached 35.27
 buffers 1.97
/nGPU
Wed Jul  5 19:28:13 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.116.03   Driver Version: 525.116.03   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:02:00.0 Off |                  N/A |
|  0%   49C    P8    37W / 370W |     58MiB / 24576MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                       

## Imports

In [13]:
import os

In [14]:
from transformers import Trainer
import numpy as np
import torch


  from .autonotebook import tqdm as notebook_tqdm


In [15]:
import pandas as pd

In [16]:
from dataclasses import dataclass, field

In [17]:
from datasets import Dataset

In [18]:
import json
from transformers import (
    AutoTokenizer,
    AutoConfig,
    MT5Tokenizer,
    AutoModelForSeq2SeqLM,
    AutoModelForSequenceClassification,
    Trainer,
    Seq2SeqTrainer,
    Seq2SeqTrainingArguments,
    DataCollatorWithPadding,
    DataCollatorForSeq2Seq,
)
from tqdm.auto import tqdm
from datasets import Dataset
from dataclasses import dataclass, field


## Preparando para debug e display

In [19]:
def config_display():
  """
  Esta função configura as opções de display do Pandas.
  """

  # Configurando formato saída Pandas
  # define o número máximo de colunas que serão exibidas
  pd.options.display.max_columns = None

  # define a largura máxima de uma linha
  pd.options.display.width = 1000

  # define o número máximo de linhas que serão exibidas
  pd.options.display.max_rows = 100

  # define o número máximo de caracteres por coluna
  pd.options.display.max_colwidth = 50

  # se deve exibir o número de linhas e colunas de um DataFrame.
  pd.options.display.show_dimensions = True

  # número de dígitos após a vírgula decimal a serem exibidos para floats.
  pd.options.display.precision = 7


In [20]:
def config_debug():
  """
  Esta função configura as opções de debug do PyTorch e dos pacotes
  transformers e datasets.
  """

  # Define opções de impressão de tensores para o modo científico
  torch.set_printoptions(sci_mode=True)
  """
    Significa que valores muito grandes ou muito pequenos são mostrados em notação científica.
    Por exemplo, em vez de imprimir o número 0.0000012345 como 0.0000012345,
    ele seria impresso como 1.2345e-06. Isso é útil em situações em que os valores dos tensores
    envolvidos nas operações são muito grandes ou pequenos, e a notação científica permite
    uma melhor compreensão dos números envolvidos.
  """

  # Habilita detecção de anomalias no autograd do PyTorch
  torch.autograd.set_detect_anomaly(True)
  """
    Permite identificar operações que podem causar problemas de estabilidade numérica,
    como gradientes explodindo ou desaparecendo. Quando essa opção é ativada,
    o PyTorch verifica se há operações que geram valores NaN ou infinitos nos tensores
    envolvidos no cálculo do gradiente. Se for detectado um valor anômalo, o PyTorch
    interrompe a execução e gera uma exceção, permitindo que o erro seja corrigido
    antes que se torne um problema maior.

    É importante notar que a detecção de anomalias pode ter um impacto significativo
    no desempenho, especialmente em modelos grandes e complexos. Por esse motivo,
    ela deve ser usada com cautela e apenas para depuração.
  """

  # Configura variável de ambiente para habilitar a execução síncrona (bloqueante) das chamadas da API do CUDA.
  os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
  """
    o Python aguarda o término da execução de uma chamada da API do CUDA antes de executar a próxima chamada.
    Isso é útil para depurar erros no código que envolve operações na GPU, pois permite que o erro seja capturado
    no momento em que ocorre, e não depois de uma sequência de operações que pode tornar a origem do erro mais difícil de determinar.
    No entanto, é importante lembrar que esse modo de execução é significativamente mais lento do que a execução assíncrona,
    que é o comportamento padrão do CUDA. Por isso, é recomendado utilizar esse comando apenas em situações de depuração
    e removê-lo após a solução do problema.
  """

  # Define o nível de verbosity do pacote transformers para info
  # transformers.utils.logging.set_verbosity_info()


  """
    Define o nível de detalhamento das mensagens de log geradas pela biblioteca Hugging Face Transformers
    para o nível info. Isso significa que a biblioteca irá imprimir mensagens de log informativas sobre
    o andamento da execução, tais como tempo de execução, tamanho de batches, etc.

    Essas informações podem ser úteis para entender o que está acontecendo durante a execução da tarefa
    e auxiliar no processo de debug. É importante notar que, em alguns casos, a quantidade de informações
    geradas pode ser muito grande, o que pode afetar o desempenho do sistema e dificultar a visualização
    das informações relevantes. Por isso, é importante ajustar o nível de detalhamento de acordo com a
    necessidade de cada tarefa.

    Caso queira reduzir a quantidade de mensagens, comentar a linha acima e
      descomentar as duas linhas abaixo, para definir o nível de verbosity como error ou warning

    transformers.utils.logging.set_verbosity_error()
    transformers.utils.logging.set_verbosity_warning()
  """


  # Define o modo verbose do xmode, que é utilizado no debug
  # %xmode Verbose

  """
    Comando usado no Jupyter Notebook para controlar o modo de exibição das informações de exceções.
    O modo verbose é um modo detalhado que exibe informações adicionais ao imprimir as exceções.
    Ele inclui as informações de pilha de chamadas completa e valores de variáveis locais e globais
    no momento da exceção. Isso pode ser útil para depurar e encontrar a causa de exceções em seu código.
    Ao usar %xmode Verbose, as informações de exceção serão impressas com mais detalhes e informações adicionais serão incluídas.

    Caso queira desabilitar o modo verbose e utilizar o modo plain,
    comentar a linha acima e descomentar a linha abaixo:
    %xmode Plain
  """

  """
    Dica:
    1.  pdb (Python Debugger)
      Quando ocorre uma exceção em uma parte do código, o programa para a execução e exibe uma mensagem de erro
      com informações sobre a exceção, como a linha do código em que ocorreu o erro e o tipo da exceção.

      Se você estiver depurando o código e quiser examinar o estado das variáveis ​​e executar outras operações
      no momento em que a exceção ocorreu, pode usar o pdb (Python Debugger). Para isso, é preciso colocar o comando %debug
      logo após ocorrer a exceção. Isso fará com que o programa pare na linha em que ocorreu a exceção e abra o pdb,
      permitindo que você explore o estado das variáveis, examine a pilha de chamadas e execute outras operações para depurar o código.


    2. ipdb
      O ipdb é um depurador interativo para o Python que oferece recursos mais avançados do que o pdb,
      incluindo a capacidade de navegar pelo código fonte enquanto depura.

      Você pode começar a depurar seu código inserindo o comando ipdb.set_trace() em qualquer lugar do
      seu código onde deseja pausar a execução e começar a depurar. Quando a execução chegar nessa linha,
      o depurador entrará em ação, permitindo que você examine o estado atual do seu programa e execute
      comandos para investigar o comportamento.

      Durante a depuração, você pode usar comandos:
        next (para executar a próxima linha de código),
        step (para entrar em uma função chamada na próxima linha de código)
        continue (para continuar a execução normalmente até o próximo ponto de interrupção).

      Ao contrário do pdb, o ipdb é um depurador interativo que permite navegar pelo código fonte em que
      está trabalhando enquanto depura, permitindo que você inspecione variáveis, defina pontos de interrupção
      adicionais e até mesmo execute expressões Python no contexto do seu programa.
  """


In [21]:
config_display()

In [22]:
config_debug()

In [23]:
current_dir = os.getcwd()
print("Current directory:", current_dir)

Current directory: /home/borela/fontes/ind-ir/code/train


## Constants

In [24]:
TOKEN_FALSE = '▁não'
TOKEN_TRUE = '▁sim'

In [25]:
MODEL_NAME = 'unicamp-dl/ptt5-base-pt-msmarco-100k-v2'
# 'unicamp-dl/mt5-3B-mmarco-en-pt'

# Carga dos dados

In [27]:
df = pd.read_csv(PATH_TRAIN_DATA)

In [28]:
df.shape
# lim 100(111852, 6)

(402738, 7)

Verificando correção do arquivo!

In [29]:
print(df.isnull().sum())

QUERY_ID      0
DOC_ID        0
RELEVANCE     0
SCORE         0
TYPE          0
DOC_TEXT      0
QUERY_TEXT    0
Length: 7, dtype: int64


In [30]:
df[['QUERY_TEXT','DOC_TEXT']].applymap(len).describe()

Unnamed: 0,QUERY_TEXT,DOC_TEXT
count,402738.0,402738.0
mean,322.8252313,830.6957451
std,165.8299958,398.1844365
min,41.0,86.0
25%,217.0,572.0
50%,294.0,759.0
75%,391.0,1020.0
max,4212.0,3739.0


Para cada positivo, tem 5 negativos

In [31]:
df['RELEVANCE'].describe()

count    402738.0000000
mean          0.1666667
std           0.3726785
min           0.0000000
25%           0.0000000
50%           0.0000000
75%           0.0000000
max           1.0000000
Name: RELEVANCE, Length: 8, dtype: float64

In [32]:
df.head()

Unnamed: 0,QUERY_ID,DOC_ID,RELEVANCE,SCORE,TYPE,DOC_TEXT,QUERY_TEXT
0,151655,1943,1,0.897,TEMA,"O termo é ""Agente público"".\nAgente público te...",O dever de observância à hierarquia militar nã...
1,151655,15441,0,0.732,"relevant:TEMA, not relevant:TOTAL","O termo é ""Reforma-prêmio"".\nReforma-prêmio te...",O dever de observância à hierarquia militar nã...
2,151655,6373,0,0.717,"relevant:TEMA, not relevant:TOTAL","O termo é ""Exercício financeiro anterior"".\nEx...",O dever de observância à hierarquia militar nã...
3,151655,6973,0,0.68,"relevant:TEMA, not relevant:TOTAL","O termo é ""CJF"".\nCJF é classificado como uma ...",O dever de observância à hierarquia militar nã...
4,151655,7201,0,0.751,"relevant:TEMA, not relevant:TOTAL","O termo é ""Embratur"".\nEmbratur é classificado...",O dever de observância à hierarquia militar nã...


In [33]:
df["label"] = [TOKEN_FALSE if relevance == 0 else TOKEN_TRUE for relevance in df["RELEVANCE"]]

In [34]:
df.head()

Unnamed: 0,QUERY_ID,DOC_ID,RELEVANCE,SCORE,TYPE,DOC_TEXT,QUERY_TEXT,label
0,151655,1943,1,0.897,TEMA,"O termo é ""Agente público"".\nAgente público te...",O dever de observância à hierarquia militar nã...,▁sim
1,151655,15441,0,0.732,"relevant:TEMA, not relevant:TOTAL","O termo é ""Reforma-prêmio"".\nReforma-prêmio te...",O dever de observância à hierarquia militar nã...,▁não
2,151655,6373,0,0.717,"relevant:TEMA, not relevant:TOTAL","O termo é ""Exercício financeiro anterior"".\nEx...",O dever de observância à hierarquia militar nã...,▁não
3,151655,6973,0,0.68,"relevant:TEMA, not relevant:TOTAL","O termo é ""CJF"".\nCJF é classificado como uma ...",O dever de observância à hierarquia militar nã...,▁não
4,151655,7201,0,0.751,"relevant:TEMA, not relevant:TOTAL","O termo é ""Embratur"".\nEmbratur é classificado...",O dever de observância à hierarquia militar nã...,▁não


In [35]:
df.rename(columns={'DOC_TEXT': 'text', 'QUERY_TEXT':'query'},inplace=True)

In [36]:
df.head()

Unnamed: 0,QUERY_ID,DOC_ID,RELEVANCE,SCORE,TYPE,text,query,label
0,151655,1943,1,0.897,TEMA,"O termo é ""Agente público"".\nAgente público te...",O dever de observância à hierarquia militar nã...,▁sim
1,151655,15441,0,0.732,"relevant:TEMA, not relevant:TOTAL","O termo é ""Reforma-prêmio"".\nReforma-prêmio te...",O dever de observância à hierarquia militar nã...,▁não
2,151655,6373,0,0.717,"relevant:TEMA, not relevant:TOTAL","O termo é ""Exercício financeiro anterior"".\nEx...",O dever de observância à hierarquia militar nã...,▁não
3,151655,6973,0,0.68,"relevant:TEMA, not relevant:TOTAL","O termo é ""CJF"".\nCJF é classificado como uma ...",O dever de observância à hierarquia militar nã...,▁não
4,151655,7201,0,0.751,"relevant:TEMA, not relevant:TOTAL","O termo é ""Embratur"".\nEmbratur é classificado...",O dever de observância à hierarquia militar nã...,▁não


In [37]:
df = df[['query', 'text', 'label']]


In [38]:

df.shape

(402738, 3)

In [39]:
import gc

# ... código anterior ...

# Liberar memória utilizando gc.collect()
gc.collect()

20

# Separating evaluation data and prepare dataset tokenized

In [40]:
from sklearn.model_selection import train_test_split

In [43]:
train_df, valid_df = train_test_split(df, test_size=0.01,
                                      stratify=df['label'].values, random_state=123)
# Definir os argumentos de treinamento

In [44]:
train_df.shape, valid_df.shape

((398710, 3), (4028, 3))

In [45]:
valid_df[:2]

Unnamed: 0,query,text,label
270246,Verificado sobrepreço em contrato de obra públ...,"O termo é ""Fatura"".\nFatura tem nota de escopo...",▁sim
106266,Faculta-se aos médicos do poder judiciário o e...,"O termo é ""Médico"".\nMédico tem definição: ""Aq...",▁sim


In [46]:
print(np.unique(train_df['label'], return_counts=True), '\n', np.unique(valid_df['label'], return_counts=True))

(array(['▁não', '▁sim'], dtype=object), array([332258,  66452])) 
 (array(['▁não', '▁sim'], dtype=object), array([3357,  671]))


In [47]:
train_dataset = Dataset.from_pandas(train_df[["query", "text", "label"]].reset_index(drop=True))
valid_dataset = Dataset.from_pandas(valid_df[["query", "text", "label"]].reset_index(drop=True))

In [48]:
len(train_dataset), len(valid_dataset)

(398710, 4028)

In [49]:
valid_dataset[0]

{'query': 'Verificado sobrepreço em contrato de obra pública, a Administração deve promover ajuste do valor apurado nas faturas vincendas. Não existindo saldo financeiro, deve providenciar a instauração da competente tomada de contas especial.',
 'text': 'O termo é "Fatura".\nFatura tem nota de escopo: "É um documento onde está registrado um valor em debito com prazo de quitação da dívida seja parcelado ou à vista.".\nFatura tem termo relacionado: "Duplicata", "Nota fiscal eletrônica", "Nota fiscal" e "Cartão de crédito".\nFatura tem tradução em espanhol: "Factura".\nFatura tem tradução em inglês: "Invoice".',
 'label': '▁sim'}

In [50]:
del df, train_df, valid_df


In [51]:
gc.collect()

20

# Tokenize

In [52]:
# tokenizer = MT5Tokenizer.from_pretrained(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

Downloading (…)okenizer_config.json: 100%|██████████| 1.98k/1.98k [00:00<00:00, 554kB/s]
Downloading spiece.model: 100%|██████████| 756k/756k [00:00<00:00, 793kB/s]
Downloading (…)cial_tokens_map.json: 100%|██████████| 1.79k/1.79k [00:00<00:00, 1.00MB/s]


In [53]:
def tokenize(batch):
    queries_documents = [f"Query: {query} Document: {text} Relevant:" for query, text in zip(batch["query"], batch["text"])]
    print(f"Chamado tokenize len(queries_documents): {len(queries_documents)}")
    tokenized = tokenizer(
        queries_documents,
        padding=True, # "max_length",
        truncation=True,
        # return_tensors="pt",
        max_length= 512
    )
    # tokenized["labels"] = [[label] for label in batch["label"]]
    # tokenized['label'] = [[token_false, token_true][int(pairs["label"][i])]
    tokenized["labels"] = tokenizer(batch['label'])['input_ids']
    # tokenized["labels"] = [tokenizer.get_vocab()[token] for token in batch['label']]
    # tokenized["labels"] = [token_id_true if label == 'true' else token_id_false for label in batch["label"]]
    return tokenized


In [54]:
%%time
# valid_dataset.set_transform(tokenize)
valid_dataset = valid_dataset.map(
        tokenize,
        remove_columns=('query', 'text', 'label'),
        batched=True,
        desc='Tokenizing',
    )

Tokenizing:   0%|          | 0/4028 [00:00<?, ? examples/s]

Chamado tokenize len(queries_documents): 1000


Tokenizing:  50%|████▉     | 2000/4028 [00:00<00:00, 3247.28 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


                                                                        

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 28
CPU times: user 3.68 s, sys: 138 ms, total: 3.82 s
Wall time: 1.06 s




In [55]:
%%time
# train_dataset.set_transform(tokenize)
train_dataset = train_dataset.map(
        tokenize,
        remove_columns=('query', 'text', 'label'),
        batched=True,
        desc='Tokenizing',
    )

Tokenizing:   0%|          | 1000/398710 [00:00<00:59, 6739.97 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:   1%|          | 3000/398710 [00:00<00:56, 7036.97 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:   1%|▏         | 5000/398710 [00:00<00:57, 6825.44 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:   2%|▏         | 7000/398710 [00:01<00:58, 6728.33 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:   2%|▏         | 9000/398710 [00:01<01:03, 6145.39 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:   3%|▎         | 11000/398710 [00:01<00:59, 6563.53 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:   3%|▎         | 13000/398710 [00:01<00:57, 6707.86 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:   4%|▍         | 15000/398710 [00:02<00:55, 6853.75 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:   4%|▍         | 17000/398710 [00:02<00:55, 6904.80 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:   5%|▍         | 19000/398710 [00:02<00:59, 6405.97 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:   5%|▌         | 21000/398710 [00:03<00:56, 6718.08 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:   6%|▌         | 23000/398710 [00:03<00:55, 6810.92 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:   6%|▋         | 25000/398710 [00:03<00:54, 6878.63 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:   7%|▋         | 27000/398710 [00:04<00:55, 6718.76 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:   7%|▋         | 28000/398710 [00:04<00:54, 6801.39 examples/s]

Chamado tokenize len(queries_documents): 1000


Tokenizing:   8%|▊         | 30000/398710 [00:04<00:58, 6289.07 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:   8%|▊         | 32000/398710 [00:04<00:57, 6393.56 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:   9%|▊         | 34000/398710 [00:05<00:54, 6724.92 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:   9%|▉         | 36000/398710 [00:05<00:53, 6782.30 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  10%|▉         | 38000/398710 [00:05<00:51, 6949.49 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  10%|█         | 40000/398710 [00:05<00:51, 6984.47 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  11%|█         | 42000/398710 [00:06<00:54, 6508.35 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  11%|█         | 44000/398710 [00:06<00:52, 6820.80 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  12%|█▏        | 46000/398710 [00:06<00:50, 7024.47 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  12%|█▏        | 48000/398710 [00:07<00:48, 7164.00 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  13%|█▎        | 50000/398710 [00:07<00:48, 7183.44 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  13%|█▎        | 52000/398710 [00:07<00:57, 6029.52 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  14%|█▎        | 54000/398710 [00:08<00:53, 6447.34 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  14%|█▍        | 56000/398710 [00:08<00:52, 6548.55 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  15%|█▍        | 58000/398710 [00:08<00:50, 6747.66 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  15%|█▌        | 60000/398710 [00:08<00:49, 6789.01 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  15%|█▌        | 61000/398710 [00:09<00:49, 6754.49 examples/s]

Chamado tokenize len(queries_documents): 1000


Tokenizing:  16%|█▌        | 63000/398710 [00:09<00:52, 6334.32 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  16%|█▋        | 65000/398710 [00:09<00:49, 6706.97 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  17%|█▋        | 67000/398710 [00:10<00:47, 6990.35 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  17%|█▋        | 69000/398710 [00:10<00:46, 7072.88 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  18%|█▊        | 71000/398710 [00:10<00:45, 7138.53 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  18%|█▊        | 73000/398710 [00:10<00:45, 7137.19 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  19%|█▉        | 75000/398710 [00:11<00:49, 6561.75 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  19%|█▉        | 77000/398710 [00:11<00:46, 6846.18 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  20%|█▉        | 79000/398710 [00:11<00:45, 6992.43 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  20%|██        | 81000/398710 [00:12<00:44, 7073.74 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  21%|██        | 83000/398710 [00:12<00:44, 7160.56 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  21%|██▏       | 85000/398710 [00:12<00:48, 6493.28 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  22%|██▏       | 87000/398710 [00:12<00:45, 6878.81 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  22%|██▏       | 89000/398710 [00:13<00:43, 7084.60 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  23%|██▎       | 91000/398710 [00:13<00:42, 7157.32 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  23%|██▎       | 93000/398710 [00:13<00:42, 7192.81 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  24%|██▍       | 95000/398710 [00:14<00:48, 6286.14 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  24%|██▍       | 97000/398710 [00:14<00:44, 6730.40 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  25%|██▍       | 99000/398710 [00:14<00:43, 6886.14 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  25%|██▌       | 101000/398710 [00:14<00:42, 6988.41 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  26%|██▌       | 103000/398710 [00:15<00:42, 7026.00 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  26%|██▋       | 105000/398710 [00:15<00:46, 6276.02 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  27%|██▋       | 107000/398710 [00:15<00:48, 6023.08 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  27%|██▋       | 109000/398710 [00:16<00:44, 6573.47 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  28%|██▊       | 111000/398710 [00:16<00:41, 6909.89 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  28%|██▊       | 113000/398710 [00:16<00:40, 7103.16 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  29%|██▉       | 115000/398710 [00:17<00:39, 7132.05 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  29%|██▉       | 117000/398710 [00:17<00:44, 6270.08 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  30%|██▉       | 119000/398710 [00:17<00:41, 6755.66 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  30%|███       | 121000/398710 [00:17<00:40, 6876.00 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  31%|███       | 123000/398710 [00:18<00:39, 7002.31 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  31%|███▏      | 125000/398710 [00:18<00:38, 7083.89 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  32%|███▏      | 127000/398710 [00:18<00:38, 7068.15 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  32%|███▏      | 129000/398710 [00:19<00:41, 6423.67 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  33%|███▎      | 131000/398710 [00:19<00:39, 6758.49 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  33%|███▎      | 133000/398710 [00:19<00:39, 6796.54 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  34%|███▍      | 135000/398710 [00:20<00:37, 6946.71 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  34%|███▍      | 137000/398710 [00:20<00:36, 7090.01 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  35%|███▍      | 139000/398710 [00:20<00:36, 7139.33 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  35%|███▌      | 141000/398710 [00:20<00:39, 6512.19 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  36%|███▌      | 143000/398710 [00:21<00:37, 6777.61 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  36%|███▋      | 145000/398710 [00:21<00:36, 6978.72 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  37%|███▋      | 147000/398710 [00:21<00:35, 7036.77 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  37%|███▋      | 149000/398710 [00:22<00:35, 7022.14 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  38%|███▊      | 151000/398710 [00:22<00:39, 6319.45 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  38%|███▊      | 153000/398710 [00:22<00:36, 6690.46 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  39%|███▉      | 155000/398710 [00:22<00:35, 6848.72 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  39%|███▉      | 157000/398710 [00:23<00:34, 7064.03 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  40%|███▉      | 159000/398710 [00:23<00:34, 6962.69 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  40%|████      | 160000/398710 [00:23<00:34, 6971.44 examples/s]

Chamado tokenize len(queries_documents): 1000


Tokenizing:  41%|████      | 162000/398710 [00:24<00:37, 6331.65 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  41%|████      | 164000/398710 [00:24<00:34, 6711.11 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  42%|████▏     | 166000/398710 [00:24<00:34, 6829.36 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  42%|████▏     | 168000/398710 [00:24<00:33, 6897.91 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  43%|████▎     | 170000/398710 [00:25<00:32, 7048.96 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  43%|████▎     | 172000/398710 [00:25<00:31, 7087.58 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  44%|████▎     | 174000/398710 [00:25<00:34, 6426.23 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  44%|████▍     | 176000/398710 [00:26<00:35, 6209.75 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  45%|████▍     | 178000/398710 [00:26<00:33, 6575.87 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  45%|████▌     | 180000/398710 [00:26<00:31, 6883.72 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  46%|████▌     | 182000/398710 [00:26<00:30, 7074.63 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  46%|████▌     | 184000/398710 [00:27<00:33, 6488.62 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  47%|████▋     | 186000/398710 [00:27<00:30, 6880.59 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  47%|████▋     | 188000/398710 [00:27<00:29, 7045.97 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  48%|████▊     | 190000/398710 [00:28<00:30, 6815.44 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  48%|████▊     | 192000/398710 [00:28<00:29, 6927.97 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  49%|████▊     | 194000/398710 [00:28<00:32, 6215.09 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  49%|████▉     | 196000/398710 [00:29<00:30, 6744.60 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  50%|████▉     | 198000/398710 [00:29<00:30, 6657.29 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  50%|█████     | 200000/398710 [00:29<00:28, 6902.22 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  51%|█████     | 202000/398710 [00:29<00:28, 6983.05 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  51%|█████     | 204000/398710 [00:30<00:27, 6998.85 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  52%|█████▏    | 206000/398710 [00:30<00:30, 6324.68 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  52%|█████▏    | 207000/398710 [00:30<00:29, 6590.46 examples/s]

Chamado tokenize len(queries_documents): 1000


Tokenizing:  52%|█████▏    | 209000/398710 [00:31<00:34, 5543.37 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  53%|█████▎    | 211000/398710 [00:31<00:29, 6258.78 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  53%|█████▎    | 213000/398710 [00:31<00:27, 6695.50 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  54%|█████▍    | 215000/398710 [00:31<00:26, 6932.26 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  54%|█████▍    | 217000/398710 [00:32<00:27, 6518.60 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  55%|█████▍    | 219000/398710 [00:32<00:26, 6855.88 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  55%|█████▌    | 221000/398710 [00:32<00:25, 7067.68 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  56%|█████▌    | 223000/398710 [00:33<00:25, 7017.22 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  56%|█████▋    | 225000/398710 [00:33<00:24, 7105.26 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  57%|█████▋    | 227000/398710 [00:33<00:27, 6234.10 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  57%|█████▋    | 229000/398710 [00:34<00:25, 6695.40 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  58%|█████▊    | 231000/398710 [00:34<00:24, 6898.84 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  58%|█████▊    | 233000/398710 [00:34<00:23, 6999.03 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  59%|█████▉    | 235000/398710 [00:34<00:22, 7145.54 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  59%|█████▉    | 237000/398710 [00:35<00:22, 7166.78 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  60%|█████▉    | 239000/398710 [00:35<00:25, 6319.56 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  60%|██████    | 241000/398710 [00:35<00:23, 6755.90 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  61%|██████    | 243000/398710 [00:36<00:22, 6949.57 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  61%|██████▏   | 245000/398710 [00:36<00:21, 7043.16 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  62%|██████▏   | 247000/398710 [00:36<00:21, 7156.70 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  62%|██████▏   | 249000/398710 [00:36<00:23, 6305.14 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  63%|██████▎   | 251000/398710 [00:37<00:21, 6729.24 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  63%|██████▎   | 253000/398710 [00:37<00:20, 6944.01 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  64%|██████▍   | 255000/398710 [00:37<00:20, 7092.61 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  64%|██████▍   | 257000/398710 [00:38<00:19, 7180.78 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  65%|██████▍   | 259000/398710 [00:38<00:19, 7136.68 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  65%|██████▌   | 261000/398710 [00:38<00:21, 6513.18 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  66%|██████▌   | 263000/398710 [00:38<00:19, 6856.93 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  66%|██████▋   | 265000/398710 [00:39<00:18, 7054.77 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  67%|██████▋   | 267000/398710 [00:39<00:18, 7153.54 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  67%|██████▋   | 269000/398710 [00:39<00:18, 7074.74 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  68%|██████▊   | 271000/398710 [00:40<00:17, 7147.26 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  68%|██████▊   | 273000/398710 [00:40<00:19, 6572.24 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  69%|██████▉   | 275000/398710 [00:40<00:17, 6924.55 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  69%|██████▉   | 277000/398710 [00:40<00:17, 7064.84 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  70%|██████▉   | 279000/398710 [00:41<00:16, 7091.46 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  70%|███████   | 281000/398710 [00:41<00:16, 6932.19 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  71%|███████   | 283000/398710 [00:41<00:18, 6155.65 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  71%|███████▏  | 285000/398710 [00:42<00:17, 6452.26 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  72%|███████▏  | 287000/398710 [00:42<00:16, 6684.51 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  72%|███████▏  | 289000/398710 [00:42<00:16, 6786.00 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  73%|███████▎  | 291000/398710 [00:43<00:15, 6961.35 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  73%|███████▎  | 292000/398710 [00:43<00:15, 7004.88 examples/s]

Chamado tokenize len(queries_documents): 1000


Tokenizing:  74%|███████▎  | 294000/398710 [00:43<00:16, 6376.87 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  74%|███████▍  | 296000/398710 [00:43<00:15, 6672.83 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  75%|███████▍  | 298000/398710 [00:44<00:14, 6949.19 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  75%|███████▌  | 300000/398710 [00:44<00:14, 7018.28 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  76%|███████▌  | 302000/398710 [00:44<00:13, 7047.52 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  76%|███████▌  | 304000/398710 [00:44<00:13, 7056.03 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  77%|███████▋  | 306000/398710 [00:45<00:14, 6416.98 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  77%|███████▋  | 308000/398710 [00:45<00:13, 6790.78 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  78%|███████▊  | 310000/398710 [00:45<00:12, 6963.70 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  78%|███████▊  | 312000/398710 [00:46<00:12, 6951.49 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  79%|███████▉  | 314000/398710 [00:46<00:12, 6937.77 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  79%|███████▉  | 316000/398710 [00:46<00:13, 6311.81 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  80%|███████▉  | 318000/398710 [00:47<00:12, 6607.48 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  80%|████████  | 320000/398710 [00:47<00:11, 6767.15 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  81%|████████  | 322000/398710 [00:47<00:11, 6952.45 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  81%|████████▏ | 324000/398710 [00:47<00:10, 6977.77 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  82%|████████▏ | 326000/398710 [00:48<00:11, 6152.07 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  82%|████████▏ | 328000/398710 [00:48<00:10, 6589.98 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  83%|████████▎ | 330000/398710 [00:48<00:10, 6804.43 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  83%|████████▎ | 332000/398710 [00:49<00:09, 6936.84 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  84%|████████▍ | 334000/398710 [00:49<00:09, 6950.52 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  84%|████████▍ | 336000/398710 [00:49<00:09, 6934.92 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  85%|████████▍ | 338000/398710 [00:50<00:09, 6252.01 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  85%|████████▌ | 340000/398710 [00:50<00:08, 6662.91 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  86%|████████▌ | 342000/398710 [00:50<00:09, 6194.54 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  86%|████████▋ | 344000/398710 [00:51<00:08, 6457.67 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  87%|████████▋ | 346000/398710 [00:51<00:07, 6732.96 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  87%|████████▋ | 347000/398710 [00:51<00:08, 6173.03 examples/s]

Chamado tokenize len(queries_documents): 1000


Tokenizing:  88%|████████▊ | 349000/398710 [00:51<00:08, 6008.09 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  88%|████████▊ | 351000/398710 [00:52<00:07, 6380.65 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  89%|████████▊ | 353000/398710 [00:52<00:06, 6585.35 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  89%|████████▉ | 355000/398710 [00:52<00:06, 6813.97 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  90%|████████▉ | 357000/398710 [00:53<00:06, 6778.26 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  90%|████████▉ | 358000/398710 [00:53<00:06, 6719.82 examples/s]

Chamado tokenize len(queries_documents): 1000


Tokenizing:  90%|█████████ | 360000/398710 [00:53<00:06, 6278.57 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  91%|█████████ | 362000/398710 [00:53<00:05, 6629.60 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  91%|█████████▏| 364000/398710 [00:54<00:05, 6788.79 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  92%|█████████▏| 366000/398710 [00:54<00:04, 6951.30 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  92%|█████████▏| 368000/398710 [00:54<00:04, 6898.70 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  93%|█████████▎| 370000/398710 [00:54<00:04, 7018.46 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  93%|█████████▎| 372000/398710 [00:55<00:04, 6483.09 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  94%|█████████▍| 374000/398710 [00:55<00:03, 6786.85 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  94%|█████████▍| 376000/398710 [00:55<00:03, 6977.84 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  95%|█████████▍| 378000/398710 [00:56<00:02, 7069.54 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  95%|█████████▌| 380000/398710 [00:56<00:02, 7121.19 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  96%|█████████▌| 382000/398710 [00:56<00:02, 6428.51 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  96%|█████████▋| 384000/398710 [00:57<00:02, 6744.67 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  97%|█████████▋| 386000/398710 [00:57<00:01, 6924.36 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  97%|█████████▋| 388000/398710 [00:57<00:01, 7023.97 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  98%|█████████▊| 390000/398710 [00:57<00:01, 7101.03 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  98%|█████████▊| 391000/398710 [00:58<00:01, 7026.50 examples/s]

Chamado tokenize len(queries_documents): 1000


Tokenizing:  99%|█████████▊| 393000/398710 [00:58<00:00, 6318.44 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing:  99%|█████████▉| 395000/398710 [00:58<00:00, 6467.16 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


Tokenizing: 100%|█████████▉| 397000/398710 [00:59<00:00, 6497.92 examples/s]

Chamado tokenize len(queries_documents): 1000
Chamado tokenize len(queries_documents): 1000


                                                                            

Chamado tokenize len(queries_documents): 710
CPU times: user 5min 27s, sys: 2.66 s, total: 5min 30s
Wall time: 59.3 s




In [56]:
print(valid_dataset[0])

{'input_ids': [2094, 540, 46, 1231, 6916, 81, 1044, 1164, 12, 1170, 4, 516, 1450, 3, 7, 5271, 697, 3072, 16630, 10, 941, 7, 8644, 53, 117, 1593, 1083, 6, 14591, 4575, 178, 5, 852, 18113, 17832, 6551, 3, 697, 17668, 33, 7, 14232, 1343, 93, 11, 18294, 4931, 4, 12558, 918, 5, 745, 797, 1806, 46, 28, 762, 21, 15, 3528, 1083, 64, 931, 1083, 87, 2649, 4, 19414, 46, 15, 3921, 16, 4064, 83, 141, 7578, 16, 941, 12, 4, 719, 123, 18, 4178, 4, 3366, 2424, 11, 9578, 474, 11084, 53, 52, 48, 1355, 5, 64, 931, 1083, 87, 762, 5993, 46, 15, 5787, 4152, 121, 49, 15, 1931, 121, 11408, 5441, 49, 15, 1931, 121, 11408, 27, 8, 15, 3022, 1380, 4, 8025, 64, 931, 1083, 87, 4101, 12, 1830, 46, 15, 3528, 167, 1083, 64, 931, 1083, 87, 4101, 12, 792, 46, 15, 2028, 601, 3249, 64, 294, 8985, 5572, 46, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

# Train

## setup

In [58]:
PATH_TRAIN_MODEL_LOCAL

'../../model/train/ptt5-base'

In [96]:
num_step_alert = 200
training_args = Seq2SeqTrainingArguments(output_dir=PATH_TRAIN_MODEL_LOCAL)
# Needed to make the Trainer work with an on-the-fly transformation on the dataset
# training_args.remove_unused_columns = False
training_args.output_dir = PATH_TRAIN_MODEL_LOCAL
training_args.warmup_steps=400 # Alterar!
training_args.num_train_epochs=4.0 # Alterar!
training_args.logging_steps=num_step_alert # Alterar!
training_args.save_strategy="steps"
training_args.save_steps=num_step_alert
training_args.save_total_limit=10
training_args.learning_rate=5e-5
training_args.per_device_train_batch_size=16 # t4: 8, a100-40: 32
training_args.gradient_accumulation_steps=4 # t4: 4, a100-40: 2
#training_args._n_gpu = 1
# training_args.bf16 = True # se for usar a100, 3090, 4090 -> usar
training_args.ignore_data_skip = True
training_args.load_best_model_at_end = True
training_args.evaluation_strategy='steps'
training_args.eval_steps=num_step_alert
training_args.do_eval = True
# training_args.optim='adamw_hf' #default
training_args.gradient_checkpointing = False # True
# se precisar economizar gpu
# training_args.optim='adamw_bnb_8bit'
# training_args.gradient_checkpointing = True
training_args.report_to="neptune",
# training_args.report_to = 'None'

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [59]:
!ls

drive  sample_data


In [97]:
print(training_args)

Seq2SeqTrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=True,
do_predict=False,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=200,
evaluation_strategy=steps,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_min_num_params=0,
fsdp_transformer_layer_cls_to_wrap=None,
full_determinism=False,
generation_max_length=None,
generation_num_beams=None,
gradient_accumulation_steps=4,
gradient_checkpointing=False,
greater_is_better=None,
group_by_length=False,
half_precision_backend=auto,
hub_model_id=None,
hub_private_repo=False,
hub_strategy=every_save,
hub_token=<HUB_TOKEN>,
ignore_data_skip=True,
in

In [61]:
# from transformers.integrations import NeptuneCallback
# rastro_neptune = NeptuneRastroRun(hparam, parm_lista_tag= tag_contexto_rastro)
# neptune_callback = NeptuneCallback(run=rastro_neptune)

In [71]:
# se local
nome_caminho_modelo = "/home/borela/fontes/relevar-busca/modelo/" + MODEL_NAME
assert os.path.exists(nome_caminho_modelo), f"Path para {MODEL_NAME} não existe!"


In [61]:
%%time
model = AutoModelForSeq2SeqLM.from_pretrained(nome_caminho_modelo)


Downloading (…)lve/main/config.json: 100%|██████████| 635/635 [00:00<00:00, 230kB/s]
Downloading pytorch_model.bin: 100%|██████████| 892M/892M [01:01<00:00, 14.6MB/s] 


CPU times: user 3.88 s, sys: 2.27 s, total: 6.15 s
Wall time: 1min 5s


In [74]:
trainer_cls = Seq2SeqTrainer
data_collator = DataCollatorForSeq2Seq(tokenizer)

In [106]:
# Limpa o cache da memória da GPU
# del trainer
torch.cuda.empty_cache()

In [107]:
gc.collect()

480

## Train

In [66]:
raise Exception('Parar aqui reinício!')

dicas para integrar com Neptune 
https://docs.neptune.ai/integrations/transformers/#__tabbed_2_2

In [108]:
trainer = trainer_cls(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

In [None]:
del trainer

NameError: name 'trainer' is not defined

reiniciando checkpoint 5200

In [109]:
train_metrics = trainer.train(resume_from_checkpoint=True)

Loading model from ../../model/train/ptt5-base/checkpoint-5200.
You are resuming training from a checkpoint trained with 4.30.2 of Transformers but your current version is 4.25.1. This is not recommended and could yield to errors or unwanted behaviors.
***** Running training *****
  Num examples = 398710
  Num Epochs = 4
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 64
  Gradient Accumulation steps = 4
  Total optimization steps = 24920
  Number of trainable parameters = 222903552
  Continuing training from checkpoint, will skip to saved global_step
  Continuing training from epoch 0
  Continuing training from global step 5200


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/24920 [00:00<?, ?it/s]Didn't manage to set back the RNG states of the GPU because of the following error:
 RNG state is wrong size
This won't yield the same results as if the training had not been interrupted.
 22%|██▏       | 5400/24920 [14:04<22:45:30,  4.20s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0386, 'learning_rate': 3.9804241435562806e-05, 'epoch': 0.03}


                                                       
 22%|██▏       | 5400/24920 [14:59<22:45:30,  4.20s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-5400
Configuration saved in ../../model/train/ptt5-base/checkpoint-5400/config.json


{'eval_loss': 0.03830071911215782, 'eval_runtime': 55.5829, 'eval_samples_per_second': 72.468, 'eval_steps_per_second': 9.068, 'epoch': 0.03}


Model weights saved in ../../model/train/ptt5-base/checkpoint-5400/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-5400/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-5400/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-5400/spiece.model
 22%|██▏       | 5600/24920 [29:02<22:07:17,  4.12s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0397, 'learning_rate': 3.9396411092985316e-05, 'epoch': 0.06}


                                                       
 22%|██▏       | 5600/24920 [29:56<22:07:17,  4.12s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-5600
Configuration saved in ../../model/train/ptt5-base/checkpoint-5600/config.json


{'eval_loss': 0.039101142436265945, 'eval_runtime': 54.7467, 'eval_samples_per_second': 73.575, 'eval_steps_per_second': 9.206, 'epoch': 0.06}


Model weights saved in ../../model/train/ptt5-base/checkpoint-5600/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-5600/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-5600/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-5600/spiece.model
 23%|██▎       | 5800/24920 [43:35<21:40:49,  4.08s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0359, 'learning_rate': 3.898858075040783e-05, 'epoch': 0.1}


                                                       
 23%|██▎       | 5800/24920 [44:30<21:40:49,  4.08s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-5800
Configuration saved in ../../model/train/ptt5-base/checkpoint-5800/config.json


{'eval_loss': 0.03769339248538017, 'eval_runtime': 54.5549, 'eval_samples_per_second': 73.834, 'eval_steps_per_second': 9.238, 'epoch': 0.1}


Model weights saved in ../../model/train/ptt5-base/checkpoint-5800/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-5800/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-5800/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-5800/spiece.model
 24%|██▍       | 6000/24920 [58:09<21:29:50,  4.09s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0352, 'learning_rate': 3.858075040783034e-05, 'epoch': 0.13}


                                                       
 24%|██▍       | 6000/24920 [59:04<21:29:50,  4.09s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-6000
Configuration saved in ../../model/train/ptt5-base/checkpoint-6000/config.json


{'eval_loss': 0.037992507219314575, 'eval_runtime': 54.6378, 'eval_samples_per_second': 73.722, 'eval_steps_per_second': 9.224, 'epoch': 0.13}


Model weights saved in ../../model/train/ptt5-base/checkpoint-6000/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-6000/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-6000/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-6000/spiece.model
 25%|██▍       | 6200/24920 [1:12:45<21:15:49,  4.09s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0411, 'learning_rate': 3.817292006525285e-05, 'epoch': 0.16}


                                                         
 25%|██▍       | 6200/24920 [1:13:40<21:15:49,  4.09s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-6200
Configuration saved in ../../model/train/ptt5-base/checkpoint-6200/config.json


{'eval_loss': 0.035734109580516815, 'eval_runtime': 54.4601, 'eval_samples_per_second': 73.962, 'eval_steps_per_second': 9.254, 'epoch': 0.16}


Model weights saved in ../../model/train/ptt5-base/checkpoint-6200/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-6200/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-6200/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-6200/spiece.model
 26%|██▌       | 6400/24920 [1:27:19<21:08:14,  4.11s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0402, 'learning_rate': 3.776508972267537e-05, 'epoch': 0.19}


                                                         
 26%|██▌       | 6400/24920 [1:28:13<21:08:14,  4.11s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-6400
Configuration saved in ../../model/train/ptt5-base/checkpoint-6400/config.json


{'eval_loss': 0.03558366373181343, 'eval_runtime': 54.5072, 'eval_samples_per_second': 73.899, 'eval_steps_per_second': 9.246, 'epoch': 0.19}


Model weights saved in ../../model/train/ptt5-base/checkpoint-6400/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-6400/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-6400/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-6400/spiece.model
 26%|██▋       | 6600/24920 [1:41:52<20:49:54,  4.09s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.036, 'learning_rate': 3.735725938009788e-05, 'epoch': 0.22}


                                                         
 26%|██▋       | 6600/24920 [1:42:47<20:49:54,  4.09s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-6600
Configuration saved in ../../model/train/ptt5-base/checkpoint-6600/config.json


{'eval_loss': 0.035693056881427765, 'eval_runtime': 54.5174, 'eval_samples_per_second': 73.885, 'eval_steps_per_second': 9.245, 'epoch': 0.22}


Model weights saved in ../../model/train/ptt5-base/checkpoint-6600/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-6600/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-6600/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-6600/spiece.model
 27%|██▋       | 6800/24920 [1:56:25<20:32:25,  4.08s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0396, 'learning_rate': 3.69494290375204e-05, 'epoch': 0.26}


                                                         
 27%|██▋       | 6800/24920 [1:57:19<20:32:25,  4.08s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-6800
Configuration saved in ../../model/train/ptt5-base/checkpoint-6800/config.json


{'eval_loss': 0.0364278145134449, 'eval_runtime': 54.4275, 'eval_samples_per_second': 74.007, 'eval_steps_per_second': 9.26, 'epoch': 0.26}


Model weights saved in ../../model/train/ptt5-base/checkpoint-6800/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-6800/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-6800/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-6800/spiece.model
 28%|██▊       | 7000/24920 [2:10:59<20:18:37,  4.08s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0383, 'learning_rate': 3.654159869494291e-05, 'epoch': 0.29}


                                                         
 28%|██▊       | 7000/24920 [2:11:54<20:18:37,  4.08s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-7000
Configuration saved in ../../model/train/ptt5-base/checkpoint-7000/config.json


{'eval_loss': 0.035444226115942, 'eval_runtime': 54.4906, 'eval_samples_per_second': 73.921, 'eval_steps_per_second': 9.249, 'epoch': 0.29}


Model weights saved in ../../model/train/ptt5-base/checkpoint-7000/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-7000/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-7000/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-7000/spiece.model
 29%|██▉       | 7200/24920 [2:25:32<20:06:02,  4.08s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.04, 'learning_rate': 3.613376835236542e-05, 'epoch': 0.32}


                                                         
 29%|██▉       | 7200/24920 [2:26:26<20:06:02,  4.08s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-7200
Configuration saved in ../../model/train/ptt5-base/checkpoint-7200/config.json


{'eval_loss': 0.035171572118997574, 'eval_runtime': 54.4213, 'eval_samples_per_second': 74.015, 'eval_steps_per_second': 9.261, 'epoch': 0.32}


Model weights saved in ../../model/train/ptt5-base/checkpoint-7200/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-7200/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-7200/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-7200/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-5200] due to args.save_total_limit
 30%|██▉       | 7400/24920 [2:40:06<19:44:41,  4.06s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0389, 'learning_rate': 3.572593800978793e-05, 'epoch': 0.35}


                                                         
 30%|██▉       | 7400/24920 [2:41:01<19:44:41,  4.06s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-7400
Configuration saved in ../../model/train/ptt5-base/checkpoint-7400/config.json


{'eval_loss': 0.03583730384707451, 'eval_runtime': 54.7785, 'eval_samples_per_second': 73.532, 'eval_steps_per_second': 9.201, 'epoch': 0.35}


Model weights saved in ../../model/train/ptt5-base/checkpoint-7400/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-7400/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-7400/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-7400/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-5400] due to args.save_total_limit
 30%|███       | 7600/24920 [2:54:43<19:49:03,  4.12s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0346, 'learning_rate': 3.5318107667210443e-05, 'epoch': 0.39}


                                                         
 30%|███       | 7600/24920 [2:55:37<19:49:03,  4.12s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-7600
Configuration saved in ../../model/train/ptt5-base/checkpoint-7600/config.json


{'eval_loss': 0.03525320068001747, 'eval_runtime': 54.8676, 'eval_samples_per_second': 73.413, 'eval_steps_per_second': 9.186, 'epoch': 0.39}


Model weights saved in ../../model/train/ptt5-base/checkpoint-7600/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-7600/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-7600/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-7600/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-5600] due to args.save_total_limit
 31%|███▏      | 7800/24920 [3:09:15<19:23:44,  4.08s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.039, 'learning_rate': 3.4910277324632953e-05, 'epoch': 0.42}


                                                         
 31%|███▏      | 7800/24920 [3:10:10<19:23:44,  4.08s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-7800
Configuration saved in ../../model/train/ptt5-base/checkpoint-7800/config.json


{'eval_loss': 0.03660828247666359, 'eval_runtime': 54.6418, 'eval_samples_per_second': 73.716, 'eval_steps_per_second': 9.224, 'epoch': 0.42}


Model weights saved in ../../model/train/ptt5-base/checkpoint-7800/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-7800/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-7800/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-7800/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-5800] due to args.save_total_limit
 32%|███▏      | 8000/24920 [3:23:46<19:09:10,  4.08s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0363, 'learning_rate': 3.4502446982055463e-05, 'epoch': 0.45}


                                                         
 32%|███▏      | 8000/24920 [3:24:41<19:09:10,  4.08s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-8000
Configuration saved in ../../model/train/ptt5-base/checkpoint-8000/config.json


{'eval_loss': 0.03499669209122658, 'eval_runtime': 55.0438, 'eval_samples_per_second': 73.178, 'eval_steps_per_second': 9.156, 'epoch': 0.45}


Model weights saved in ../../model/train/ptt5-base/checkpoint-8000/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-8000/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-8000/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-8000/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-6000] due to args.save_total_limit
 33%|███▎      | 8200/24920 [3:38:19<18:56:42,  4.08s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0359, 'learning_rate': 3.409461663947798e-05, 'epoch': 0.48}


                                                         
 33%|███▎      | 8200/24920 [3:39:14<18:56:42,  4.08s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-8200
Configuration saved in ../../model/train/ptt5-base/checkpoint-8200/config.json


{'eval_loss': 0.035376258194446564, 'eval_runtime': 54.9293, 'eval_samples_per_second': 73.331, 'eval_steps_per_second': 9.175, 'epoch': 0.48}


Model weights saved in ../../model/train/ptt5-base/checkpoint-8200/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-8200/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-8200/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-8200/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-6200] due to args.save_total_limit
 34%|███▎      | 8400/24920 [3:52:53<18:41:51,  4.07s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.036, 'learning_rate': 3.368678629690049e-05, 'epoch': 0.51}


                                                         
 34%|███▎      | 8400/24920 [3:53:47<18:41:51,  4.07s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-8400
Configuration saved in ../../model/train/ptt5-base/checkpoint-8400/config.json


{'eval_loss': 0.035488445311784744, 'eval_runtime': 54.5872, 'eval_samples_per_second': 73.79, 'eval_steps_per_second': 9.233, 'epoch': 0.51}


Model weights saved in ../../model/train/ptt5-base/checkpoint-8400/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-8400/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-8400/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-8400/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-6400] due to args.save_total_limit
 35%|███▍      | 8600/24920 [4:07:26<18:24:36,  4.06s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0373, 'learning_rate': 3.327895595432301e-05, 'epoch': 0.55}


                                                         
 35%|███▍      | 8600/24920 [4:08:20<18:24:36,  4.06s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-8600
Configuration saved in ../../model/train/ptt5-base/checkpoint-8600/config.json


{'eval_loss': 0.03550172969698906, 'eval_runtime': 54.471, 'eval_samples_per_second': 73.948, 'eval_steps_per_second': 9.253, 'epoch': 0.55}


Model weights saved in ../../model/train/ptt5-base/checkpoint-8600/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-8600/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-8600/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-8600/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-6600] due to args.save_total_limit
 35%|███▌      | 8800/24920 [4:21:58<18:16:40,  4.08s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0355, 'learning_rate': 3.287112561174552e-05, 'epoch': 0.58}


                                                         
 35%|███▌      | 8800/24920 [4:22:52<18:16:40,  4.08s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-8800
Configuration saved in ../../model/train/ptt5-base/checkpoint-8800/config.json


{'eval_loss': 0.034991305321455, 'eval_runtime': 54.6821, 'eval_samples_per_second': 73.662, 'eval_steps_per_second': 9.217, 'epoch': 0.58}


Model weights saved in ../../model/train/ptt5-base/checkpoint-8800/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-8800/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-8800/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-8800/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-6800] due to args.save_total_limit
 36%|███▌      | 9000/24920 [4:36:32<17:59:09,  4.07s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0372, 'learning_rate': 3.246329526916803e-05, 'epoch': 0.61}


                                                         
 36%|███▌      | 9000/24920 [4:37:26<17:59:09,  4.07s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-9000
Configuration saved in ../../model/train/ptt5-base/checkpoint-9000/config.json


{'eval_loss': 0.034312039613723755, 'eval_runtime': 54.6616, 'eval_samples_per_second': 73.69, 'eval_steps_per_second': 9.22, 'epoch': 0.61}


Model weights saved in ../../model/train/ptt5-base/checkpoint-9000/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-9000/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-9000/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-9000/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-7000] due to args.save_total_limit
 37%|███▋      | 9200/24920 [4:51:04<17:53:47,  4.10s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0342, 'learning_rate': 3.205546492659054e-05, 'epoch': 0.64}


                                                         
 37%|███▋      | 9200/24920 [4:51:59<17:53:47,  4.10s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-9200
Configuration saved in ../../model/train/ptt5-base/checkpoint-9200/config.json


{'eval_loss': 0.035302210599184036, 'eval_runtime': 54.5633, 'eval_samples_per_second': 73.823, 'eval_steps_per_second': 9.237, 'epoch': 0.64}


Model weights saved in ../../model/train/ptt5-base/checkpoint-9200/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-9200/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-9200/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-9200/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-7200] due to args.save_total_limit
 38%|███▊      | 9400/24920 [5:05:35<17:31:30,  4.07s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0345, 'learning_rate': 3.1647634584013054e-05, 'epoch': 0.67}


                                                         
 38%|███▊      | 9400/24920 [5:06:29<17:31:30,  4.07s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-9400
Configuration saved in ../../model/train/ptt5-base/checkpoint-9400/config.json


{'eval_loss': 0.03438310697674751, 'eval_runtime': 54.5577, 'eval_samples_per_second': 73.83, 'eval_steps_per_second': 9.238, 'epoch': 0.67}


Model weights saved in ../../model/train/ptt5-base/checkpoint-9400/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-9400/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-9400/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-9400/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-7400] due to args.save_total_limit
 39%|███▊      | 9600/24920 [5:20:06<17:17:21,  4.06s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.037, 'learning_rate': 3.1239804241435564e-05, 'epoch': 0.71}


                                                         
 39%|███▊      | 9600/24920 [5:21:01<17:17:21,  4.06s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-9600
Configuration saved in ../../model/train/ptt5-base/checkpoint-9600/config.json


{'eval_loss': 0.0335809662938118, 'eval_runtime': 54.6769, 'eval_samples_per_second': 73.669, 'eval_steps_per_second': 9.218, 'epoch': 0.71}


Model weights saved in ../../model/train/ptt5-base/checkpoint-9600/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-9600/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-9600/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-9600/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-7600] due to args.save_total_limit
 39%|███▉      | 9800/24920 [5:34:38<17:09:55,  4.09s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0359, 'learning_rate': 3.0831973898858074e-05, 'epoch': 0.74}


                                                         
 39%|███▉      | 9800/24920 [5:35:33<17:09:55,  4.09s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-9800
Configuration saved in ../../model/train/ptt5-base/checkpoint-9800/config.json


{'eval_loss': 0.034582629799842834, 'eval_runtime': 54.5553, 'eval_samples_per_second': 73.833, 'eval_steps_per_second': 9.238, 'epoch': 0.74}


Model weights saved in ../../model/train/ptt5-base/checkpoint-9800/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-9800/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-9800/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-9800/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-7800] due to args.save_total_limit
 40%|████      | 10000/24920 [5:49:09<16:52:39,  4.07s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0335, 'learning_rate': 3.0424143556280587e-05, 'epoch': 0.77}


                                                          
 40%|████      | 10000/24920 [5:50:03<16:52:39,  4.07s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-10000
Configuration saved in ../../model/train/ptt5-base/checkpoint-10000/config.json


{'eval_loss': 0.039332326501607895, 'eval_runtime': 54.3935, 'eval_samples_per_second': 74.053, 'eval_steps_per_second': 9.266, 'epoch': 0.77}


Model weights saved in ../../model/train/ptt5-base/checkpoint-10000/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-10000/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-10000/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-10000/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-8000] due to args.save_total_limit
 41%|████      | 10200/24920 [6:03:39<16:37:30,  4.07s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0339, 'learning_rate': 3.00163132137031e-05, 'epoch': 0.8}


                                                          
 41%|████      | 10200/24920 [6:04:33<16:37:30,  4.07s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-10200
Configuration saved in ../../model/train/ptt5-base/checkpoint-10200/config.json


{'eval_loss': 0.036113590002059937, 'eval_runtime': 54.3368, 'eval_samples_per_second': 74.13, 'eval_steps_per_second': 9.275, 'epoch': 0.8}


Model weights saved in ../../model/train/ptt5-base/checkpoint-10200/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-10200/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-10200/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-10200/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-8200] due to args.save_total_limit
 42%|████▏     | 10400/24920 [6:18:08<16:22:16,  4.06s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0341, 'learning_rate': 2.9608482871125614e-05, 'epoch': 0.83}


                                                          
 42%|████▏     | 10400/24920 [6:19:02<16:22:16,  4.06s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-10400
Configuration saved in ../../model/train/ptt5-base/checkpoint-10400/config.json


{'eval_loss': 0.033041030168533325, 'eval_runtime': 54.4255, 'eval_samples_per_second': 74.009, 'eval_steps_per_second': 9.26, 'epoch': 0.83}


Model weights saved in ../../model/train/ptt5-base/checkpoint-10400/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-10400/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-10400/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-10400/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-8400] due to args.save_total_limit
 43%|████▎     | 10600/24920 [6:32:37<16:10:12,  4.07s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0381, 'learning_rate': 2.9200652528548127e-05, 'epoch': 0.87}


                                                          
 43%|████▎     | 10600/24920 [6:33:31<16:10:12,  4.07s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-10600
Configuration saved in ../../model/train/ptt5-base/checkpoint-10600/config.json


{'eval_loss': 0.034012772142887115, 'eval_runtime': 54.3142, 'eval_samples_per_second': 74.161, 'eval_steps_per_second': 9.279, 'epoch': 0.87}


Model weights saved in ../../model/train/ptt5-base/checkpoint-10600/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-10600/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-10600/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-10600/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-8600] due to args.save_total_limit
 43%|████▎     | 10800/24920 [6:47:10<15:56:53,  4.07s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0335, 'learning_rate': 2.8792822185970637e-05, 'epoch': 0.9}


                                                          
 43%|████▎     | 10800/24920 [6:48:05<15:56:53,  4.07s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-10800
Configuration saved in ../../model/train/ptt5-base/checkpoint-10800/config.json


{'eval_loss': 0.032748669385910034, 'eval_runtime': 54.6942, 'eval_samples_per_second': 73.646, 'eval_steps_per_second': 9.215, 'epoch': 0.9}


Model weights saved in ../../model/train/ptt5-base/checkpoint-10800/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-10800/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-10800/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-10800/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-8800] due to args.save_total_limit
 44%|████▍     | 11000/24920 [7:01:42<15:45:30,  4.08s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0341, 'learning_rate': 2.838499184339315e-05, 'epoch': 0.93}


                                                          
 44%|████▍     | 11000/24920 [7:02:37<15:45:30,  4.08s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-11000
Configuration saved in ../../model/train/ptt5-base/checkpoint-11000/config.json


{'eval_loss': 0.034486666321754456, 'eval_runtime': 54.7649, 'eval_samples_per_second': 73.551, 'eval_steps_per_second': 9.203, 'epoch': 0.93}


Model weights saved in ../../model/train/ptt5-base/checkpoint-11000/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-11000/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-11000/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-11000/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-9000] due to args.save_total_limit
 45%|████▍     | 11200/24920 [7:16:19<15:36:55,  4.10s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0348, 'learning_rate': 2.7977161500815664e-05, 'epoch': 0.96}


                                                          
 45%|████▍     | 11200/24920 [7:17:14<15:36:55,  4.10s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-11200
Configuration saved in ../../model/train/ptt5-base/checkpoint-11200/config.json


{'eval_loss': 0.033023323863744736, 'eval_runtime': 54.5971, 'eval_samples_per_second': 73.777, 'eval_steps_per_second': 9.231, 'epoch': 0.96}


Model weights saved in ../../model/train/ptt5-base/checkpoint-11200/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-11200/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-11200/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-11200/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-9200] due to args.save_total_limit
 46%|████▌     | 11400/24920 [7:30:51<15:14:23,  4.06s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0337, 'learning_rate': 2.7569331158238177e-05, 'epoch': 1.0}


                                                          
 46%|████▌     | 11400/24920 [7:31:46<15:14:23,  4.06s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-11400
Configuration saved in ../../model/train/ptt5-base/checkpoint-11400/config.json


{'eval_loss': 0.03410288691520691, 'eval_runtime': 54.4564, 'eval_samples_per_second': 73.967, 'eval_steps_per_second': 9.255, 'epoch': 1.0}


Model weights saved in ../../model/train/ptt5-base/checkpoint-11400/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-11400/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-11400/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-11400/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-9400] due to args.save_total_limit
 47%|████▋     | 11600/24920 [7:45:20<15:00:06,  4.05s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0269, 'learning_rate': 2.7161500815660684e-05, 'epoch': 1.03}


                                                          
 47%|████▋     | 11600/24920 [7:46:15<15:00:06,  4.05s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-11600
Configuration saved in ../../model/train/ptt5-base/checkpoint-11600/config.json


{'eval_loss': 0.03432347625494003, 'eval_runtime': 54.4664, 'eval_samples_per_second': 73.954, 'eval_steps_per_second': 9.253, 'epoch': 1.03}


Model weights saved in ../../model/train/ptt5-base/checkpoint-11600/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-11600/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-11600/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-11600/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-9600] due to args.save_total_limit
 47%|████▋     | 11800/24920 [7:59:49<14:50:44,  4.07s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0283, 'learning_rate': 2.6753670473083197e-05, 'epoch': 1.06}


                                                          
 47%|████▋     | 11800/24920 [8:00:44<14:50:44,  4.07s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-11800
Configuration saved in ../../model/train/ptt5-base/checkpoint-11800/config.json


{'eval_loss': 0.03402471914887428, 'eval_runtime': 54.5164, 'eval_samples_per_second': 73.886, 'eval_steps_per_second': 9.245, 'epoch': 1.06}


Model weights saved in ../../model/train/ptt5-base/checkpoint-11800/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-11800/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-11800/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-11800/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-9800] due to args.save_total_limit
 48%|████▊     | 12000/24920 [8:14:20<14:41:43,  4.09s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0289, 'learning_rate': 2.634584013050571e-05, 'epoch': 1.09}


                                                          
 48%|████▊     | 12000/24920 [8:15:14<14:41:43,  4.09s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-12000
Configuration saved in ../../model/train/ptt5-base/checkpoint-12000/config.json


{'eval_loss': 0.03309272229671478, 'eval_runtime': 54.8533, 'eval_samples_per_second': 73.432, 'eval_steps_per_second': 9.188, 'epoch': 1.09}


Model weights saved in ../../model/train/ptt5-base/checkpoint-12000/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-12000/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-12000/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-12000/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-10000] due to args.save_total_limit
 49%|████▉     | 12200/24920 [8:28:50<14:23:40,  4.07s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0274, 'learning_rate': 2.5938009787928224e-05, 'epoch': 1.12}


                                                          
 49%|████▉     | 12200/24920 [8:29:45<14:23:40,  4.07s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-12200
Configuration saved in ../../model/train/ptt5-base/checkpoint-12200/config.json


{'eval_loss': 0.0332350879907608, 'eval_runtime': 55.0246, 'eval_samples_per_second': 73.204, 'eval_steps_per_second': 9.16, 'epoch': 1.12}


Model weights saved in ../../model/train/ptt5-base/checkpoint-12200/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-12200/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-12200/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-12200/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-10200] due to args.save_total_limit
 50%|████▉     | 12400/24920 [8:43:21<14:06:10,  4.06s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0286, 'learning_rate': 2.5530179445350734e-05, 'epoch': 1.16}


                                                          
 50%|████▉     | 12400/24920 [8:44:16<14:06:10,  4.06s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-12400
Configuration saved in ../../model/train/ptt5-base/checkpoint-12400/config.json


{'eval_loss': 0.03304194658994675, 'eval_runtime': 54.4162, 'eval_samples_per_second': 74.022, 'eval_steps_per_second': 9.262, 'epoch': 1.16}


Model weights saved in ../../model/train/ptt5-base/checkpoint-12400/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-12400/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-12400/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-12400/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-10400] due to args.save_total_limit
 51%|█████     | 12600/24920 [8:57:51<13:53:01,  4.06s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.028, 'learning_rate': 2.5122349102773248e-05, 'epoch': 1.19}


                                                          
 51%|█████     | 12600/24920 [8:58:45<13:53:01,  4.06s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-12600
Configuration saved in ../../model/train/ptt5-base/checkpoint-12600/config.json


{'eval_loss': 0.03348330035805702, 'eval_runtime': 54.3818, 'eval_samples_per_second': 74.069, 'eval_steps_per_second': 9.268, 'epoch': 1.19}


Model weights saved in ../../model/train/ptt5-base/checkpoint-12600/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-12600/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-12600/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-12600/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-10600] due to args.save_total_limit
 51%|█████▏    | 12800/24920 [9:12:19<13:39:41,  4.06s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0271, 'learning_rate': 2.471451876019576e-05, 'epoch': 1.22}


                                                          
 51%|█████▏    | 12800/24920 [9:13:13<13:39:41,  4.06s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-12800
Configuration saved in ../../model/train/ptt5-base/checkpoint-12800/config.json


{'eval_loss': 0.03277859091758728, 'eval_runtime': 54.3456, 'eval_samples_per_second': 74.118, 'eval_steps_per_second': 9.274, 'epoch': 1.22}


Model weights saved in ../../model/train/ptt5-base/checkpoint-12800/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-12800/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-12800/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-12800/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-10800] due to args.save_total_limit
 52%|█████▏    | 13000/24920 [9:26:47<13:24:52,  4.05s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0268, 'learning_rate': 2.4306688417618274e-05, 'epoch': 1.25}


                                                          
 52%|█████▏    | 13000/24920 [9:27:41<13:24:52,  4.05s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-13000
Configuration saved in ../../model/train/ptt5-base/checkpoint-13000/config.json


{'eval_loss': 0.03482929989695549, 'eval_runtime': 54.8141, 'eval_samples_per_second': 73.485, 'eval_steps_per_second': 9.195, 'epoch': 1.25}


Model weights saved in ../../model/train/ptt5-base/checkpoint-13000/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-13000/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-13000/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-13000/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-11000] due to args.save_total_limit
 53%|█████▎    | 13200/24920 [9:41:15<13:17:04,  4.08s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0265, 'learning_rate': 2.3898858075040784e-05, 'epoch': 1.28}


                                                          
 53%|█████▎    | 13200/24920 [9:42:09<13:17:04,  4.08s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-13200
Configuration saved in ../../model/train/ptt5-base/checkpoint-13200/config.json


{'eval_loss': 0.03329039737582207, 'eval_runtime': 54.4179, 'eval_samples_per_second': 74.02, 'eval_steps_per_second': 9.262, 'epoch': 1.28}


Model weights saved in ../../model/train/ptt5-base/checkpoint-13200/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-13200/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-13200/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-13200/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-11200] due to args.save_total_limit
 54%|█████▍    | 13400/24920 [9:55:43<13:03:05,  4.08s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.03, 'learning_rate': 2.3491027732463298e-05, 'epoch': 1.32}


                                                          
 54%|█████▍    | 13400/24920 [9:56:37<13:03:05,  4.08s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-13400
Configuration saved in ../../model/train/ptt5-base/checkpoint-13400/config.json


{'eval_loss': 0.03396589681506157, 'eval_runtime': 54.3845, 'eval_samples_per_second': 74.065, 'eval_steps_per_second': 9.267, 'epoch': 1.32}


Model weights saved in ../../model/train/ptt5-base/checkpoint-13400/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-13400/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-13400/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-13400/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-11400] due to args.save_total_limit
 55%|█████▍    | 13600/24920 [10:10:13<12:51:07,  4.09s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0256, 'learning_rate': 2.3083197389885808e-05, 'epoch': 1.35}


                                                           
 55%|█████▍    | 13600/24920 [10:11:08<12:51:07,  4.09s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-13600
Configuration saved in ../../model/train/ptt5-base/checkpoint-13600/config.json


{'eval_loss': 0.03387042135000229, 'eval_runtime': 54.3974, 'eval_samples_per_second': 74.048, 'eval_steps_per_second': 9.265, 'epoch': 1.35}


Model weights saved in ../../model/train/ptt5-base/checkpoint-13600/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-13600/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-13600/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-13600/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-11600] due to args.save_total_limit
 55%|█████▌    | 13800/24920 [10:24:50<12:37:54,  4.09s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0284, 'learning_rate': 2.267536704730832e-05, 'epoch': 1.38}


                                                           
 55%|█████▌    | 13800/24920 [10:25:46<12:37:54,  4.09s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-13800
Configuration saved in ../../model/train/ptt5-base/checkpoint-13800/config.json


{'eval_loss': 0.03275327757000923, 'eval_runtime': 55.4019, 'eval_samples_per_second': 72.705, 'eval_steps_per_second': 9.097, 'epoch': 1.38}


Model weights saved in ../../model/train/ptt5-base/checkpoint-13800/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-13800/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-13800/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-13800/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-11800] due to args.save_total_limit
 56%|█████▌    | 14000/24920 [10:39:24<12:22:00,  4.08s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0291, 'learning_rate': 2.226753670473083e-05, 'epoch': 1.41}


                                                           
 56%|█████▌    | 14000/24920 [10:40:19<12:22:00,  4.08s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-14000
Configuration saved in ../../model/train/ptt5-base/checkpoint-14000/config.json


{'eval_loss': 0.03327794745564461, 'eval_runtime': 54.6509, 'eval_samples_per_second': 73.704, 'eval_steps_per_second': 9.222, 'epoch': 1.41}


Model weights saved in ../../model/train/ptt5-base/checkpoint-14000/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-14000/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-14000/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-14000/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-12000] due to args.save_total_limit
 57%|█████▋    | 14200/24920 [10:53:53<12:05:35,  4.06s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0289, 'learning_rate': 2.1859706362153344e-05, 'epoch': 1.44}


                                                           
 57%|█████▋    | 14200/24920 [10:54:48<12:05:35,  4.06s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-14200
Configuration saved in ../../model/train/ptt5-base/checkpoint-14200/config.json


{'eval_loss': 0.033764783293008804, 'eval_runtime': 54.3898, 'eval_samples_per_second': 74.058, 'eval_steps_per_second': 9.266, 'epoch': 1.44}


Model weights saved in ../../model/train/ptt5-base/checkpoint-14200/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-14200/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-14200/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-14200/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-12200] due to args.save_total_limit
 58%|█████▊    | 14400/24920 [11:08:22<11:52:35,  4.06s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0257, 'learning_rate': 2.1451876019575858e-05, 'epoch': 1.48}


                                                           
 58%|█████▊    | 14400/24920 [11:09:16<11:52:35,  4.06s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-14400
Configuration saved in ../../model/train/ptt5-base/checkpoint-14400/config.json


{'eval_loss': 0.033985648304224014, 'eval_runtime': 54.2261, 'eval_samples_per_second': 74.282, 'eval_steps_per_second': 9.294, 'epoch': 1.48}


Model weights saved in ../../model/train/ptt5-base/checkpoint-14400/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-14400/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-14400/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-14400/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-12400] due to args.save_total_limit
 59%|█████▊    | 14600/24920 [11:22:58<11:42:49,  4.09s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0268, 'learning_rate': 2.104404567699837e-05, 'epoch': 1.51}


                                                           
 59%|█████▊    | 14600/24920 [11:23:52<11:42:49,  4.09s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-14600
Configuration saved in ../../model/train/ptt5-base/checkpoint-14600/config.json


{'eval_loss': 0.03413856402039528, 'eval_runtime': 54.5704, 'eval_samples_per_second': 73.813, 'eval_steps_per_second': 9.236, 'epoch': 1.51}


Model weights saved in ../../model/train/ptt5-base/checkpoint-14600/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-14600/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-14600/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-14600/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-12600] due to args.save_total_limit
 59%|█████▉    | 14800/24920 [11:37:25<11:22:40,  4.05s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0299, 'learning_rate': 2.0636215334420885e-05, 'epoch': 1.54}


                                                           
 59%|█████▉    | 14800/24920 [11:38:19<11:22:40,  4.05s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-14800
Configuration saved in ../../model/train/ptt5-base/checkpoint-14800/config.json


{'eval_loss': 0.034166935831308365, 'eval_runtime': 54.2621, 'eval_samples_per_second': 74.232, 'eval_steps_per_second': 9.288, 'epoch': 1.54}


Model weights saved in ../../model/train/ptt5-base/checkpoint-14800/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-14800/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-14800/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-14800/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-12800] due to args.save_total_limit
 60%|██████    | 15000/24920 [11:51:53<11:09:25,  4.05s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0264, 'learning_rate': 2.0228384991843395e-05, 'epoch': 1.57}


                                                           
 60%|██████    | 15000/24920 [11:52:48<11:09:25,  4.05s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-15000
Configuration saved in ../../model/train/ptt5-base/checkpoint-15000/config.json


{'eval_loss': 0.033432092517614365, 'eval_runtime': 54.432, 'eval_samples_per_second': 74.001, 'eval_steps_per_second': 9.259, 'epoch': 1.57}


Model weights saved in ../../model/train/ptt5-base/checkpoint-15000/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-15000/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-15000/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-15000/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-13000] due to args.save_total_limit
 61%|██████    | 15200/24920 [12:06:23<10:56:55,  4.06s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0253, 'learning_rate': 1.9820554649265908e-05, 'epoch': 1.61}


                                                           
 61%|██████    | 15200/24920 [12:07:18<10:56:55,  4.06s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-15200
Configuration saved in ../../model/train/ptt5-base/checkpoint-15200/config.json


{'eval_loss': 0.0337190218269825, 'eval_runtime': 54.46, 'eval_samples_per_second': 73.963, 'eval_steps_per_second': 9.254, 'epoch': 1.61}


Model weights saved in ../../model/train/ptt5-base/checkpoint-15200/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-15200/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-15200/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-15200/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-13200] due to args.save_total_limit
 62%|██████▏   | 15400/24920 [12:20:52<10:47:07,  4.08s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0292, 'learning_rate': 1.9412724306688418e-05, 'epoch': 1.64}


                                                           
 62%|██████▏   | 15400/24920 [12:21:46<10:47:07,  4.08s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-15400
Configuration saved in ../../model/train/ptt5-base/checkpoint-15400/config.json


{'eval_loss': 0.03311581164598465, 'eval_runtime': 54.3622, 'eval_samples_per_second': 74.096, 'eval_steps_per_second': 9.271, 'epoch': 1.64}


Model weights saved in ../../model/train/ptt5-base/checkpoint-15400/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-15400/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-15400/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-15400/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-13400] due to args.save_total_limit
 63%|██████▎   | 15600/24920 [12:35:23<10:35:42,  4.09s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0258, 'learning_rate': 1.900489396411093e-05, 'epoch': 1.67}


                                                           
 63%|██████▎   | 15600/24920 [12:36:18<10:35:42,  4.09s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-15600
Configuration saved in ../../model/train/ptt5-base/checkpoint-15600/config.json


{'eval_loss': 0.03332404047250748, 'eval_runtime': 54.5929, 'eval_samples_per_second': 73.783, 'eval_steps_per_second': 9.232, 'epoch': 1.67}


Model weights saved in ../../model/train/ptt5-base/checkpoint-15600/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-15600/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-15600/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-15600/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-13600] due to args.save_total_limit
 63%|██████▎   | 15800/24920 [12:49:55<10:20:55,  4.09s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0292, 'learning_rate': 1.859706362153344e-05, 'epoch': 1.7}


                                                           
 63%|██████▎   | 15800/24920 [12:50:50<10:20:55,  4.09s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-15800
Configuration saved in ../../model/train/ptt5-base/checkpoint-15800/config.json


{'eval_loss': 0.03233485668897629, 'eval_runtime': 54.4815, 'eval_samples_per_second': 73.933, 'eval_steps_per_second': 9.251, 'epoch': 1.7}


Model weights saved in ../../model/train/ptt5-base/checkpoint-15800/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-15800/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-15800/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-15800/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-13800] due to args.save_total_limit
 64%|██████▍   | 16000/24920 [13:04:31<10:07:22,  4.09s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0262, 'learning_rate': 1.8189233278955955e-05, 'epoch': 1.73}


                                                           
 64%|██████▍   | 16000/24920 [13:05:26<10:07:22,  4.09s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-16000
Configuration saved in ../../model/train/ptt5-base/checkpoint-16000/config.json


{'eval_loss': 0.03301709517836571, 'eval_runtime': 54.7592, 'eval_samples_per_second': 73.558, 'eval_steps_per_second': 9.204, 'epoch': 1.73}


Model weights saved in ../../model/train/ptt5-base/checkpoint-16000/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-16000/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-16000/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-16000/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-14000] due to args.save_total_limit
 65%|██████▌   | 16200/24920 [13:19:05<9:51:54,  4.07s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0297, 'learning_rate': 1.7781402936378465e-05, 'epoch': 1.77}


                                                          
 65%|██████▌   | 16200/24920 [13:20:00<9:51:54,  4.07s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-16200
Configuration saved in ../../model/train/ptt5-base/checkpoint-16200/config.json


{'eval_loss': 0.03242029994726181, 'eval_runtime': 54.8458, 'eval_samples_per_second': 73.442, 'eval_steps_per_second': 9.189, 'epoch': 1.77}


Model weights saved in ../../model/train/ptt5-base/checkpoint-16200/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-16200/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-16200/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-16200/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-14200] due to args.save_total_limit
 66%|██████▌   | 16400/24920 [13:33:37<9:36:12,  4.06s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0264, 'learning_rate': 1.7373572593800978e-05, 'epoch': 1.8}


                                                          
 66%|██████▌   | 16400/24920 [13:34:32<9:36:12,  4.06s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-16400
Configuration saved in ../../model/train/ptt5-base/checkpoint-16400/config.json


{'eval_loss': 0.0320003479719162, 'eval_runtime': 54.7364, 'eval_samples_per_second': 73.589, 'eval_steps_per_second': 9.208, 'epoch': 1.8}


Model weights saved in ../../model/train/ptt5-base/checkpoint-16400/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-16400/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-16400/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-16400/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-14400] due to args.save_total_limit
 67%|██████▋   | 16600/24920 [13:48:08<9:24:21,  4.07s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0257, 'learning_rate': 1.6965742251223495e-05, 'epoch': 1.83}


                                                          
 67%|██████▋   | 16600/24920 [13:49:03<9:24:21,  4.07s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-16600
Configuration saved in ../../model/train/ptt5-base/checkpoint-16600/config.json


{'eval_loss': 0.031586699187755585, 'eval_runtime': 54.4731, 'eval_samples_per_second': 73.945, 'eval_steps_per_second': 9.252, 'epoch': 1.83}


Model weights saved in ../../model/train/ptt5-base/checkpoint-16600/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-16600/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-16600/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-16600/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-14600] due to args.save_total_limit
 67%|██████▋   | 16800/24920 [14:02:39<9:13:00,  4.09s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0257, 'learning_rate': 1.6557911908646005e-05, 'epoch': 1.86}


                                                          
 67%|██████▋   | 16800/24920 [14:03:33<9:13:00,  4.09s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-16800
Configuration saved in ../../model/train/ptt5-base/checkpoint-16800/config.json


{'eval_loss': 0.03272559493780136, 'eval_runtime': 54.5957, 'eval_samples_per_second': 73.779, 'eval_steps_per_second': 9.232, 'epoch': 1.86}


Model weights saved in ../../model/train/ptt5-base/checkpoint-16800/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-16800/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-16800/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-16800/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-14800] due to args.save_total_limit
 68%|██████▊   | 17000/24920 [14:17:11<8:59:42,  4.09s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0274, 'learning_rate': 1.6150081566068518e-05, 'epoch': 1.89}


                                                          
 68%|██████▊   | 17000/24920 [14:18:06<8:59:42,  4.09s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-17000
Configuration saved in ../../model/train/ptt5-base/checkpoint-17000/config.json


{'eval_loss': 0.03192828223109245, 'eval_runtime': 54.6653, 'eval_samples_per_second': 73.685, 'eval_steps_per_second': 9.22, 'epoch': 1.89}


Model weights saved in ../../model/train/ptt5-base/checkpoint-17000/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-17000/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-17000/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-17000/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-15000] due to args.save_total_limit
 69%|██████▉   | 17191/24920 [14:31:12<8:48:49,  4.11s/it] 

Experiencing connection interruptions. Will try to reestablish communication with Neptune. Internal exception was: HTTPServiceUnavailable


 69%|██████▉   | 17192/24920 [14:31:17<8:50:05,  4.12s/it]

Communication with Neptune restored!


 69%|██████▉   | 17200/24920 [14:31:50<8:53:33,  4.15s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0255, 'learning_rate': 1.5742251223491028e-05, 'epoch': 1.93}


                                                          
 69%|██████▉   | 17200/24920 [14:32:45<8:53:33,  4.15s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-17200
Configuration saved in ../../model/train/ptt5-base/checkpoint-17200/config.json


{'eval_loss': 0.033229388296604156, 'eval_runtime': 55.1296, 'eval_samples_per_second': 73.064, 'eval_steps_per_second': 9.142, 'epoch': 1.93}


Model weights saved in ../../model/train/ptt5-base/checkpoint-17200/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-17200/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-17200/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-17200/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-15200] due to args.save_total_limit
 70%|██████▉   | 17400/24920 [14:46:40<8:41:46,  4.16s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0273, 'learning_rate': 1.533442088091354e-05, 'epoch': 1.96}


                                                          
 70%|██████▉   | 17400/24920 [14:47:35<8:41:46,  4.16s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-17400
Configuration saved in ../../model/train/ptt5-base/checkpoint-17400/config.json


{'eval_loss': 0.032098811119794846, 'eval_runtime': 55.16, 'eval_samples_per_second': 73.024, 'eval_steps_per_second': 9.137, 'epoch': 1.96}


Model weights saved in ../../model/train/ptt5-base/checkpoint-17400/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-17400/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-17400/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-17400/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-15400] due to args.save_total_limit
 71%|███████   | 17600/24920 [15:01:31<8:28:36,  4.17s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0288, 'learning_rate': 1.4926590538336052e-05, 'epoch': 1.99}


                                                          
 71%|███████   | 17600/24920 [15:02:26<8:28:36,  4.17s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-17600
Configuration saved in ../../model/train/ptt5-base/checkpoint-17600/config.json


{'eval_loss': 0.032441623508930206, 'eval_runtime': 55.211, 'eval_samples_per_second': 72.957, 'eval_steps_per_second': 9.129, 'epoch': 1.99}


Model weights saved in ../../model/train/ptt5-base/checkpoint-17600/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-17600/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-17600/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-17600/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-15600] due to args.save_total_limit
 71%|███████▏  | 17800/24920 [15:16:21<8:18:48,  4.20s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.024, 'learning_rate': 1.4518760195758565e-05, 'epoch': 2.02}


                                                          
 71%|███████▏  | 17800/24920 [15:17:17<8:18:48,  4.20s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-17800
Configuration saved in ../../model/train/ptt5-base/checkpoint-17800/config.json


{'eval_loss': 0.03280528634786606, 'eval_runtime': 55.4043, 'eval_samples_per_second': 72.702, 'eval_steps_per_second': 9.097, 'epoch': 2.02}


Model weights saved in ../../model/train/ptt5-base/checkpoint-17800/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-17800/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-17800/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-17800/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-15800] due to args.save_total_limit
 72%|███████▏  | 18000/24920 [15:31:11<8:02:44,  4.19s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0231, 'learning_rate': 1.4110929853181077e-05, 'epoch': 2.05}


                                                          
 72%|███████▏  | 18000/24920 [15:32:06<8:02:44,  4.19s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-18000
Configuration saved in ../../model/train/ptt5-base/checkpoint-18000/config.json


{'eval_loss': 0.032688163220882416, 'eval_runtime': 55.4264, 'eval_samples_per_second': 72.673, 'eval_steps_per_second': 9.093, 'epoch': 2.05}


Model weights saved in ../../model/train/ptt5-base/checkpoint-18000/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-18000/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-18000/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-18000/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-16000] due to args.save_total_limit
 73%|███████▎  | 18200/24920 [15:46:01<7:39:11,  4.10s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0212, 'learning_rate': 1.370309951060359e-05, 'epoch': 2.09}


                                                          
 73%|███████▎  | 18200/24920 [15:46:57<7:39:11,  4.10s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-18200
Configuration saved in ../../model/train/ptt5-base/checkpoint-18200/config.json


{'eval_loss': 0.033261191099882126, 'eval_runtime': 55.2131, 'eval_samples_per_second': 72.954, 'eval_steps_per_second': 9.128, 'epoch': 2.09}


Model weights saved in ../../model/train/ptt5-base/checkpoint-18200/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-18200/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-18200/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-18200/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-16200] due to args.save_total_limit
 74%|███████▍  | 18400/24920 [16:00:47<7:32:00,  4.16s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0201, 'learning_rate': 1.32952691680261e-05, 'epoch': 2.12}


                                                          
 74%|███████▍  | 18400/24920 [16:01:43<7:32:00,  4.16s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-18400
Configuration saved in ../../model/train/ptt5-base/checkpoint-18400/config.json


{'eval_loss': 0.0328025184571743, 'eval_runtime': 55.8932, 'eval_samples_per_second': 72.066, 'eval_steps_per_second': 9.017, 'epoch': 2.12}


Model weights saved in ../../model/train/ptt5-base/checkpoint-18400/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-18400/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-18400/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-18400/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-16400] due to args.save_total_limit
 75%|███████▍  | 18600/24920 [16:15:46<7:25:14,  4.23s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0204, 'learning_rate': 1.2887438825448613e-05, 'epoch': 2.15}


                                                          
 75%|███████▍  | 18600/24920 [16:16:41<7:25:14,  4.23s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-18600
Configuration saved in ../../model/train/ptt5-base/checkpoint-18600/config.json


{'eval_loss': 0.0336274690926075, 'eval_runtime': 55.5273, 'eval_samples_per_second': 72.541, 'eval_steps_per_second': 9.077, 'epoch': 2.15}


Model weights saved in ../../model/train/ptt5-base/checkpoint-18600/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-18600/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-18600/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-18600/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-16600] due to args.save_total_limit
 75%|███████▌  | 18800/24920 [16:30:45<7:10:38,  4.22s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0197, 'learning_rate': 1.2479608482871127e-05, 'epoch': 2.18}


                                                          
 75%|███████▌  | 18800/24920 [16:31:40<7:10:38,  4.22s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-18800
Configuration saved in ../../model/train/ptt5-base/checkpoint-18800/config.json


{'eval_loss': 0.034743838012218475, 'eval_runtime': 55.474, 'eval_samples_per_second': 72.611, 'eval_steps_per_second': 9.085, 'epoch': 2.18}


Model weights saved in ../../model/train/ptt5-base/checkpoint-18800/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-18800/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-18800/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-18800/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-16800] due to args.save_total_limit
 76%|███████▌  | 19000/24920 [16:45:48<6:52:16,  4.18s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0253, 'learning_rate': 1.2071778140293638e-05, 'epoch': 2.22}


                                                          
 76%|███████▌  | 19000/24920 [16:46:43<6:52:16,  4.18s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-19000
Configuration saved in ../../model/train/ptt5-base/checkpoint-19000/config.json


{'eval_loss': 0.033184681087732315, 'eval_runtime': 55.406, 'eval_samples_per_second': 72.7, 'eval_steps_per_second': 9.096, 'epoch': 2.22}


Model weights saved in ../../model/train/ptt5-base/checkpoint-19000/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-19000/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-19000/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-19000/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-17000] due to args.save_total_limit
 77%|███████▋  | 19200/24920 [17:00:41<6:36:14,  4.16s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.022, 'learning_rate': 1.166394779771615e-05, 'epoch': 2.25}


                                                          
 77%|███████▋  | 19200/24920 [17:01:37<6:36:14,  4.16s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-19200
Configuration saved in ../../model/train/ptt5-base/checkpoint-19200/config.json


{'eval_loss': 0.03394577279686928, 'eval_runtime': 55.1714, 'eval_samples_per_second': 73.009, 'eval_steps_per_second': 9.135, 'epoch': 2.25}


Model weights saved in ../../model/train/ptt5-base/checkpoint-19200/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-19200/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-19200/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-19200/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-17200] due to args.save_total_limit
 78%|███████▊  | 19400/24920 [17:15:30<6:16:49,  4.10s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0219, 'learning_rate': 1.1256117455138662e-05, 'epoch': 2.28}


                                                          
 78%|███████▊  | 19400/24920 [17:16:26<6:16:49,  4.10s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-19400
Configuration saved in ../../model/train/ptt5-base/checkpoint-19400/config.json


{'eval_loss': 0.03350279852747917, 'eval_runtime': 55.5103, 'eval_samples_per_second': 72.563, 'eval_steps_per_second': 9.079, 'epoch': 2.28}


Model weights saved in ../../model/train/ptt5-base/checkpoint-19400/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-19400/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-19400/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-19400/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-17400] due to args.save_total_limit
 79%|███████▊  | 19600/24920 [17:30:17<6:05:36,  4.12s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0216, 'learning_rate': 1.0848287112561175e-05, 'epoch': 2.31}


                                                          
 79%|███████▊  | 19600/24920 [17:31:12<6:05:36,  4.12s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-19600
Configuration saved in ../../model/train/ptt5-base/checkpoint-19600/config.json


{'eval_loss': 0.034070245921611786, 'eval_runtime': 55.0177, 'eval_samples_per_second': 73.213, 'eval_steps_per_second': 9.161, 'epoch': 2.31}


Model weights saved in ../../model/train/ptt5-base/checkpoint-19600/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-19600/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-19600/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-19600/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-17600] due to args.save_total_limit
 79%|███████▉  | 19800/24920 [17:45:05<5:51:57,  4.12s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0215, 'learning_rate': 1.0440456769983689e-05, 'epoch': 2.34}


                                                          
 79%|███████▉  | 19800/24920 [17:46:00<5:51:57,  4.12s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-19800
Configuration saved in ../../model/train/ptt5-base/checkpoint-19800/config.json


{'eval_loss': 0.03342704102396965, 'eval_runtime': 55.2773, 'eval_samples_per_second': 72.869, 'eval_steps_per_second': 9.118, 'epoch': 2.34}


Model weights saved in ../../model/train/ptt5-base/checkpoint-19800/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-19800/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-19800/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-19800/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-17800] due to args.save_total_limit
 80%|████████  | 20000/24920 [17:59:51<5:40:50,  4.16s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0206, 'learning_rate': 1.00326264274062e-05, 'epoch': 2.38}


                                                          
 80%|████████  | 20000/24920 [18:00:46<5:40:50,  4.16s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-20000
Configuration saved in ../../model/train/ptt5-base/checkpoint-20000/config.json


{'eval_loss': 0.0346258319914341, 'eval_runtime': 55.3466, 'eval_samples_per_second': 72.778, 'eval_steps_per_second': 9.106, 'epoch': 2.38}


Model weights saved in ../../model/train/ptt5-base/checkpoint-20000/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-20000/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-20000/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-20000/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-18000] due to args.save_total_limit
 81%|████████  | 20200/24920 [18:14:38<5:24:38,  4.13s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0213, 'learning_rate': 9.624796084828712e-06, 'epoch': 2.41}


                                                          
 81%|████████  | 20200/24920 [18:15:33<5:24:38,  4.13s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-20200
Configuration saved in ../../model/train/ptt5-base/checkpoint-20200/config.json


{'eval_loss': 0.03318621590733528, 'eval_runtime': 55.2009, 'eval_samples_per_second': 72.97, 'eval_steps_per_second': 9.13, 'epoch': 2.41}


Model weights saved in ../../model/train/ptt5-base/checkpoint-20200/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-20200/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-20200/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-20200/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-18200] due to args.save_total_limit
 82%|████████▏ | 20400/24920 [18:29:25<5:13:16,  4.16s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.023, 'learning_rate': 9.216965742251224e-06, 'epoch': 2.44}


                                                          
 82%|████████▏ | 20400/24920 [18:30:21<5:13:16,  4.16s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-20400
Configuration saved in ../../model/train/ptt5-base/checkpoint-20400/config.json


{'eval_loss': 0.032678015530109406, 'eval_runtime': 55.2482, 'eval_samples_per_second': 72.907, 'eval_steps_per_second': 9.122, 'epoch': 2.44}


Model weights saved in ../../model/train/ptt5-base/checkpoint-20400/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-20400/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-20400/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-20400/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-18400] due to args.save_total_limit
 83%|████████▎ | 20600/24920 [18:44:15<4:57:02,  4.13s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0233, 'learning_rate': 8.809135399673735e-06, 'epoch': 2.47}


                                                          
 83%|████████▎ | 20600/24920 [18:45:10<4:57:02,  4.13s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-20600
Configuration saved in ../../model/train/ptt5-base/checkpoint-20600/config.json


{'eval_loss': 0.03318605571985245, 'eval_runtime': 54.9828, 'eval_samples_per_second': 73.259, 'eval_steps_per_second': 9.167, 'epoch': 2.47}


Model weights saved in ../../model/train/ptt5-base/checkpoint-20600/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-20600/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-20600/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-20600/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-18600] due to args.save_total_limit
 83%|████████▎ | 20800/24920 [18:59:00<4:45:18,  4.15s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.019, 'learning_rate': 8.401305057096249e-06, 'epoch': 2.5}


                                                          
 83%|████████▎ | 20800/24920 [18:59:55<4:45:18,  4.15s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-20800
Configuration saved in ../../model/train/ptt5-base/checkpoint-20800/config.json


{'eval_loss': 0.033951032906770706, 'eval_runtime': 55.295, 'eval_samples_per_second': 72.846, 'eval_steps_per_second': 9.115, 'epoch': 2.5}


Model weights saved in ../../model/train/ptt5-base/checkpoint-20800/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-20800/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-20800/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-20800/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-18800] due to args.save_total_limit
 84%|████████▍ | 21000/24920 [19:13:47<4:30:22,  4.14s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0217, 'learning_rate': 7.99347471451876e-06, 'epoch': 2.54}


                                                          
 84%|████████▍ | 21000/24920 [19:14:42<4:30:22,  4.14s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-21000
Configuration saved in ../../model/train/ptt5-base/checkpoint-21000/config.json


{'eval_loss': 0.03282683715224266, 'eval_runtime': 55.2857, 'eval_samples_per_second': 72.858, 'eval_steps_per_second': 9.116, 'epoch': 2.54}


Model weights saved in ../../model/train/ptt5-base/checkpoint-21000/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-21000/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-21000/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-21000/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-19000] due to args.save_total_limit
 85%|████████▌ | 21200/24920 [19:28:34<4:14:41,  4.11s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.02, 'learning_rate': 7.585644371941272e-06, 'epoch': 2.57}


                                                          
 85%|████████▌ | 21200/24920 [19:29:30<4:14:41,  4.11s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-21200
Configuration saved in ../../model/train/ptt5-base/checkpoint-21200/config.json


{'eval_loss': 0.033493563532829285, 'eval_runtime': 55.219, 'eval_samples_per_second': 72.946, 'eval_steps_per_second': 9.127, 'epoch': 2.57}


Model weights saved in ../../model/train/ptt5-base/checkpoint-21200/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-21200/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-21200/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-21200/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-19200] due to args.save_total_limit
 86%|████████▌ | 21400/24920 [19:43:20<4:01:46,  4.12s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0206, 'learning_rate': 7.177814029363785e-06, 'epoch': 2.6}


                                                          
 86%|████████▌ | 21400/24920 [19:44:15<4:01:46,  4.12s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-21400
Configuration saved in ../../model/train/ptt5-base/checkpoint-21400/config.json


{'eval_loss': 0.03364516794681549, 'eval_runtime': 55.3632, 'eval_samples_per_second': 72.756, 'eval_steps_per_second': 9.104, 'epoch': 2.6}


Model weights saved in ../../model/train/ptt5-base/checkpoint-21400/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-21400/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-21400/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-21400/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-19400] due to args.save_total_limit
 87%|████████▋ | 21600/24920 [19:58:04<3:48:25,  4.13s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0235, 'learning_rate': 6.769983686786298e-06, 'epoch': 2.63}


                                                          
 87%|████████▋ | 21600/24920 [19:58:59<3:48:25,  4.13s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-21600
Configuration saved in ../../model/train/ptt5-base/checkpoint-21600/config.json


{'eval_loss': 0.03274868428707123, 'eval_runtime': 54.9239, 'eval_samples_per_second': 73.338, 'eval_steps_per_second': 9.176, 'epoch': 2.63}


Model weights saved in ../../model/train/ptt5-base/checkpoint-21600/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-21600/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-21600/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-21600/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-19600] due to args.save_total_limit
 87%|████████▋ | 21800/24920 [20:12:52<3:37:05,  4.17s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0205, 'learning_rate': 6.36215334420881e-06, 'epoch': 2.66}


                                                          
 87%|████████▋ | 21800/24920 [20:13:48<3:37:05,  4.17s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-21800
Configuration saved in ../../model/train/ptt5-base/checkpoint-21800/config.json


{'eval_loss': 0.03275454416871071, 'eval_runtime': 55.3855, 'eval_samples_per_second': 72.727, 'eval_steps_per_second': 9.1, 'epoch': 2.66}


Model weights saved in ../../model/train/ptt5-base/checkpoint-21800/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-21800/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-21800/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-21800/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-19800] due to args.save_total_limit
 88%|████████▊ | 22000/24920 [20:27:39<3:21:30,  4.14s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0209, 'learning_rate': 5.954323001631321e-06, 'epoch': 2.7}


                                                          
 88%|████████▊ | 22000/24920 [20:28:35<3:21:30,  4.14s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-22000
Configuration saved in ../../model/train/ptt5-base/checkpoint-22000/config.json


{'eval_loss': 0.03274182602763176, 'eval_runtime': 55.367, 'eval_samples_per_second': 72.751, 'eval_steps_per_second': 9.103, 'epoch': 2.7}


Model weights saved in ../../model/train/ptt5-base/checkpoint-22000/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-22000/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-22000/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-22000/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-20000] due to args.save_total_limit
 89%|████████▉ | 22200/24920 [20:42:25<3:07:37,  4.14s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0241, 'learning_rate': 5.546492659053834e-06, 'epoch': 2.73}


                                                          
 89%|████████▉ | 22200/24920 [20:43:20<3:07:37,  4.14s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-22200
Configuration saved in ../../model/train/ptt5-base/checkpoint-22200/config.json


{'eval_loss': 0.03203582763671875, 'eval_runtime': 55.0769, 'eval_samples_per_second': 73.134, 'eval_steps_per_second': 9.151, 'epoch': 2.73}


Model weights saved in ../../model/train/ptt5-base/checkpoint-22200/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-22200/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-22200/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-22200/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-20200] due to args.save_total_limit
 90%|████████▉ | 22400/24920 [20:57:17<2:53:34,  4.13s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0198, 'learning_rate': 5.1386623164763465e-06, 'epoch': 2.76}


                                                          
 90%|████████▉ | 22400/24920 [20:58:13<2:53:34,  4.13s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-22400
Configuration saved in ../../model/train/ptt5-base/checkpoint-22400/config.json


{'eval_loss': 0.03270697593688965, 'eval_runtime': 55.4259, 'eval_samples_per_second': 72.674, 'eval_steps_per_second': 9.093, 'epoch': 2.76}


Model weights saved in ../../model/train/ptt5-base/checkpoint-22400/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-22400/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-22400/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-22400/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-20400] due to args.save_total_limit
 91%|█████████ | 22600/24920 [21:12:06<2:40:07,  4.14s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0207, 'learning_rate': 4.730831973898858e-06, 'epoch': 2.79}


                                                          
 91%|█████████ | 22600/24920 [21:13:01<2:40:07,  4.14s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-22600
Configuration saved in ../../model/train/ptt5-base/checkpoint-22600/config.json


{'eval_loss': 0.03286290913820267, 'eval_runtime': 55.3105, 'eval_samples_per_second': 72.825, 'eval_steps_per_second': 9.112, 'epoch': 2.79}


Model weights saved in ../../model/train/ptt5-base/checkpoint-22600/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-22600/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-22600/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-22600/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-20600] due to args.save_total_limit
 91%|█████████▏| 22800/24920 [21:26:56<2:28:04,  4.19s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0203, 'learning_rate': 4.323001631321371e-06, 'epoch': 2.83}


                                                          
 91%|█████████▏| 22800/24920 [21:27:51<2:28:04,  4.19s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-22800
Configuration saved in ../../model/train/ptt5-base/checkpoint-22800/config.json


{'eval_loss': 0.03315269574522972, 'eval_runtime': 55.1991, 'eval_samples_per_second': 72.972, 'eval_steps_per_second': 9.131, 'epoch': 2.83}


Model weights saved in ../../model/train/ptt5-base/checkpoint-22800/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-22800/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-22800/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-22800/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-20800] due to args.save_total_limit
 92%|█████████▏| 23000/24920 [21:41:44<2:13:05,  4.16s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0225, 'learning_rate': 3.915171288743882e-06, 'epoch': 2.86}


                                                          
 92%|█████████▏| 23000/24920 [21:42:39<2:13:05,  4.16s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-23000
Configuration saved in ../../model/train/ptt5-base/checkpoint-23000/config.json


{'eval_loss': 0.03251825273036957, 'eval_runtime': 55.1543, 'eval_samples_per_second': 73.031, 'eval_steps_per_second': 9.138, 'epoch': 2.86}


Model weights saved in ../../model/train/ptt5-base/checkpoint-23000/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-23000/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-23000/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-23000/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-21000] due to args.save_total_limit
 93%|█████████▎| 23200/24920 [21:56:33<1:59:05,  4.15s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0218, 'learning_rate': 3.5073409461663953e-06, 'epoch': 2.89}


                                                          
 93%|█████████▎| 23200/24920 [21:57:29<1:59:05,  4.15s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-23200
Configuration saved in ../../model/train/ptt5-base/checkpoint-23200/config.json


{'eval_loss': 0.032475244253873825, 'eval_runtime': 55.3439, 'eval_samples_per_second': 72.781, 'eval_steps_per_second': 9.107, 'epoch': 2.89}


Model weights saved in ../../model/train/ptt5-base/checkpoint-23200/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-23200/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-23200/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-23200/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-21200] due to args.save_total_limit
 94%|█████████▍| 23400/24920 [22:11:21<1:45:16,  4.16s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0213, 'learning_rate': 3.0995106035889074e-06, 'epoch': 2.92}


                                                          
 94%|█████████▍| 23400/24920 [22:12:17<1:45:16,  4.16s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-23400
Configuration saved in ../../model/train/ptt5-base/checkpoint-23400/config.json


{'eval_loss': 0.032613594084978104, 'eval_runtime': 55.5445, 'eval_samples_per_second': 72.518, 'eval_steps_per_second': 9.074, 'epoch': 2.92}


Model weights saved in ../../model/train/ptt5-base/checkpoint-23400/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-23400/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-23400/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-23400/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-21400] due to args.save_total_limit
 95%|█████████▍| 23600/24920 [22:26:09<1:30:43,  4.12s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.021, 'learning_rate': 2.691680261011419e-06, 'epoch': 2.95}


                                                          
 95%|█████████▍| 23600/24920 [22:27:04<1:30:43,  4.12s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-23600
Configuration saved in ../../model/train/ptt5-base/checkpoint-23600/config.json


{'eval_loss': 0.03257031366229057, 'eval_runtime': 55.3641, 'eval_samples_per_second': 72.755, 'eval_steps_per_second': 9.103, 'epoch': 2.95}


Model weights saved in ../../model/train/ptt5-base/checkpoint-23600/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-23600/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-23600/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-23600/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-21600] due to args.save_total_limit
 96%|█████████▌| 23800/24920 [22:40:55<1:17:15,  4.14s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0209, 'learning_rate': 2.2838499184339317e-06, 'epoch': 2.99}


                                                          
 96%|█████████▌| 23800/24920 [22:41:50<1:17:15,  4.14s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-23800
Configuration saved in ../../model/train/ptt5-base/checkpoint-23800/config.json


{'eval_loss': 0.03263464197516441, 'eval_runtime': 55.0953, 'eval_samples_per_second': 73.11, 'eval_steps_per_second': 9.148, 'epoch': 2.99}


Model weights saved in ../../model/train/ptt5-base/checkpoint-23800/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-23800/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-23800/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-23800/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-21800] due to args.save_total_limit
 96%|█████████▋| 24000/24920 [22:55:42<1:03:28,  4.14s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0226, 'learning_rate': 1.8760195758564438e-06, 'epoch': 3.02}


                                                          
 96%|█████████▋| 24000/24920 [22:56:37<1:03:28,  4.14s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-24000
Configuration saved in ../../model/train/ptt5-base/checkpoint-24000/config.json


{'eval_loss': 0.03244469314813614, 'eval_runtime': 55.0384, 'eval_samples_per_second': 73.185, 'eval_steps_per_second': 9.157, 'epoch': 3.02}


Model weights saved in ../../model/train/ptt5-base/checkpoint-24000/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-24000/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-24000/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-24000/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-22000] due to args.save_total_limit
 97%|█████████▋| 24200/24920 [23:10:29<49:23,  4.12s/it]  ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0191, 'learning_rate': 1.468189233278956e-06, 'epoch': 3.05}


                                                        
 97%|█████████▋| 24200/24920 [23:11:24<49:23,  4.12s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-24200
Configuration saved in ../../model/train/ptt5-base/checkpoint-24200/config.json


{'eval_loss': 0.03270672634243965, 'eval_runtime': 55.2636, 'eval_samples_per_second': 72.887, 'eval_steps_per_second': 9.12, 'epoch': 3.05}


Model weights saved in ../../model/train/ptt5-base/checkpoint-24200/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-24200/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-24200/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-24200/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-22200] due to args.save_total_limit
 98%|█████████▊| 24400/24920 [23:25:19<35:59,  4.15s/it]  ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0178, 'learning_rate': 1.0603588907014684e-06, 'epoch': 3.08}


                                                        
 98%|█████████▊| 24400/24920 [23:26:14<35:59,  4.15s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-24400
Configuration saved in ../../model/train/ptt5-base/checkpoint-24400/config.json


{'eval_loss': 0.032966915518045425, 'eval_runtime': 55.065, 'eval_samples_per_second': 73.15, 'eval_steps_per_second': 9.153, 'epoch': 3.08}


Model weights saved in ../../model/train/ptt5-base/checkpoint-24400/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-24400/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-24400/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-24400/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-22400] due to args.save_total_limit
 99%|█████████▊| 24600/24920 [23:40:08<22:04,  4.14s/it]  ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0192, 'learning_rate': 6.525285481239805e-07, 'epoch': 3.11}


                                                        
 99%|█████████▊| 24600/24920 [23:41:03<22:04,  4.14s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-24600
Configuration saved in ../../model/train/ptt5-base/checkpoint-24600/config.json


{'eval_loss': 0.033010199666023254, 'eval_runtime': 55.115, 'eval_samples_per_second': 73.084, 'eval_steps_per_second': 9.145, 'epoch': 3.11}


Model weights saved in ../../model/train/ptt5-base/checkpoint-24600/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-24600/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-24600/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-24600/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-22600] due to args.save_total_limit
100%|█████████▉| 24800/24920 [23:54:54<08:18,  4.15s/it]  ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0191, 'learning_rate': 2.4469820554649265e-07, 'epoch': 3.15}


                                                        
100%|█████████▉| 24800/24920 [23:55:50<08:18,  4.15s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-24800
Configuration saved in ../../model/train/ptt5-base/checkpoint-24800/config.json


{'eval_loss': 0.033007100224494934, 'eval_runtime': 55.4151, 'eval_samples_per_second': 72.688, 'eval_steps_per_second': 9.095, 'epoch': 3.15}


Model weights saved in ../../model/train/ptt5-base/checkpoint-24800/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-24800/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-24800/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-24800/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-22800] due to args.save_total_limit
100%|██████████| 24920/24920 [24:04:11<00:00,  4.15s/it]

Training completed. Do not forget to share your model on huggingface.co/models =)


100%|██████████| 24920/24920 [24:04:11<00:00,  4.15s/it]

{'train_runtime': 86652.3247, 'train_samples_per_second': 18.405, 'train_steps_per_second': 0.288, 'train_loss': 0.022168252383916374, 'epoch': 3.17}
Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 15 operations to synchronize with Neptune. Do not kill this process.
All 15 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/marcusborela/IA386DD/e/IAD-108


100%|██████████| 24920/24920 [24:04:11<00:00,  3.48s/it]


In [111]:
model.save_pretrained('../../model/train/ptt5-indir-version')

Configuration saved in ../../model/train/ptt5-indir-version/config.json
Model weights saved in ../../model/train/ptt5-indir-version/pytorch_model.bin


In [112]:
train_metrics

TrainOutput(global_step=24920, training_loss=0.022168252383916374, metrics={'train_runtime': 86652.3247, 'train_samples_per_second': 18.405, 'train_steps_per_second': 0.288, 'train_loss': 0.022168252383916374, 'epoch': 3.17})

In [113]:
num_step_alert = 200
training_args = Seq2SeqTrainingArguments(output_dir=PATH_TRAIN_MODEL_LOCAL)
# Needed to make the Trainer work with an on-the-fly transformation on the dataset
# training_args.remove_unused_columns = False
training_args.output_dir = PATH_TRAIN_MODEL_LOCAL
training_args.warmup_steps=400 # Alterar!
training_args.num_train_epochs=6.0 # Alterar!
training_args.logging_steps=num_step_alert # Alterar!
training_args.save_strategy="steps"
training_args.save_steps=num_step_alert
training_args.save_total_limit=20
training_args.learning_rate=5e-5
training_args.per_device_train_batch_size=16 # t4: 8, a100-40: 32
training_args.gradient_accumulation_steps=4 # t4: 4, a100-40: 2
#training_args._n_gpu = 1
# training_args.bf16 = True # se for usar a100, 3090, 4090 -> usar
training_args.ignore_data_skip = True
training_args.load_best_model_at_end = True
training_args.evaluation_strategy='steps'
training_args.eval_steps=num_step_alert
training_args.do_eval = True
# training_args.optim='adamw_hf' #default
training_args.gradient_checkpointing = False # True
# se precisar economizar gpu
# training_args.optim='adamw_bnb_8bit'
# training_args.gradient_checkpointing = True
training_args.report_to="neptune",
# training_args.report_to = 'None'

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [114]:
print(training_args)

Seq2SeqTrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=True,
do_predict=False,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=200,
evaluation_strategy=steps,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_min_num_params=0,
fsdp_transformer_layer_cls_to_wrap=None,
full_determinism=False,
generation_max_length=None,
generation_num_beams=None,
gradient_accumulation_steps=4,
gradient_checkpointing=False,
greater_is_better=None,
group_by_length=False,
half_precision_backend=auto,
hub_model_id=None,
hub_private_repo=False,
hub_strategy=every_save,
hub_token=<HUB_TOKEN>,
ignore_data_skip=True,
in

In [115]:
trainer = trainer_cls(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

In [116]:
train_metrics = trainer.train(resume_from_checkpoint=True)

Loading model from ../../model/train/ptt5-base/checkpoint-24800.
***** Running training *****
  Num examples = 398710
  Num Epochs = 6
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 64
  Gradient Accumulation steps = 4
  Total optimization steps = 37380
  Number of trainable parameters = 222903552
  Continuing training from checkpoint, will skip to saved global_step
  Continuing training from epoch 3
  Continuing training from global step 24800


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

 67%|██████▋   | 25000/37380 [13:44<14:15:30,  4.15s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0182, 'learning_rate': 1.6738777717685236e-05, 'epoch': 3.03}


                                                        
 67%|██████▋   | 25000/37380 [14:39<14:15:30,  4.15s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-25000
Configuration saved in ../../model/train/ptt5-base/checkpoint-25000/config.json


{'eval_loss': 0.03435084596276283, 'eval_runtime': 55.4408, 'eval_samples_per_second': 72.654, 'eval_steps_per_second': 9.091, 'epoch': 3.03}


Model weights saved in ../../model/train/ptt5-base/checkpoint-25000/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-25000/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-25000/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-25000/spiece.model
 67%|██████▋   | 25200/37380 [45:52<13:46:00,  4.07s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0187, 'learning_rate': 1.6468361276365603e-05, 'epoch': 3.06}


                                                        
 67%|██████▋   | 25200/37380 [46:47<13:46:00,  4.07s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-25200
Configuration saved in ../../model/train/ptt5-base/checkpoint-25200/config.json


{'eval_loss': 0.03584064170718193, 'eval_runtime': 54.993, 'eval_samples_per_second': 73.246, 'eval_steps_per_second': 9.165, 'epoch': 3.06}


Model weights saved in ../../model/train/ptt5-base/checkpoint-25200/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-25200/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-25200/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-25200/spiece.model
 68%|██████▊   | 25400/37380 [1:18:16<13:55:26,  4.18s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0189, 'learning_rate': 1.619794483504597e-05, 'epoch': 3.1}


                                                          
 68%|██████▊   | 25400/37380 [1:19:11<13:55:26,  4.18s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-25400
Configuration saved in ../../model/train/ptt5-base/checkpoint-25400/config.json


{'eval_loss': 0.03574325889348984, 'eval_runtime': 55.056, 'eval_samples_per_second': 73.162, 'eval_steps_per_second': 9.154, 'epoch': 3.1}


Model weights saved in ../../model/train/ptt5-base/checkpoint-25400/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-25400/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-25400/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-25400/spiece.model
 68%|██████▊   | 25600/37380 [1:33:05<13:34:23,  4.15s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0182, 'learning_rate': 1.592752839372634e-05, 'epoch': 3.13}


                                                          
 68%|██████▊   | 25600/37380 [1:34:00<13:34:23,  4.15s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-25600
Configuration saved in ../../model/train/ptt5-base/checkpoint-25600/config.json


{'eval_loss': 0.0346958227455616, 'eval_runtime': 55.2803, 'eval_samples_per_second': 72.865, 'eval_steps_per_second': 9.117, 'epoch': 3.13}


Model weights saved in ../../model/train/ptt5-base/checkpoint-25600/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-25600/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-25600/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-25600/spiece.model
 69%|██████▉   | 25800/37380 [1:47:58<13:09:18,  4.09s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0216, 'learning_rate': 1.565711195240671e-05, 'epoch': 3.16}


                                                          
 69%|██████▉   | 25800/37380 [1:48:52<13:09:18,  4.09s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-25800
Configuration saved in ../../model/train/ptt5-base/checkpoint-25800/config.json


{'eval_loss': 0.03292287513613701, 'eval_runtime': 54.461, 'eval_samples_per_second': 73.961, 'eval_steps_per_second': 9.254, 'epoch': 3.16}


Model weights saved in ../../model/train/ptt5-base/checkpoint-25800/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-25800/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-25800/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-25800/spiece.model
 70%|██████▉   | 26000/37380 [2:02:38<13:04:46,  4.14s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0225, 'learning_rate': 1.5386695511087072e-05, 'epoch': 3.19}


                                                          
 70%|██████▉   | 26000/37380 [2:03:33<13:04:46,  4.14s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-26000
Configuration saved in ../../model/train/ptt5-base/checkpoint-26000/config.json


{'eval_loss': 0.03302432596683502, 'eval_runtime': 54.8012, 'eval_samples_per_second': 73.502, 'eval_steps_per_second': 9.197, 'epoch': 3.19}


Model weights saved in ../../model/train/ptt5-base/checkpoint-26000/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-26000/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-26000/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-26000/spiece.model
 70%|███████   | 26200/37380 [2:17:16<12:42:46,  4.09s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.019, 'learning_rate': 1.5116279069767441e-05, 'epoch': 3.22}


                                                          
 70%|███████   | 26200/37380 [2:18:11<12:42:46,  4.09s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-26200
Configuration saved in ../../model/train/ptt5-base/checkpoint-26200/config.json


{'eval_loss': 0.034505702555179596, 'eval_runtime': 54.7859, 'eval_samples_per_second': 73.523, 'eval_steps_per_second': 9.199, 'epoch': 3.22}


Model weights saved in ../../model/train/ptt5-base/checkpoint-26200/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-26200/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-26200/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-26200/spiece.model
 71%|███████   | 26400/37380 [2:32:00<12:47:09,  4.19s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0221, 'learning_rate': 1.484586262844781e-05, 'epoch': 3.26}


                                                          
 71%|███████   | 26400/37380 [2:32:55<12:47:09,  4.19s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-26400
Configuration saved in ../../model/train/ptt5-base/checkpoint-26400/config.json


{'eval_loss': 0.034426845610141754, 'eval_runtime': 55.5634, 'eval_samples_per_second': 72.494, 'eval_steps_per_second': 9.071, 'epoch': 3.26}


Model weights saved in ../../model/train/ptt5-base/checkpoint-26400/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-26400/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-26400/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-26400/spiece.model
 71%|███████   | 26600/37380 [2:46:55<12:31:05,  4.18s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0208, 'learning_rate': 1.4575446187128177e-05, 'epoch': 3.29}


                                                          
 71%|███████   | 26600/37380 [2:47:50<12:31:05,  4.18s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-26600
Configuration saved in ../../model/train/ptt5-base/checkpoint-26600/config.json


{'eval_loss': 0.034394122660160065, 'eval_runtime': 55.3994, 'eval_samples_per_second': 72.708, 'eval_steps_per_second': 9.098, 'epoch': 3.29}


Model weights saved in ../../model/train/ptt5-base/checkpoint-26600/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-26600/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-26600/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-26600/spiece.model
 72%|███████▏  | 26800/37380 [3:01:49<12:15:05,  4.17s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.02, 'learning_rate': 1.4305029745808546e-05, 'epoch': 3.32}


                                                          
 72%|███████▏  | 26800/37380 [3:02:45<12:15:05,  4.17s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-26800
Configuration saved in ../../model/train/ptt5-base/checkpoint-26800/config.json


{'eval_loss': 0.0343005396425724, 'eval_runtime': 55.5902, 'eval_samples_per_second': 72.459, 'eval_steps_per_second': 9.066, 'epoch': 3.32}


Model weights saved in ../../model/train/ptt5-base/checkpoint-26800/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-26800/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-26800/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-26800/spiece.model
 72%|███████▏  | 27000/37380 [3:16:44<12:03:10,  4.18s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0212, 'learning_rate': 1.4034613304488914e-05, 'epoch': 3.35}


                                                          
 72%|███████▏  | 27000/37380 [3:17:39<12:03:10,  4.18s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-27000
Configuration saved in ../../model/train/ptt5-base/checkpoint-27000/config.json


{'eval_loss': 0.03411152958869934, 'eval_runtime': 55.5097, 'eval_samples_per_second': 72.564, 'eval_steps_per_second': 9.079, 'epoch': 3.35}


Model weights saved in ../../model/train/ptt5-base/checkpoint-27000/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-27000/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-27000/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-27000/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-23000] due to args.save_total_limit
 73%|███████▎  | 27200/37380 [3:31:39<11:59:35,  4.24s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0185, 'learning_rate': 1.3764196863169283e-05, 'epoch': 3.39}


                                                          
 73%|███████▎  | 27200/37380 [3:32:35<11:59:35,  4.24s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-27200
Configuration saved in ../../model/train/ptt5-base/checkpoint-27200/config.json


{'eval_loss': 0.03429172933101654, 'eval_runtime': 55.2816, 'eval_samples_per_second': 72.863, 'eval_steps_per_second': 9.117, 'epoch': 3.39}


Model weights saved in ../../model/train/ptt5-base/checkpoint-27200/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-27200/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-27200/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-27200/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-23200] due to args.save_total_limit
 73%|███████▎  | 27400/37380 [3:46:33<11:31:44,  4.16s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0211, 'learning_rate': 1.349378042184965e-05, 'epoch': 3.42}


                                                          
 73%|███████▎  | 27400/37380 [3:47:29<11:31:44,  4.16s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-27400
Configuration saved in ../../model/train/ptt5-base/checkpoint-27400/config.json


{'eval_loss': 0.0341457761824131, 'eval_runtime': 55.3048, 'eval_samples_per_second': 72.833, 'eval_steps_per_second': 9.113, 'epoch': 3.42}


Model weights saved in ../../model/train/ptt5-base/checkpoint-27400/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-27400/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-27400/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-27400/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-23400] due to args.save_total_limit
 74%|███████▍  | 27600/37380 [4:01:27<11:20:32,  4.18s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0208, 'learning_rate': 1.3223363980530019e-05, 'epoch': 3.45}


                                                          
 74%|███████▍  | 27600/37380 [4:02:22<11:20:32,  4.18s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-27600
Configuration saved in ../../model/train/ptt5-base/checkpoint-27600/config.json


{'eval_loss': 0.033053621649742126, 'eval_runtime': 55.5817, 'eval_samples_per_second': 72.47, 'eval_steps_per_second': 9.068, 'epoch': 3.45}


Model weights saved in ../../model/train/ptt5-base/checkpoint-27600/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-27600/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-27600/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-27600/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-23600] due to args.save_total_limit
 74%|███████▍  | 27800/37380 [4:16:21<11:05:39,  4.17s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0195, 'learning_rate': 1.2952947539210383e-05, 'epoch': 3.48}


                                                          
 74%|███████▍  | 27800/37380 [4:17:16<11:05:39,  4.17s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-27800
Configuration saved in ../../model/train/ptt5-base/checkpoint-27800/config.json


{'eval_loss': 0.033555351197719574, 'eval_runtime': 55.3398, 'eval_samples_per_second': 72.787, 'eval_steps_per_second': 9.107, 'epoch': 3.48}


Model weights saved in ../../model/train/ptt5-base/checkpoint-27800/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-27800/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-27800/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-27800/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-23800] due to args.save_total_limit
 75%|███████▍  | 28000/37380 [4:31:14<10:53:19,  4.18s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0196, 'learning_rate': 1.2682531097890751e-05, 'epoch': 3.51}


                                                          
 75%|███████▍  | 28000/37380 [4:32:09<10:53:19,  4.18s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-28000
Configuration saved in ../../model/train/ptt5-base/checkpoint-28000/config.json


{'eval_loss': 0.03426952287554741, 'eval_runtime': 55.2655, 'eval_samples_per_second': 72.885, 'eval_steps_per_second': 9.12, 'epoch': 3.51}


Model weights saved in ../../model/train/ptt5-base/checkpoint-28000/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-28000/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-28000/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-28000/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-24000] due to args.save_total_limit
 75%|███████▌  | 28200/37380 [4:46:10<10:47:58,  4.24s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0205, 'learning_rate': 1.241211465657112e-05, 'epoch': 3.55}


                                                          
 75%|███████▌  | 28200/37380 [4:47:06<10:47:58,  4.24s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-28200
Configuration saved in ../../model/train/ptt5-base/checkpoint-28200/config.json


{'eval_loss': 0.03372732177376747, 'eval_runtime': 55.4543, 'eval_samples_per_second': 72.636, 'eval_steps_per_second': 9.089, 'epoch': 3.55}


Model weights saved in ../../model/train/ptt5-base/checkpoint-28200/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-28200/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-28200/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-28200/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-24200] due to args.save_total_limit
 76%|███████▌  | 28400/37380 [5:01:04<10:25:33,  4.18s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0207, 'learning_rate': 1.2141698215251488e-05, 'epoch': 3.58}


                                                          
 76%|███████▌  | 28400/37380 [5:01:59<10:25:33,  4.18s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-28400
Configuration saved in ../../model/train/ptt5-base/checkpoint-28400/config.json


{'eval_loss': 0.03371589258313179, 'eval_runtime': 55.2955, 'eval_samples_per_second': 72.845, 'eval_steps_per_second': 9.115, 'epoch': 3.58}


Model weights saved in ../../model/train/ptt5-base/checkpoint-28400/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-28400/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-28400/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-28400/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-24400] due to args.save_total_limit
 77%|███████▋  | 28600/37380 [5:15:58<10:12:48,  4.19s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0195, 'learning_rate': 1.1871281773931855e-05, 'epoch': 3.61}


                                                          
 77%|███████▋  | 28600/37380 [5:16:53<10:12:48,  4.19s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-28600
Configuration saved in ../../model/train/ptt5-base/checkpoint-28600/config.json


{'eval_loss': 0.03311342000961304, 'eval_runtime': 55.415, 'eval_samples_per_second': 72.688, 'eval_steps_per_second': 9.095, 'epoch': 3.61}


Model weights saved in ../../model/train/ptt5-base/checkpoint-28600/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-28600/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-28600/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-28600/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-24600] due to args.save_total_limit
 77%|███████▋  | 28800/37380 [5:30:52<9:59:55,  4.20s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0199, 'learning_rate': 1.1600865332612224e-05, 'epoch': 3.64}


                                                         
 77%|███████▋  | 28800/37380 [5:31:47<9:59:55,  4.20s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-28800
Configuration saved in ../../model/train/ptt5-base/checkpoint-28800/config.json


{'eval_loss': 0.032547686249017715, 'eval_runtime': 55.4309, 'eval_samples_per_second': 72.667, 'eval_steps_per_second': 9.092, 'epoch': 3.64}


Model weights saved in ../../model/train/ptt5-base/checkpoint-28800/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-28800/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-28800/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-28800/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-24800] due to args.save_total_limit
 78%|███████▊  | 29000/37380 [5:45:48<9:45:45,  4.19s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0196, 'learning_rate': 1.1330448891292591e-05, 'epoch': 3.67}


                                                         
 78%|███████▊  | 29000/37380 [5:46:43<9:45:45,  4.19s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-29000
Configuration saved in ../../model/train/ptt5-base/checkpoint-29000/config.json


{'eval_loss': 0.03298616781830788, 'eval_runtime': 55.4767, 'eval_samples_per_second': 72.607, 'eval_steps_per_second': 9.085, 'epoch': 3.67}


Model weights saved in ../../model/train/ptt5-base/checkpoint-29000/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-29000/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-29000/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-29000/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-25000] due to args.save_total_limit
 78%|███████▊  | 29200/37380 [6:00:43<9:27:26,  4.16s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0214, 'learning_rate': 1.1060032449972958e-05, 'epoch': 3.71}


                                                         
 78%|███████▊  | 29200/37380 [6:01:38<9:27:26,  4.16s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-29200
Configuration saved in ../../model/train/ptt5-base/checkpoint-29200/config.json


{'eval_loss': 0.03255263715982437, 'eval_runtime': 55.3995, 'eval_samples_per_second': 72.708, 'eval_steps_per_second': 9.098, 'epoch': 3.71}


Model weights saved in ../../model/train/ptt5-base/checkpoint-29200/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-29200/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-29200/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-29200/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-25200] due to args.save_total_limit
 79%|███████▊  | 29400/37380 [6:15:36<9:18:09,  4.20s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0201, 'learning_rate': 1.0789616008653325e-05, 'epoch': 3.74}


                                                         
 79%|███████▊  | 29400/37380 [6:16:31<9:18:09,  4.20s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-29400
Configuration saved in ../../model/train/ptt5-base/checkpoint-29400/config.json


{'eval_loss': 0.032849110662937164, 'eval_runtime': 55.506, 'eval_samples_per_second': 72.569, 'eval_steps_per_second': 9.08, 'epoch': 3.74}


Model weights saved in ../../model/train/ptt5-base/checkpoint-29400/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-29400/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-29400/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-29400/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-25400] due to args.save_total_limit
 79%|███████▉  | 29600/37380 [6:30:33<9:03:23,  4.19s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0196, 'learning_rate': 1.0519199567333694e-05, 'epoch': 3.77}


                                                         
 79%|███████▉  | 29600/37380 [6:31:29<9:03:23,  4.19s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-29600
Configuration saved in ../../model/train/ptt5-base/checkpoint-29600/config.json


{'eval_loss': 0.033511437475681305, 'eval_runtime': 55.4356, 'eval_samples_per_second': 72.661, 'eval_steps_per_second': 9.092, 'epoch': 3.77}


Model weights saved in ../../model/train/ptt5-base/checkpoint-29600/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-29600/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-29600/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-29600/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-25600] due to args.save_total_limit
 80%|███████▉  | 29800/37380 [6:45:26<8:47:55,  4.18s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0205, 'learning_rate': 1.0248783126014062e-05, 'epoch': 3.8}


                                                         
 80%|███████▉  | 29800/37380 [6:46:22<8:47:55,  4.18s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-29800
Configuration saved in ../../model/train/ptt5-base/checkpoint-29800/config.json


{'eval_loss': 0.033625781536102295, 'eval_runtime': 55.5439, 'eval_samples_per_second': 72.519, 'eval_steps_per_second': 9.074, 'epoch': 3.8}


Model weights saved in ../../model/train/ptt5-base/checkpoint-29800/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-29800/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-29800/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-29800/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-25800] due to args.save_total_limit
 80%|████████  | 30000/37380 [7:00:21<8:29:38,  4.14s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0201, 'learning_rate': 9.97836668469443e-06, 'epoch': 3.83}


                                                         
 80%|████████  | 30000/37380 [7:01:16<8:29:38,  4.14s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-30000
Configuration saved in ../../model/train/ptt5-base/checkpoint-30000/config.json


{'eval_loss': 0.03320852667093277, 'eval_runtime': 55.3709, 'eval_samples_per_second': 72.746, 'eval_steps_per_second': 9.102, 'epoch': 3.83}


Model weights saved in ../../model/train/ptt5-base/checkpoint-30000/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-30000/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-30000/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-30000/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-26000] due to args.save_total_limit
 81%|████████  | 30200/37380 [7:15:15<8:18:25,  4.17s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0221, 'learning_rate': 9.707950243374798e-06, 'epoch': 3.87}


                                                         
 81%|████████  | 30200/37380 [7:16:10<8:18:25,  4.17s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-30200
Configuration saved in ../../model/train/ptt5-base/checkpoint-30200/config.json


{'eval_loss': 0.032312966883182526, 'eval_runtime': 55.409, 'eval_samples_per_second': 72.696, 'eval_steps_per_second': 9.096, 'epoch': 3.87}


Model weights saved in ../../model/train/ptt5-base/checkpoint-30200/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-30200/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-30200/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-30200/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-26200] due to args.save_total_limit
 81%|████████▏ | 30400/37380 [7:30:10<8:09:54,  4.21s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0202, 'learning_rate': 9.437533802055165e-06, 'epoch': 3.9}


                                                         
 81%|████████▏ | 30400/37380 [7:31:05<8:09:54,  4.21s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-30400
Configuration saved in ../../model/train/ptt5-base/checkpoint-30400/config.json


{'eval_loss': 0.03236682340502739, 'eval_runtime': 55.4142, 'eval_samples_per_second': 72.689, 'eval_steps_per_second': 9.095, 'epoch': 3.9}


Model weights saved in ../../model/train/ptt5-base/checkpoint-30400/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-30400/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-30400/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-30400/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-26400] due to args.save_total_limit
 82%|████████▏ | 30600/37380 [7:45:05<7:53:38,  4.19s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0207, 'learning_rate': 9.167117360735534e-06, 'epoch': 3.93}


                                                         
 82%|████████▏ | 30600/37380 [7:46:00<7:53:38,  4.19s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-30600
Configuration saved in ../../model/train/ptt5-base/checkpoint-30600/config.json


{'eval_loss': 0.03250203654170036, 'eval_runtime': 55.3833, 'eval_samples_per_second': 72.73, 'eval_steps_per_second': 9.1, 'epoch': 3.93}


Model weights saved in ../../model/train/ptt5-base/checkpoint-30600/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-30600/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-30600/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-30600/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-26600] due to args.save_total_limit
 82%|████████▏ | 30800/37380 [8:00:04<7:47:41,  4.26s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0209, 'learning_rate': 8.896700919415901e-06, 'epoch': 3.96}


                                                         
 82%|████████▏ | 30800/37380 [8:00:59<7:47:41,  4.26s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-30800
Configuration saved in ../../model/train/ptt5-base/checkpoint-30800/config.json


{'eval_loss': 0.03224475309252739, 'eval_runtime': 55.683, 'eval_samples_per_second': 72.338, 'eval_steps_per_second': 9.051, 'epoch': 3.96}


Model weights saved in ../../model/train/ptt5-base/checkpoint-30800/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-30800/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-30800/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-30800/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-26800] due to args.save_total_limit
 83%|████████▎ | 31000/37380 [8:15:00<7:17:01,  4.11s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0187, 'learning_rate': 8.62628447809627e-06, 'epoch': 4.0}


                                                         
 83%|████████▎ | 31000/37380 [8:15:56<7:17:01,  4.11s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-31000
Configuration saved in ../../model/train/ptt5-base/checkpoint-31000/config.json


{'eval_loss': 0.033378809690475464, 'eval_runtime': 55.3686, 'eval_samples_per_second': 72.749, 'eval_steps_per_second': 9.103, 'epoch': 4.0}


Model weights saved in ../../model/train/ptt5-base/checkpoint-31000/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-31000/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-31000/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-31000/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-27000] due to args.save_total_limit
 83%|████████▎ | 31200/37380 [8:29:42<7:00:31,  4.08s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0162, 'learning_rate': 8.355868036776636e-06, 'epoch': 4.03}


                                                         
 83%|████████▎ | 31200/37380 [8:30:36<7:00:31,  4.08s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-31200
Configuration saved in ../../model/train/ptt5-base/checkpoint-31200/config.json


{'eval_loss': 0.03468750789761543, 'eval_runtime': 54.4594, 'eval_samples_per_second': 73.963, 'eval_steps_per_second': 9.255, 'epoch': 4.03}


Model weights saved in ../../model/train/ptt5-base/checkpoint-31200/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-31200/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-31200/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-31200/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-27200] due to args.save_total_limit
 84%|████████▍ | 31400/37380 [8:47:12<6:48:56,  4.10s/it]  

{'loss': 0.0165, 'learning_rate': 8.085451595457005e-06, 'epoch': 4.06}


***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8
                                                         
 84%|████████▍ | 31400/37380 [8:48:08<6:48:56,  4.10s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-31400
Configuration saved in ../../model/train/ptt5-base/checkpoint-31400/config.json


{'eval_loss': 0.035022228956222534, 'eval_runtime': 55.3112, 'eval_samples_per_second': 72.824, 'eval_steps_per_second': 9.112, 'epoch': 4.06}


Model weights saved in ../../model/train/ptt5-base/checkpoint-31400/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-31400/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-31400/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-31400/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-27400] due to args.save_total_limit
 85%|████████▍ | 31600/37380 [9:04:12<6:40:59,  4.16s/it]  

{'loss': 0.0167, 'learning_rate': 7.815035154137372e-06, 'epoch': 4.09}


***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8
                                                         
 85%|████████▍ | 31600/37380 [9:05:08<6:40:59,  4.16s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-31600
Configuration saved in ../../model/train/ptt5-base/checkpoint-31600/config.json


{'eval_loss': 0.03544702008366585, 'eval_runtime': 55.0779, 'eval_samples_per_second': 73.133, 'eval_steps_per_second': 9.151, 'epoch': 4.09}


Model weights saved in ../../model/train/ptt5-base/checkpoint-31600/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-31600/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-31600/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-31600/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-27600] due to args.save_total_limit
 85%|████████▌ | 31800/37380 [9:19:06<6:25:31,  4.15s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0157, 'learning_rate': 7.54461871281774e-06, 'epoch': 4.12}


                                                         
 85%|████████▌ | 31800/37380 [9:20:02<6:25:31,  4.15s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-31800
Configuration saved in ../../model/train/ptt5-base/checkpoint-31800/config.json


{'eval_loss': 0.03496887907385826, 'eval_runtime': 55.417, 'eval_samples_per_second': 72.685, 'eval_steps_per_second': 9.095, 'epoch': 4.12}


Model weights saved in ../../model/train/ptt5-base/checkpoint-31800/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-31800/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-31800/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-31800/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-27800] due to args.save_total_limit
 86%|████████▌ | 32000/37380 [9:33:57<6:12:21,  4.15s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0165, 'learning_rate': 7.274202271498108e-06, 'epoch': 4.16}


                                                         
 86%|████████▌ | 32000/37380 [9:34:53<6:12:21,  4.15s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-32000
Configuration saved in ../../model/train/ptt5-base/checkpoint-32000/config.json


{'eval_loss': 0.03463713452219963, 'eval_runtime': 55.1023, 'eval_samples_per_second': 73.1, 'eval_steps_per_second': 9.147, 'epoch': 4.16}


Model weights saved in ../../model/train/ptt5-base/checkpoint-32000/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-32000/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-32000/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-32000/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-28000] due to args.save_total_limit
 86%|████████▌ | 32200/37380 [9:48:51<6:01:21,  4.19s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0169, 'learning_rate': 7.003785830178475e-06, 'epoch': 4.19}


                                                         
 86%|████████▌ | 32200/37380 [9:49:46<6:01:21,  4.19s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-32200
Configuration saved in ../../model/train/ptt5-base/checkpoint-32200/config.json


{'eval_loss': 0.03459534794092178, 'eval_runtime': 55.415, 'eval_samples_per_second': 72.688, 'eval_steps_per_second': 9.095, 'epoch': 4.19}


Model weights saved in ../../model/train/ptt5-base/checkpoint-32200/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-32200/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-32200/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-32200/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-28200] due to args.save_total_limit
 87%|████████▋ | 32400/37380 [10:03:44<5:46:01,  4.17s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.016, 'learning_rate': 6.733369388858843e-06, 'epoch': 4.22}


                                                          
 87%|████████▋ | 32400/37380 [10:04:39<5:46:01,  4.17s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-32400
Configuration saved in ../../model/train/ptt5-base/checkpoint-32400/config.json


{'eval_loss': 0.035855039954185486, 'eval_runtime': 55.3782, 'eval_samples_per_second': 72.736, 'eval_steps_per_second': 9.101, 'epoch': 4.22}


Model weights saved in ../../model/train/ptt5-base/checkpoint-32400/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-32400/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-32400/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-32400/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-28400] due to args.save_total_limit
 87%|████████▋ | 32600/37380 [10:18:35<5:31:12,  4.16s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0167, 'learning_rate': 6.4629529475392105e-06, 'epoch': 4.25}


                                                          
 87%|████████▋ | 32600/37380 [10:19:31<5:31:12,  4.16s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-32600
Configuration saved in ../../model/train/ptt5-base/checkpoint-32600/config.json


{'eval_loss': 0.035321012139320374, 'eval_runtime': 55.3158, 'eval_samples_per_second': 72.818, 'eval_steps_per_second': 9.111, 'epoch': 4.25}


Model weights saved in ../../model/train/ptt5-base/checkpoint-32600/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-32600/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-32600/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-32600/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-28600] due to args.save_total_limit
 88%|████████▊ | 32800/37380 [10:33:28<5:19:12,  4.18s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0147, 'learning_rate': 6.1925365062195785e-06, 'epoch': 4.28}


                                                          
 88%|████████▊ | 32800/37380 [10:34:23<5:19:12,  4.18s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-32800
Configuration saved in ../../model/train/ptt5-base/checkpoint-32800/config.json


{'eval_loss': 0.03554528206586838, 'eval_runtime': 55.3452, 'eval_samples_per_second': 72.78, 'eval_steps_per_second': 9.106, 'epoch': 4.28}


Model weights saved in ../../model/train/ptt5-base/checkpoint-32800/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-32800/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-32800/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-32800/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-28800] due to args.save_total_limit
 88%|████████▊ | 33000/37380 [10:48:18<5:03:08,  4.15s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0181, 'learning_rate': 5.922120064899947e-06, 'epoch': 4.32}


                                                          
 88%|████████▊ | 33000/37380 [10:49:14<5:03:08,  4.15s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-33000
Configuration saved in ../../model/train/ptt5-base/checkpoint-33000/config.json


{'eval_loss': 0.0350835956633091, 'eval_runtime': 55.2393, 'eval_samples_per_second': 72.919, 'eval_steps_per_second': 9.124, 'epoch': 4.32}


Model weights saved in ../../model/train/ptt5-base/checkpoint-33000/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-33000/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-33000/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-33000/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-29000] due to args.save_total_limit
 89%|████████▉ | 33200/37380 [11:03:12<4:49:35,  4.16s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0158, 'learning_rate': 5.651703623580314e-06, 'epoch': 4.35}


                                                          
 89%|████████▉ | 33200/37380 [11:04:07<4:49:35,  4.16s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-33200
Configuration saved in ../../model/train/ptt5-base/checkpoint-33200/config.json


{'eval_loss': 0.034998662769794464, 'eval_runtime': 55.3424, 'eval_samples_per_second': 72.783, 'eval_steps_per_second': 9.107, 'epoch': 4.35}


Model weights saved in ../../model/train/ptt5-base/checkpoint-33200/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-33200/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-33200/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-33200/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-29200] due to args.save_total_limit
 89%|████████▉ | 33400/37380 [11:18:02<4:39:02,  4.21s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0178, 'learning_rate': 5.381287182260682e-06, 'epoch': 4.38}


                                                          
 89%|████████▉ | 33400/37380 [11:18:57<4:39:02,  4.21s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-33400
Configuration saved in ../../model/train/ptt5-base/checkpoint-33400/config.json


{'eval_loss': 0.03397810459136963, 'eval_runtime': 55.5399, 'eval_samples_per_second': 72.524, 'eval_steps_per_second': 9.075, 'epoch': 4.38}


Model weights saved in ../../model/train/ptt5-base/checkpoint-33400/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-33400/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-33400/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-33400/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-29400] due to args.save_total_limit
 90%|████████▉ | 33600/37380 [11:32:50<4:22:51,  4.17s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0175, 'learning_rate': 5.11087074094105e-06, 'epoch': 4.41}


                                                          
 90%|████████▉ | 33600/37380 [11:33:45<4:22:51,  4.17s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-33600
Configuration saved in ../../model/train/ptt5-base/checkpoint-33600/config.json


{'eval_loss': 0.03373459726572037, 'eval_runtime': 55.1911, 'eval_samples_per_second': 72.983, 'eval_steps_per_second': 9.132, 'epoch': 4.41}


Model weights saved in ../../model/train/ptt5-base/checkpoint-33600/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-33600/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-33600/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-33600/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-29600] due to args.save_total_limit
 90%|█████████ | 33800/37380 [11:47:41<4:06:06,  4.12s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0175, 'learning_rate': 4.840454299621417e-06, 'epoch': 4.44}


                                                          
 90%|█████████ | 33800/37380 [11:48:37<4:06:06,  4.12s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-33800
Configuration saved in ../../model/train/ptt5-base/checkpoint-33800/config.json


{'eval_loss': 0.03356345742940903, 'eval_runtime': 55.163, 'eval_samples_per_second': 73.02, 'eval_steps_per_second': 9.137, 'epoch': 4.44}


Model weights saved in ../../model/train/ptt5-base/checkpoint-33800/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-33800/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-33800/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-33800/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-29800] due to args.save_total_limit
 91%|█████████ | 34000/37380 [12:02:33<3:56:01,  4.19s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0152, 'learning_rate': 4.570037858301785e-06, 'epoch': 4.48}


                                                          
 91%|█████████ | 34000/37380 [12:03:28<3:56:01,  4.19s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-34000
Configuration saved in ../../model/train/ptt5-base/checkpoint-34000/config.json


{'eval_loss': 0.03426697477698326, 'eval_runtime': 55.1776, 'eval_samples_per_second': 73.001, 'eval_steps_per_second': 9.134, 'epoch': 4.48}


Model weights saved in ../../model/train/ptt5-base/checkpoint-34000/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-34000/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-34000/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-34000/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-30000] due to args.save_total_limit
 91%|█████████▏| 34200/37380 [12:17:26<3:40:49,  4.17s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0157, 'learning_rate': 4.2996214169821526e-06, 'epoch': 4.51}


                                                          
 91%|█████████▏| 34200/37380 [12:18:21<3:40:49,  4.17s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-34200
Configuration saved in ../../model/train/ptt5-base/checkpoint-34200/config.json


{'eval_loss': 0.034476831555366516, 'eval_runtime': 55.094, 'eval_samples_per_second': 73.111, 'eval_steps_per_second': 9.148, 'epoch': 4.51}


Model weights saved in ../../model/train/ptt5-base/checkpoint-34200/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-34200/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-34200/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-34200/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-30200] due to args.save_total_limit
 92%|█████████▏| 34400/37380 [12:32:18<3:25:26,  4.14s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0183, 'learning_rate': 4.029204975662521e-06, 'epoch': 4.54}


                                                          
 92%|█████████▏| 34400/37380 [12:33:14<3:25:26,  4.14s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-34400
Configuration saved in ../../model/train/ptt5-base/checkpoint-34400/config.json


{'eval_loss': 0.03425212576985359, 'eval_runtime': 55.5392, 'eval_samples_per_second': 72.525, 'eval_steps_per_second': 9.075, 'epoch': 4.54}


Model weights saved in ../../model/train/ptt5-base/checkpoint-34400/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-34400/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-34400/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-34400/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-30400] due to args.save_total_limit
 93%|█████████▎| 34600/37380 [12:47:09<3:12:38,  4.16s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0151, 'learning_rate': 3.758788534342888e-06, 'epoch': 4.57}


                                                          
 93%|█████████▎| 34600/37380 [12:48:05<3:12:38,  4.16s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-34600
Configuration saved in ../../model/train/ptt5-base/checkpoint-34600/config.json


{'eval_loss': 0.03455273061990738, 'eval_runtime': 55.3052, 'eval_samples_per_second': 72.832, 'eval_steps_per_second': 9.113, 'epoch': 4.57}


Model weights saved in ../../model/train/ptt5-base/checkpoint-34600/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-34600/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-34600/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-34600/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-30600] due to args.save_total_limit
 93%|█████████▎| 34800/37380 [13:01:59<2:58:17,  4.15s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0158, 'learning_rate': 3.488372093023256e-06, 'epoch': 4.61}


                                                          
 93%|█████████▎| 34800/37380 [13:02:54<2:58:17,  4.15s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-34800
Configuration saved in ../../model/train/ptt5-base/checkpoint-34800/config.json


{'eval_loss': 0.03455011174082756, 'eval_runtime': 55.5137, 'eval_samples_per_second': 72.559, 'eval_steps_per_second': 9.079, 'epoch': 4.61}


Model weights saved in ../../model/train/ptt5-base/checkpoint-34800/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-34800/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-34800/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-34800/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-30800] due to args.save_total_limit
 94%|█████████▎| 35000/37380 [13:16:46<2:44:15,  4.14s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0184, 'learning_rate': 3.217955651703624e-06, 'epoch': 4.64}


                                                          
 94%|█████████▎| 35000/37380 [13:17:42<2:44:15,  4.14s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-35000
Configuration saved in ../../model/train/ptt5-base/checkpoint-35000/config.json


{'eval_loss': 0.03427380695939064, 'eval_runtime': 55.5387, 'eval_samples_per_second': 72.526, 'eval_steps_per_second': 9.075, 'epoch': 4.64}


Model weights saved in ../../model/train/ptt5-base/checkpoint-35000/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-35000/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-35000/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-35000/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-31000] due to args.save_total_limit
 94%|█████████▍| 35200/37380 [13:31:38<2:30:40,  4.15s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0153, 'learning_rate': 2.9475392103839913e-06, 'epoch': 4.67}


                                                          
 94%|█████████▍| 35200/37380 [13:32:33<2:30:40,  4.15s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-35200
Configuration saved in ../../model/train/ptt5-base/checkpoint-35200/config.json


{'eval_loss': 0.034559741616249084, 'eval_runtime': 55.4916, 'eval_samples_per_second': 72.588, 'eval_steps_per_second': 9.082, 'epoch': 4.67}


Model weights saved in ../../model/train/ptt5-base/checkpoint-35200/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-35200/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-35200/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-35200/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-31200] due to args.save_total_limit
 95%|█████████▍| 35400/37380 [13:46:31<2:18:10,  4.19s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0201, 'learning_rate': 2.6771227690643593e-06, 'epoch': 4.7}


                                                          
 95%|█████████▍| 35400/37380 [13:47:26<2:18:10,  4.19s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-35400
Configuration saved in ../../model/train/ptt5-base/checkpoint-35400/config.json


{'eval_loss': 0.033719126135110855, 'eval_runtime': 55.1741, 'eval_samples_per_second': 73.005, 'eval_steps_per_second': 9.135, 'epoch': 4.7}


Model weights saved in ../../model/train/ptt5-base/checkpoint-35400/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-35400/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-35400/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-35400/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-31400] due to args.save_total_limit
 95%|█████████▌| 35600/37380 [14:01:19<2:02:25,  4.13s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0172, 'learning_rate': 2.406706327744727e-06, 'epoch': 4.73}


                                                          
 95%|█████████▌| 35600/37380 [14:02:14<2:02:25,  4.13s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-35600
Configuration saved in ../../model/train/ptt5-base/checkpoint-35600/config.json


{'eval_loss': 0.03357253968715668, 'eval_runtime': 55.3597, 'eval_samples_per_second': 72.76, 'eval_steps_per_second': 9.104, 'epoch': 4.73}


Model weights saved in ../../model/train/ptt5-base/checkpoint-35600/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-35600/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-35600/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-35600/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-31600] due to args.save_total_limit
 96%|█████████▌| 35800/37380 [14:16:09<1:50:17,  4.19s/it] ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0198, 'learning_rate': 2.1362898864250946e-06, 'epoch': 4.77}


                                                          
 96%|█████████▌| 35800/37380 [14:17:04<1:50:17,  4.19s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-35800
Configuration saved in ../../model/train/ptt5-base/checkpoint-35800/config.json


{'eval_loss': 0.033345792442560196, 'eval_runtime': 55.3595, 'eval_samples_per_second': 72.761, 'eval_steps_per_second': 9.104, 'epoch': 4.77}


Model weights saved in ../../model/train/ptt5-base/checkpoint-35800/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-35800/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-35800/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-35800/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-31800] due to args.save_total_limit
 96%|█████████▋| 36000/37380 [14:31:01<1:35:31,  4.15s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0166, 'learning_rate': 1.8658734451054625e-06, 'epoch': 4.8}


                                                          
 96%|█████████▋| 36000/37380 [14:31:57<1:35:31,  4.15s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-36000
Configuration saved in ../../model/train/ptt5-base/checkpoint-36000/config.json


{'eval_loss': 0.0335436575114727, 'eval_runtime': 55.3112, 'eval_samples_per_second': 72.824, 'eval_steps_per_second': 9.112, 'epoch': 4.8}


Model weights saved in ../../model/train/ptt5-base/checkpoint-36000/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-36000/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-36000/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-36000/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-32000] due to args.save_total_limit
 97%|█████████▋| 36200/37380 [14:45:52<1:22:33,  4.20s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0163, 'learning_rate': 1.5954570037858302e-06, 'epoch': 4.83}


                                                          
 97%|█████████▋| 36200/37380 [14:46:47<1:22:33,  4.20s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-36200
Configuration saved in ../../model/train/ptt5-base/checkpoint-36200/config.json


{'eval_loss': 0.03342711925506592, 'eval_runtime': 55.442, 'eval_samples_per_second': 72.653, 'eval_steps_per_second': 9.091, 'epoch': 4.83}


Model weights saved in ../../model/train/ptt5-base/checkpoint-36200/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-36200/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-36200/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-36200/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-32200] due to args.save_total_limit
 97%|█████████▋| 36400/37380 [15:00:41<1:08:07,  4.17s/it]***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0158, 'learning_rate': 1.325040562466198e-06, 'epoch': 4.86}


                                                          
 97%|█████████▋| 36400/37380 [15:01:36<1:08:07,  4.17s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-36400
Configuration saved in ../../model/train/ptt5-base/checkpoint-36400/config.json


{'eval_loss': 0.03371485322713852, 'eval_runtime': 55.2507, 'eval_samples_per_second': 72.904, 'eval_steps_per_second': 9.122, 'epoch': 4.86}


Model weights saved in ../../model/train/ptt5-base/checkpoint-36400/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-36400/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-36400/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-36400/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-32400] due to args.save_total_limit
 98%|█████████▊| 36600/37380 [15:15:30<53:50,  4.14s/it]  ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0187, 'learning_rate': 1.0546241211465657e-06, 'epoch': 4.89}


                                                        
 98%|█████████▊| 36600/37380 [15:16:25<53:50,  4.14s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-36600
Configuration saved in ../../model/train/ptt5-base/checkpoint-36600/config.json


{'eval_loss': 0.03363970294594765, 'eval_runtime': 55.2924, 'eval_samples_per_second': 72.849, 'eval_steps_per_second': 9.115, 'epoch': 4.89}


Model weights saved in ../../model/train/ptt5-base/checkpoint-36600/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-36600/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-36600/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-36600/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-32600] due to args.save_total_limit
 98%|█████████▊| 36800/37380 [15:30:22<40:23,  4.18s/it]  ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0155, 'learning_rate': 7.842076798269334e-07, 'epoch': 4.93}


                                                        
 98%|█████████▊| 36800/37380 [15:31:17<40:23,  4.18s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-36800
Configuration saved in ../../model/train/ptt5-base/checkpoint-36800/config.json


{'eval_loss': 0.033717378973960876, 'eval_runtime': 55.2173, 'eval_samples_per_second': 72.948, 'eval_steps_per_second': 9.128, 'epoch': 4.93}


Model weights saved in ../../model/train/ptt5-base/checkpoint-36800/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-36800/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-36800/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-36800/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-32800] due to args.save_total_limit
 99%|█████████▉| 37000/37380 [15:45:11<26:23,  4.17s/it]  ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0187, 'learning_rate': 5.137912385073012e-07, 'epoch': 4.96}


                                                        
 99%|█████████▉| 37000/37380 [15:46:07<26:23,  4.17s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-37000
Configuration saved in ../../model/train/ptt5-base/checkpoint-37000/config.json


{'eval_loss': 0.033612579107284546, 'eval_runtime': 55.3379, 'eval_samples_per_second': 72.789, 'eval_steps_per_second': 9.108, 'epoch': 4.96}


Model weights saved in ../../model/train/ptt5-base/checkpoint-37000/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-37000/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-37000/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-37000/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-33000] due to args.save_total_limit
100%|█████████▉| 37200/37380 [16:00:02<12:26,  4.15s/it]  ***** Running Evaluation *****
  Num examples = 4028
  Batch size = 8


{'loss': 0.0179, 'learning_rate': 2.43374797187669e-07, 'epoch': 4.99}


                                                        
100%|█████████▉| 37200/37380 [16:00:57<12:26,  4.15s/it]Saving model checkpoint to ../../model/train/ptt5-base/checkpoint-37200
Configuration saved in ../../model/train/ptt5-base/checkpoint-37200/config.json


{'eval_loss': 0.03358309343457222, 'eval_runtime': 55.645, 'eval_samples_per_second': 72.387, 'eval_steps_per_second': 9.057, 'epoch': 4.99}


Model weights saved in ../../model/train/ptt5-base/checkpoint-37200/pytorch_model.bin
tokenizer config file saved in ../../model/train/ptt5-base/checkpoint-37200/tokenizer_config.json
Special tokens file saved in ../../model/train/ptt5-base/checkpoint-37200/special_tokens_map.json
Copy vocab file to ../../model/train/ptt5-base/checkpoint-37200/spiece.model
Deleting older checkpoint [../../model/train/ptt5-base/checkpoint-33200] due to args.save_total_limit
100%|██████████| 37380/37380 [16:13:27<00:00,  4.19s/it]  

Training completed. Do not forget to share your model on huggingface.co/models =)


100%|██████████| 37380/37380 [16:13:27<00:00,  4.19s/it]

{'train_runtime': 58408.934, 'train_samples_per_second': 40.957, 'train_steps_per_second': 0.64, 'train_loss': 0.006224599051692385, 'epoch': 5.02}
Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 15 operations to synchronize with Neptune. Do not kill this process.
All 15 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/marcusborela/IA386DD/e/IAD-109


100%|██████████| 37380/37380 [16:13:27<00:00,  1.56s/it]


In [117]:
model.save_pretrained('../../model/train/ptt5-indir-version/indir-600-pcte')

Configuration saved in ../../model/train/ptt5-indir-version/indir-600-pcte/config.json
Model weights saved in ../../model/train/ptt5-indir-version/indir-600-pcte/pytorch_model.bin


In [118]:
train_metrics

TrainOutput(global_step=37380, training_loss=0.006224599051692385, metrics={'train_runtime': 58408.934, 'train_samples_per_second': 40.957, 'train_steps_per_second': 0.64, 'train_loss': 0.006224599051692385, 'epoch': 5.02})

In [None]:
EXECUÇÕES ANTERIORES

In [None]:
%%time
train_metrics = trainer.train(resume_from_checkpoint=True)

In [None]:
print(train_metrics )

Não sei se a mudança do batch size (32x2) 64 para 32 (8x4) impactou passar por dados duas vezes!!!

In [None]:
%%time
train_metrics = trainer.train(resume_from_checkpoint=True)

In [None]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

In [None]:
train_metrics = trainer.train(resume_from_checkpoint=True)

Abaixo execucao lim50: a100/40gb (erro mount drive)

In [None]:
%%time
train_metrics = trainer.train(resume_from_checkpoint=False)

In [None]:
train_metrics = trainer.train(resume_from_checkpoint=True)

In [None]:
%%time
train_metrics = trainer.train(resume_from_checkpoint=False)

In [None]:
train_metrics = trainer.train(resume_from_checkpoint=True)

In [None]:
train_metrics

In [None]:
huggingface-cli login

In [None]:
pip install huggingface_hub

In [None]:
from huggingface_hub import notebook_login

In [None]:
notebook_login()


In [None]:
trainer.push_to_hub(
    model_id="ptt5-base-pt-msmarco-100k-v2-indir-lim100",
    repo_name="marcusborela"
    # use_auth_token=""
)