In [1]:
import os
import random
import json
import csv
import numpy as np
import pandas as pd
from collections import namedtuple
from tqdm.auto import tqdm
from pathlib import Path
from datetime import datetime

import torch
from torch.utils.data import Dataset, DataLoader

from transformers import AdamW, get_linear_schedule_with_warmup
from transformers import BertForTokenClassification, BertTokenizer, AutoModelWithLMHead, AutoConfig
import pytorch_lightning as pl

import logging
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

import warnings
warnings.filterwarnings('ignore')

import pdb

## Download pretrained model and tokenizer

From https://huggingface.co/models

In [1]:
from transformers import BartForConditionalGeneration, BartTokenizer

In [8]:
# Download model/tokenizer, or load from cache
config = AutoConfig.from_pretrained(
    'bert-large-uncased',
    cache_dir='./cache',
    output_past = False,
    num_labels=3,  # change based on number of IOB labels
)
model = BartForConditionalGeneration.from_pretrained(
    'bert-large-uncased',
    config=config,
    cache_dir='./cache'
)
tokenizer = BartTokenizer.from_pretrained(
    'bert-large-uncased',
    cache_dir='./cache'
)

In [2]:
MODEL_DIR = "./bert-large-uncased-25Mar2020"

In [10]:
# Save the model/tokenizer
model.save_pretrained(MODEL_DIR)
tokenizer.save_pretrained(MODEL_DIR)
config.save_pretrained(MODEL_DIR)

('./bart-large-25Mar2020/vocab.json',
 './bart-large-25Mar2020/merges.txt',
 './bart-large-25Mar2020/special_tokens_map.json',
 './bart-large-25Mar2020/added_tokens.json')

In [3]:
# Load the model/tokenizer
config = AutoConfig.from_pretrained(
    MODEL_DIR, 
    cache_dir='./cache',
    output_past = False,
)
tokenizer = BartTokenizer.from_pretrained(
    MODEL_DIR, 
    cache_dir='./cache'
)
model = BartForConditionalGeneration.from_pretrained(
    MODEL_DIR, 
    config=config,
    cache_dir='./cache'
)

KeyboardInterrupt: 

## Load BERT model for training

INFO:transformers.configuration_utils:loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-config.json from cache at ./cache/6dfaed860471b03ab5b9acb6153bea82b6632fb9bbe514d3fff050fe1319ee6d.788fed32bb8481a9b15ce726d41c53d5d5066b04c667e34ce3a7a3826d1573d8
INFO:transformers.configuration_utils:Model config BertConfig {
  "_num_labels": 3,
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": null,
  "do_sample": false,
  "early_stopping": false,
  "eos_token_id": null,
  "finetuning_task": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "is_decoder": false,
  "is_encoder_decoder": false,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "layer_norm_eps": 1e-12,
  "length_penalty": 1.0,
  "max_len

In [5]:
model = BertForTokenClassification.from_pretrained(
    'bert-large-uncased',
    from_tf=False,
    config=config,
    cache_dir='./cache',
)

INFO:filelock:Lock 47013707973240 acquired on ./cache/54da47087cc86ce75324e4dc9bbb5f66c6e83a7c6bd23baea8b489acc8d09aa4.4d5343a4b979c4beeaadef17a0453d1bb183dd9b084f58b84c7cc781df343ae6.lock
INFO:transformers.file_utils:https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-pytorch_model.bin not found in cache or force_download set to True, downloading to /hps/nobackup/research/chembl/deeplearning/transformers-ner/cache/tmpvu7vxfdd
Downloading: 100%|██████████| 1.34G/1.34G [08:59<00:00, 2.49MB/s] 
INFO:transformers.file_utils:storing https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-pytorch_model.bin in cache at ./cache/54da47087cc86ce75324e4dc9bbb5f66c6e83a7c6bd23baea8b489acc8d09aa4.4d5343a4b979c4beeaadef17a0453d1bb183dd9b084f58b84c7cc781df343ae6
INFO:transformers.file_utils:creating metadata file for ./cache/54da47087cc86ce75324e4dc9bbb5f66c6e83a7c6bd23baea8b489acc8d09aa4.4d5343a4b979c4beeaadef17a0453d1bb183dd9b084f58b84c7cc781df343ae6
INFO:filelock

In [48]:
import itertools as it

def get_token_text_locs(text, tokens):
    tok_i = 0
    tok_char_i = 0
    char_toks = []
    for i,c in enumerate(text.lower()):            
        if tok_char_i > len(tokens[tok_i])-1:
            tok_i += 1
            tok_char_i = 0
            
#         For ROBERTA
#         while True:
#             if tok_i > len(tokens)-1:
#                 break
#             if tokens[tok_i] in {'Ċ', 'Ġ'}:
#                 tok_i += 1
#                 tok_char_i = 0
#             else:
#                 break

        if tok_i > len(tokens)-1:
            break

        if tokens[tok_i][0:2] == '##' and tok_char_i == 0:
            tok_char_i = 2
            
#         For ROBERTA
#         if tokens[tok_i][0] == 'Ġ' and tok_char_i == 0:
#             tok_char_i = 1

        if tokens[tok_i][tok_char_i] == c:
            tok_char_i += 1

        char_toks.append(tok_i)

    return char_toks

def remove_overlaps(locs):
    remove = set()
    for (b1,e1),(b2,e2) in it.permutations(locs,2):
        if b2>=b1 and e2<=e1:
            if (e2-b2)<(e1-b1):
                remove.add((b2,e2))

    return [(b,e) for b,e in locs if not (b,e) in remove]

def get_text_locs(s, text):
    s = s.lower()
    text = text.lower()
    l = len(s)
    matches = []
    for i in range(len(text)):
        if text[i:i+l] == s:
            matches.append((i,i+l))
    return matches

def tokenize(tokenizer, text):
    tokens = tokenizer.tokenize(text.lower(), add_special_tokens=True)
    codes = tokenizer.encode(text.lower(), add_special_tokens=True)

    return tokens, codes

def gen_iob(tokenizer, text, targets):
    tokens, codes = tokenize(tokenizer, text)
    char_toks = get_token_text_locs(text, tokens)
    
    matches = []
    for s in targets:
        matches.append(get_text_locs(s,text))
    matches = sorted([l for m in matches for l in m])
    matches = remove_overlaps(matches)

    iob = [[t,"O"] for t in tokens]
    match_toks = [i for b,e in matches for i in range(char_toks[b],char_toks[e-1]+1)]
    for l in match_toks:
        iob[l][1] = "I-IND"
    match_beginnings = [char_toks[b] for b,e in matches]
    for l in match_beginnings:
        iob[l][1] = "B-IND"
    
    for i,(t,l) in enumerate(iob):
        if t[:2] == '##':
            l = '[PAD]'
        iob[i] = (t,l)

    return iob, codes

In [49]:
s = "hello, this is (a) testing, <bullet> et cetera. This is another sentence. <newline> OK then"

In [51]:
tokens, codes = tokenize(tokenizer, s)

In [52]:
tokens

['hello',
 ',',
 'this',
 'is',
 '(',
 'a',
 ')',
 'testing',
 ',',
 '<bullet>',
 'et',
 'ce',
 '##tera',
 '.',
 'this',
 'is',
 'another',
 'sentence',
 '.',
 '<newline>',
 'ok',
 'then']

In [53]:
gen_iob(tokenizer, s, ['this is', 'et cetera'])

([('hello', 'O'),
  (',', 'O'),
  ('this', 'B-IND'),
  ('is', 'I-IND'),
  ('(', 'O'),
  ('a', 'O'),
  (')', 'O'),
  ('testing', 'O'),
  (',', 'O'),
  ('<bullet>', 'O'),
  ('et', 'B-IND'),
  ('ce', 'I-IND'),
  ('##tera', '[PAD]'),
  ('.', 'O'),
  ('this', 'B-IND'),
  ('is', 'I-IND'),
  ('another', 'O'),
  ('sentence', 'O'),
  ('.', 'O'),
  ('<newline>', 'O'),
  ('ok', 'O'),
  ('then', 'O')],
 [101,
  7592,
  1010,
  2023,
  2003,
  1006,
  1037,
  1007,
  5604,
  1010,
  30524,
  3802,
  8292,
  14621,
  1012,
  2023,
  2003,
  2178,
  6251,
  1012,
  30523,
  7929,
  2059,
  102])

In [1]:
from utils import *
import yaml

import logging
logging.getLogger().setLevel(logging.CRITICAL)

import warnings
warnings.filterwarnings('ignore')

args_file = "./args.yaml"

with open(args_file) as f:
    args_dict = yaml.load(f)

extra_args_file = ""
    
if extra_args_file:
    with open(extra_args_file) as f:
        extra_args_dict = yaml.load(f)
        for k,v in extra_args_dict.items():
            args_dict[k] = v

args = Dict2Obj(args_dict)

checkpoint_dir = "../bert-lm/output/14May2020-17h05m15s/checkpoint-1584"
args['model_name_or_path'] = checkpoint_dir
args['config_name'] = checkpoint_dir
args['tokenizer_name'] = checkpoint_dir

bert_system = BertNerSystem(args)  # Load model
trainer = get_trainer(bert_system, args)  # get the trainer

## Test dataloader

In [3]:
import pdb, traceback, sys
try:
    dataloader = bert_system.train_dataloader()
except:
    extype, value, tb = sys.exc_info()
    traceback.print_exc()
    pdb.post_mortem(tb)

Loading train data: 100%|██████████| 1088/1088 [00:08<00:00, 130.74it/s]


In [4]:
dataset = dataloader.dataset

In [7]:
next(iter(dataloader))

{'tokens': tensor([[  101, 24936,  1998,  8192,  1026,  2047,  4179,  1028,  3161, 10768,
           2003,  5393,  2005,  1996,  9740,  1998,  3949,  1997,  3707, 18888,
           2019, 17577,  1998,  1013,  2030, 28268, 13164,  4135, 28522, 10074,
           2019, 17577,  2015,  1012,   102,     0,     0,     0,     0,     0,
              0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
              0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
              0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
              0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
              0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
              0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
              0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
              0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
              0,     0,     0,

In [5]:
len(dataset.data)

903

In [6]:
dataset.data[0]

['4f58c203-f649-a18e-aaf3-70b0797652f4',
 '1 INDICATIONS AND USAGE<newline>  Azelastine hydrochloride nasal spray is indicated for the treatment of the symptoms of seasonal allergic rhinitis in adults and pediatric patients 5 years and older, and for the treatment of the symptoms of vasomotor rhinitis in adults and adolescent patients 12 years and older.<newline>  Azelastine hydrochloride nasal spray is an H1-receptor antagonist indicated for the treatment of the symptoms of seasonal allergic rhinitis in adults and pediatric patients 5 years and older and for the treatment of the symptoms of vasomotor rhinitis in adults and adolescent patients 12 years and older. ( 1 ) ',
 ['SEASONAL ALLERGIC RHINITIS', 'VASOMOTOR RHINITIS'],
 [('1', 'O'),
  ('indications', 'O'),
  ('and', 'O'),
  ('usage', 'O'),
  ('<', 'O'),
  ('new', 'O'),
  ('##line', 'O'),
  ('>', 'O'),
  ('az', 'O'),
  ('##ela', 'O'),
  ('##sti', 'O'),
  ('##ne', 'O'),
  ('hydro', 'O'),
  ('##ch', 'O'),
  ('##lor', 'O'),
  ('##id

## Test training

In [2]:
import pdb, traceback, sys
try:
    trainer.fit(bert_system)
except:
    extype, value, tb = sys.exc_info()
    traceback.print_exc()
    pdb.post_mortem(tb)

Loading val data: 100%|██████████| 543/543 [00:04<00:00, 132.99it/s]

Validation sanity check: 0it [00:00, ?it/s]




                                                                      

Loading train data:   0%|          | 0/1088 [00:00<?, ?it/s]



Loading train data: 100%|██████████| 1088/1088 [00:08<00:00, 132.01it/s]
Loading val data: 100%|██████████| 543/543 [00:03<00:00, 136.71it/s]

Epoch 1:   0%|          | 0/919 [00:00<?, ?it/s] 




Epoch 1:  22%|██▏       | 200/919 [00:48<02:55,  4.09it/s, loss=0.013, v_num=33]
Epoch 1:  22%|██▏       | 201/919 [00:49<02:55,  4.10it/s, loss=0.013, v_num=33]
Epoch 1:  22%|██▏       | 203/919 [00:49<02:53,  4.12it/s, loss=0.013, v_num=33]
Epoch 1:  22%|██▏       | 205/919 [00:49<02:52,  4.15it/s, loss=0.013, v_num=33]
Epoch 1:  23%|██▎       | 207/919 [00:49<02:50,  4.18it/s, loss=0.013, v_num=33]
Epoch 1:  23%|██▎       | 209/919 [00:49<02:49,  4.20it/s, loss=0.013, v_num=33]
Epoch 1:  23%|██▎       | 211/919 [00:49<02:47,  4.23it/s, loss=0.013, v_num=33]
Epoch 1:  23%|██▎       | 213/919 [00:50<02:46,  4.25it/s, loss=0.013, v_num=33]
Epoch 1:  23%|██▎       | 215/919 [00:50<02:44,  4.27it/s, loss=0.013, v_num=33]
Epoch 1:  24%|██▎       | 217/919 [00:50<02:43,  4.30it/s, loss=0.013, v_num=33]
Epoch 1:  24%|██▍       | 219/919 [00:50<02:41,  4.32it/s, loss=0.013, v_num=33]
Epoch 1:  24%|██▍       | 221/919 [00:50<02:40,  4.35it/s, loss=0.013, v_num=33]
Epoch 1:  24%|██▍       | 22

INFO:utils:***** Validation results *****
INFO:utils:avg_val_loss = tensor(0.1649, device='cuda:0')

INFO:utils:loss = tensor(0.1900, device='cuda:0')



Epoch 1:  47%|████▋     | 431/919 [01:09<01:18,  6.18it/s, loss=0.013, v_num=33]
Epoch 1:  69%|██████▊   | 631/919 [01:59<00:54,  5.30it/s, loss=0.010, v_num=33]
Validating: 0it [00:00, ?it/s][A
Epoch 1:  69%|██████▉   | 633/919 [01:59<00:53,  5.31it/s, loss=0.010, v_num=33]
Epoch 1:  69%|██████▉   | 635/919 [01:59<00:53,  5.32it/s, loss=0.010, v_num=33]
Epoch 1:  69%|██████▉   | 637/919 [01:59<00:52,  5.33it/s, loss=0.010, v_num=33]
Epoch 1:  70%|██████▉   | 639/919 [01:59<00:52,  5.34it/s, loss=0.010, v_num=33]
Epoch 1:  70%|██████▉   | 641/919 [01:59<00:52,  5.34it/s, loss=0.010, v_num=33]
Epoch 1:  70%|██████▉   | 643/919 [02:00<00:51,  5.35it/s, loss=0.010, v_num=33]
Epoch 1:  70%|███████   | 645/919 [02:00<00:51,  5.36it/s, loss=0.010, v_num=33]
Epoch 1:  70%|███████   | 647/919 [02:00<00:50,  5.37it/s, loss=0.010, v_num=33]
Epoch 1:  71%|███████   | 649/919 [02:00<00:50,  5.38it/s, loss=0.010, v_num=33]
Epoch 1:  71%|███████   | 651/919 [02:00<00:49,  5.39it/s, loss=0.010, v_nu

INFO:utils:***** Validation results *****
INFO:utils:avg_val_loss = tensor(0.1142, device='cuda:0')

INFO:utils:epoch = 0

INFO:utils:loss = tensor(0.2049, device='cuda:0')



Epoch 1:  94%|█████████▍| 862/919 [02:19<00:09,  6.16it/s, loss=0.010, v_num=33]
Epoch 1: 100%|██████████| 919/919 [02:33<00:00,  5.97it/s, loss=0.009, v_num=33]

INFO:utils:Model checkpointed at step 18


Epoch 2:  22%|██▏       | 200/919 [00:49<02:57,  4.05it/s, loss=0.005, v_num=33]
Validating: 0it [00:00, ?it/s][A
Epoch 2:  22%|██▏       | 202/919 [00:49<02:56,  4.07it/s, loss=0.005, v_num=33]
Epoch 2:  22%|██▏       | 204/919 [00:49<02:54,  4.10it/s, loss=0.005, v_num=33]
Epoch 2:  22%|██▏       | 206/919 [00:49<02:52,  4.12it/s, loss=0.005, v_num=33]
Epoch 2:  23%|██▎       | 208/919 [00:50<02:51,  4.15it/s, loss=0.005, v_num=33]
Epoch 2:  23%|██▎       | 210/919 [00:50<02:49,  4.17it/s, loss=0.005, v_num=33]
Epoch 2:  23%|██▎       | 212/919 [00:50<02:48,  4.20it/s, loss=0.005, v_num=33]
Epoch 2:  23%|██▎       | 214/919 [00:50<02:46,  4.22it/s, loss=0.005, v_num=33]
Epoch 2:  24%|██▎       | 216/919 [00:50<02:45,  4.25it/s, loss=0.005, v_num=33]
Epoch 2:  24%|██▎       | 218/919 [00:51<02:44,  4.27it/s, loss=0.005, v_num=33]
Epoch 2:  24%|██▍       | 220/919 [00:51<02:42,  4.29it/s, loss=0.005, v_num=33]
Epoch 2:  24%|██▍       | 222/919 [00:51<02:41,  4.32it/s, loss=0.005, v_nu

INFO:utils:***** Validation results *****
INFO:utils:avg_val_loss = tensor(0.0961, device='cuda:0')

INFO:utils:epoch = 0

INFO:utils:loss = tensor(0.0251, device='cuda:0')



Epoch 2:  47%|████▋     | 431/919 [01:10<01:19,  6.13it/s, loss=0.005, v_num=33]
Epoch 2:  69%|██████▊   | 631/919 [01:59<00:54,  5.27it/s, loss=0.004, v_num=33]
Epoch 2:  69%|██████▉   | 632/919 [01:59<00:54,  5.27it/s, loss=0.004, v_num=33]
Epoch 2:  69%|██████▉   | 634/919 [02:00<00:53,  5.28it/s, loss=0.004, v_num=33]
Epoch 2:  69%|██████▉   | 636/919 [02:00<00:53,  5.29it/s, loss=0.004, v_num=33]
Epoch 2:  69%|██████▉   | 638/919 [02:00<00:53,  5.30it/s, loss=0.004, v_num=33]
Epoch 2:  70%|██████▉   | 640/919 [02:00<00:52,  5.31it/s, loss=0.004, v_num=33]
Epoch 2:  70%|██████▉   | 642/919 [02:00<00:52,  5.32it/s, loss=0.004, v_num=33]
Epoch 2:  70%|███████   | 644/919 [02:00<00:51,  5.32it/s, loss=0.004, v_num=33]
Epoch 2:  70%|███████   | 646/919 [02:01<00:51,  5.33it/s, loss=0.004, v_num=33]
Epoch 2:  71%|███████   | 648/919 [02:01<00:50,  5.34it/s, loss=0.004, v_num=33]
Epoch 2:  71%|███████   | 650/919 [02:01<00:50,  5.35it/s, loss=0.004, v_num=33]
Epoch 2:  71%|███████   | 65

INFO:utils:***** Validation results *****
INFO:utils:avg_val_loss = tensor(0.0881, device='cuda:0')

INFO:utils:epoch = 1

INFO:utils:loss = tensor(0.0321, device='cuda:0')



Epoch 2:  94%|█████████▍| 862/919 [02:20<00:09,  6.12it/s, loss=0.004, v_num=33]
Epoch 2: 100%|██████████| 919/919 [02:34<00:00,  5.94it/s, loss=0.003, v_num=33]

INFO:utils:Model checkpointed at step 36


Epoch 3:  22%|██▏       | 200/919 [00:49<02:56,  4.07it/s, loss=0.003, v_num=33]
Validating: 0it [00:00, ?it/s][A
Epoch 3:  22%|██▏       | 202/919 [00:49<02:55,  4.09it/s, loss=0.003, v_num=33]
Epoch 3:  22%|██▏       | 204/919 [00:49<02:53,  4.12it/s, loss=0.003, v_num=33]
Epoch 3:  22%|██▏       | 206/919 [00:49<02:52,  4.14it/s, loss=0.003, v_num=33]
Epoch 3:  23%|██▎       | 208/919 [00:49<02:50,  4.17it/s, loss=0.003, v_num=33]
Epoch 3:  23%|██▎       | 210/919 [00:50<02:49,  4.19it/s, loss=0.003, v_num=33]
Epoch 3:  23%|██▎       | 212/919 [00:50<02:47,  4.22it/s, loss=0.003, v_num=33]
Epoch 3:  23%|██▎       | 214/919 [00:50<02:46,  4.24it/s, loss=0.003, v_num=33]
Epoch 3:  24%|██▎       | 216/919 [00:50<02:44,  4.27it/s, loss=0.003, v_num=33]
Epoch 3:  24%|██▎       | 218/919 [00:50<02:43,  4.29it/s, loss=0.003, v_num=33]
Epoch 3:  24%|██▍       | 220/919 [00:51<02:42,  4.31it/s, loss=0.003, v_num=33]
Epoch 3:  24%|██▍       | 222/919 [00:51<02:40,  4.34it/s, loss=0.003, v_nu

INFO:utils:***** Validation results *****
INFO:utils:avg_val_loss = tensor(0.1041, device='cuda:0')

INFO:utils:epoch = 1

INFO:utils:loss = tensor(0.0109, device='cuda:0')



Epoch 3:  47%|████▋     | 431/919 [01:10<01:19,  6.15it/s, loss=0.003, v_num=33]
Epoch 3:  69%|██████▊   | 631/919 [01:59<00:54,  5.29it/s, loss=0.002, v_num=33]
Epoch 3:  69%|██████▉   | 632/919 [01:59<00:54,  5.29it/s, loss=0.002, v_num=33]
Epoch 3:  69%|██████▉   | 634/919 [01:59<00:53,  5.30it/s, loss=0.002, v_num=33]
Epoch 3:  69%|██████▉   | 636/919 [01:59<00:53,  5.31it/s, loss=0.002, v_num=33]
Epoch 3:  69%|██████▉   | 638/919 [01:59<00:52,  5.32it/s, loss=0.002, v_num=33]
Epoch 3:  70%|██████▉   | 640/919 [02:00<00:52,  5.33it/s, loss=0.002, v_num=33]
Epoch 3:  70%|██████▉   | 642/919 [02:00<00:51,  5.34it/s, loss=0.002, v_num=33]
Epoch 3:  70%|███████   | 644/919 [02:00<00:51,  5.35it/s, loss=0.002, v_num=33]
Epoch 3:  70%|███████   | 646/919 [02:00<00:50,  5.36it/s, loss=0.002, v_num=33]
Epoch 3:  71%|███████   | 648/919 [02:00<00:50,  5.36it/s, loss=0.002, v_num=33]
Epoch 3:  71%|███████   | 650/919 [02:00<00:50,  5.37it/s, loss=0.002, v_num=33]
Epoch 3:  71%|███████   | 65

INFO:utils:***** Validation results *****
INFO:utils:avg_val_loss = tensor(0.0930, device='cuda:0')

INFO:utils:epoch = 2

INFO:utils:loss = tensor(0.1389, device='cuda:0')



Epoch 3:  94%|█████████▍| 862/919 [02:20<00:09,  6.15it/s, loss=0.002, v_num=33]
Epoch 3: 100%|██████████| 919/919 [02:34<00:00,  5.96it/s, loss=0.002, v_num=33]

INFO:utils:Model checkpointed at step 54


Epoch 4:  22%|██▏       | 200/919 [00:49<02:56,  4.07it/s, loss=0.002, v_num=33]
Validating: 0it [00:00, ?it/s][A
Epoch 4:  22%|██▏       | 202/919 [00:49<02:55,  4.09it/s, loss=0.002, v_num=33]
Epoch 4:  22%|██▏       | 204/919 [00:49<02:53,  4.12it/s, loss=0.002, v_num=33]
Epoch 4:  22%|██▏       | 206/919 [00:49<02:52,  4.14it/s, loss=0.002, v_num=33]
Epoch 4:  23%|██▎       | 208/919 [00:49<02:50,  4.17it/s, loss=0.002, v_num=33]
Epoch 4:  23%|██▎       | 210/919 [00:50<02:49,  4.19it/s, loss=0.002, v_num=33]
Epoch 4:  23%|██▎       | 212/919 [00:50<02:47,  4.22it/s, loss=0.002, v_num=33]
Epoch 4:  23%|██▎       | 214/919 [00:50<02:46,  4.24it/s, loss=0.002, v_num=33]
Epoch 4:  24%|██▎       | 216/919 [00:50<02:44,  4.27it/s, loss=0.002, v_num=33]
Epoch 4:  24%|██▎       | 218/919 [00:50<02:43,  4.29it/s, loss=0.002, v_num=33]
Epoch 4:  24%|██▍       | 220/919 [00:50<02:41,  4.32it/s, loss=0.002, v_num=33]
Epoch 4:  24%|██▍       | 222/919 [00:51<02:40,  4.34it/s, loss=0.002, v_nu

INFO:utils:***** Validation results *****
INFO:utils:avg_val_loss = tensor(0.1343, device='cuda:0')

INFO:utils:epoch = 2

INFO:utils:loss = tensor(0.0113, device='cuda:0')



Epoch 4:  47%|████▋     | 431/919 [01:10<01:19,  6.16it/s, loss=0.002, v_num=33]
Epoch 4:  69%|██████▊   | 631/919 [01:59<00:54,  5.29it/s, loss=0.001, v_num=33]
Epoch 4:  69%|██████▉   | 632/919 [01:59<00:54,  5.30it/s, loss=0.001, v_num=33]
Epoch 4:  69%|██████▉   | 634/919 [01:59<00:53,  5.31it/s, loss=0.001, v_num=33]
Epoch 4:  69%|██████▉   | 636/919 [01:59<00:53,  5.32it/s, loss=0.001, v_num=33]
Epoch 4:  69%|██████▉   | 638/919 [01:59<00:52,  5.32it/s, loss=0.001, v_num=33]
Epoch 4:  70%|██████▉   | 640/919 [02:00<00:52,  5.33it/s, loss=0.001, v_num=33]
Epoch 4:  70%|██████▉   | 642/919 [02:00<00:51,  5.34it/s, loss=0.001, v_num=33]
Epoch 4:  70%|███████   | 644/919 [02:00<00:51,  5.35it/s, loss=0.001, v_num=33]
Epoch 4:  70%|███████   | 646/919 [02:00<00:50,  5.36it/s, loss=0.001, v_num=33]
Epoch 4:  71%|███████   | 648/919 [02:00<00:50,  5.37it/s, loss=0.001, v_num=33]
Epoch 4:  71%|███████   | 650/919 [02:00<00:50,  5.38it/s, loss=0.001, v_num=33]
Epoch 4:  71%|███████   | 65

INFO:utils:***** Validation results *****
INFO:utils:avg_val_loss = tensor(0.0978, device='cuda:0')

INFO:utils:epoch = 3

INFO:utils:loss = tensor(0.0726, device='cuda:0')



Epoch 4:  94%|█████████▍| 862/919 [02:20<00:09,  6.15it/s, loss=0.001, v_num=33]
Epoch 4: 100%|██████████| 919/919 [02:34<00:00,  5.96it/s, loss=0.001, v_num=33]

INFO:utils:Model checkpointed at step 72


Epoch 5:  22%|██▏       | 200/919 [00:49<02:56,  4.07it/s, loss=0.001, v_num=33]
Validating: 0it [00:00, ?it/s][A
Epoch 5:  22%|██▏       | 202/919 [00:49<02:55,  4.09it/s, loss=0.001, v_num=33]
Epoch 5:  22%|██▏       | 204/919 [00:49<02:53,  4.12it/s, loss=0.001, v_num=33]
Epoch 5:  22%|██▏       | 206/919 [00:49<02:52,  4.14it/s, loss=0.001, v_num=33]
Epoch 5:  23%|██▎       | 208/919 [00:49<02:50,  4.17it/s, loss=0.001, v_num=33]
Epoch 5:  23%|██▎       | 210/919 [00:50<02:49,  4.19it/s, loss=0.001, v_num=33]
Epoch 5:  23%|██▎       | 212/919 [00:50<02:47,  4.22it/s, loss=0.001, v_num=33]
Epoch 5:  23%|██▎       | 214/919 [00:50<02:46,  4.24it/s, loss=0.001, v_num=33]
Epoch 5:  24%|██▎       | 216/919 [00:50<02:44,  4.27it/s, loss=0.001, v_num=33]
Epoch 5:  24%|██▎       | 218/919 [00:50<02:43,  4.29it/s, loss=0.001, v_num=33]
Epoch 5:  24%|██▍       | 220/919 [00:50<02:41,  4.32it/s, loss=0.001, v_num=33]
Epoch 5:  24%|██▍       | 222/919 [00:51<02:40,  4.34it/s, loss=0.001, v_nu

INFO:utils:***** Validation results *****
INFO:utils:avg_val_loss = tensor(0.1054, device='cuda:0')

INFO:utils:epoch = 3

INFO:utils:loss = tensor(0.0076, device='cuda:0')



Epoch 5:  47%|████▋     | 431/919 [01:10<01:19,  6.16it/s, loss=0.001, v_num=33]
Epoch 5:  69%|██████▊   | 631/919 [01:59<00:54,  5.30it/s, loss=0.001, v_num=33]
Epoch 5:  69%|██████▉   | 632/919 [01:59<00:54,  5.30it/s, loss=0.001, v_num=33]
Epoch 5:  69%|██████▉   | 634/919 [01:59<00:53,  5.31it/s, loss=0.001, v_num=33]
Epoch 5:  69%|██████▉   | 636/919 [01:59<00:53,  5.32it/s, loss=0.001, v_num=33]
Epoch 5:  69%|██████▉   | 638/919 [01:59<00:52,  5.33it/s, loss=0.001, v_num=33]
Epoch 5:  70%|██████▉   | 640/919 [01:59<00:52,  5.34it/s, loss=0.001, v_num=33]
Epoch 5:  70%|██████▉   | 642/919 [02:00<00:51,  5.34it/s, loss=0.001, v_num=33]
Epoch 5:  70%|███████   | 644/919 [02:00<00:51,  5.35it/s, loss=0.001, v_num=33]
Epoch 5:  70%|███████   | 646/919 [02:00<00:50,  5.36it/s, loss=0.001, v_num=33]
Epoch 5:  71%|███████   | 648/919 [02:00<00:50,  5.37it/s, loss=0.001, v_num=33]
Epoch 5:  71%|███████   | 650/919 [02:00<00:50,  5.38it/s, loss=0.001, v_num=33]
Epoch 5:  71%|███████   | 65

INFO:utils:***** Validation results *****
INFO:utils:avg_val_loss = tensor(0.1145, device='cuda:0')

INFO:utils:epoch = 4

INFO:utils:loss = tensor(0.0212, device='cuda:0')



Epoch 5:  94%|█████████▍| 862/919 [02:20<00:09,  6.15it/s, loss=0.001, v_num=33]
Epoch 5: 100%|██████████| 919/919 [02:34<00:00,  5.96it/s, loss=0.001, v_num=33]

INFO:utils:Model checkpointed at step 90


Epoch 6:  22%|██▏       | 200/919 [00:49<02:56,  4.08it/s, loss=0.001, v_num=33]
Validating: 0it [00:00, ?it/s][A
Epoch 6:  22%|██▏       | 202/919 [00:49<02:54,  4.10it/s, loss=0.001, v_num=33]
Epoch 6:  22%|██▏       | 204/919 [00:49<02:53,  4.13it/s, loss=0.001, v_num=33]
Epoch 6:  22%|██▏       | 206/919 [00:49<02:51,  4.15it/s, loss=0.001, v_num=33]
Epoch 6:  23%|██▎       | 208/919 [00:49<02:50,  4.18it/s, loss=0.001, v_num=33]
Epoch 6:  23%|██▎       | 210/919 [00:49<02:48,  4.20it/s, loss=0.001, v_num=33]
Epoch 6:  23%|██▎       | 212/919 [00:50<02:47,  4.23it/s, loss=0.001, v_num=33]
Epoch 6:  23%|██▎       | 214/919 [00:50<02:45,  4.25it/s, loss=0.001, v_num=33]
Epoch 6:  24%|██▎       | 216/919 [00:50<02:44,  4.27it/s, loss=0.001, v_num=33]
Epoch 6:  24%|██▎       | 218/919 [00:50<02:43,  4.30it/s, loss=0.001, v_num=33]
Epoch 6:  24%|██▍       | 220/919 [00:50<02:41,  4.32it/s, loss=0.001, v_num=33]
Epoch 6:  24%|██▍       | 222/919 [00:51<02:40,  4.35it/s, loss=0.001, v_nu

INFO:utils:***** Validation results *****
INFO:utils:avg_val_loss = tensor(0.1138, device='cuda:0')

INFO:utils:epoch = 4

INFO:utils:loss = tensor(0.0070, device='cuda:0')



Epoch 6:  47%|████▋     | 431/919 [01:09<01:19,  6.16it/s, loss=0.001, v_num=33]
Epoch 6:  69%|██████▊   | 631/919 [01:59<00:54,  5.29it/s, loss=0.001, v_num=33]
Epoch 6:  69%|██████▉   | 632/919 [01:59<00:54,  5.30it/s, loss=0.001, v_num=33]
Epoch 6:  69%|██████▉   | 634/919 [01:59<00:53,  5.31it/s, loss=0.001, v_num=33]
Epoch 6:  69%|██████▉   | 636/919 [01:59<00:53,  5.32it/s, loss=0.001, v_num=33]
Epoch 6:  69%|██████▉   | 638/919 [01:59<00:52,  5.32it/s, loss=0.001, v_num=33]
Epoch 6:  70%|██████▉   | 640/919 [02:00<00:52,  5.33it/s, loss=0.001, v_num=33]
Epoch 6:  70%|██████▉   | 642/919 [02:00<00:51,  5.34it/s, loss=0.001, v_num=33]
Epoch 6:  70%|███████   | 644/919 [02:00<00:51,  5.35it/s, loss=0.001, v_num=33]
Epoch 6:  70%|███████   | 646/919 [02:00<00:50,  5.36it/s, loss=0.001, v_num=33]
Epoch 6:  71%|███████   | 648/919 [02:00<00:50,  5.37it/s, loss=0.001, v_num=33]
Epoch 6:  71%|███████   | 650/919 [02:00<00:50,  5.38it/s, loss=0.001, v_num=33]
Epoch 6:  71%|███████   | 65

INFO:utils:***** Validation results *****
INFO:utils:avg_val_loss = tensor(0.1184, device='cuda:0')

INFO:utils:epoch = 5

INFO:utils:loss = tensor(0.0006, device='cuda:0')



Epoch 6:  94%|█████████▍| 862/919 [02:20<00:09,  6.15it/s, loss=0.001, v_num=33]
Epoch 6: 100%|██████████| 919/919 [02:34<00:00,  5.96it/s, loss=0.001, v_num=33]

INFO:utils:Model checkpointed at step 108


Epoch 7:  22%|██▏       | 200/919 [00:49<02:56,  4.06it/s, loss=0.001, v_num=33]
Validating: 0it [00:00, ?it/s][A
Epoch 7:  22%|██▏       | 202/919 [00:49<02:55,  4.09it/s, loss=0.001, v_num=33]
Epoch 7:  22%|██▏       | 204/919 [00:49<02:53,  4.11it/s, loss=0.001, v_num=33]
Epoch 7:  22%|██▏       | 206/919 [00:49<02:52,  4.14it/s, loss=0.001, v_num=33]
Epoch 7:  23%|██▎       | 208/919 [00:49<02:50,  4.16it/s, loss=0.001, v_num=33]
Epoch 7:  23%|██▎       | 210/919 [00:50<02:49,  4.19it/s, loss=0.001, v_num=33]
Epoch 7:  23%|██▎       | 212/919 [00:50<02:47,  4.21it/s, loss=0.001, v_num=33]
Epoch 7:  23%|██▎       | 214/919 [00:50<02:46,  4.24it/s, loss=0.001, v_num=33]
Epoch 7:  24%|██▎       | 216/919 [00:50<02:44,  4.26it/s, loss=0.001, v_num=33]
Epoch 7:  24%|██▎       | 218/919 [00:50<02:43,  4.29it/s, loss=0.001, v_num=33]
Epoch 7:  24%|██▍       | 220/919 [00:51<02:42,  4.31it/s, loss=0.001, v_num=33]
Epoch 7:  24%|██▍       | 222/919 [00:51<02:40,  4.33it/s, loss=0.001, v_nu

INFO:utils:***** Validation results *****
INFO:utils:avg_val_loss = tensor(0.1264, device='cuda:0')

INFO:utils:epoch = 5

INFO:utils:loss = tensor(0.0166, device='cuda:0')



Epoch 7:  47%|████▋     | 431/919 [01:10<01:19,  6.15it/s, loss=0.001, v_num=33]
Epoch 7:  69%|██████▊   | 631/919 [01:59<00:54,  5.28it/s, loss=0.001, v_num=33]
Epoch 7:  69%|██████▉   | 632/919 [01:59<00:54,  5.29it/s, loss=0.001, v_num=33]
Epoch 7:  69%|██████▉   | 634/919 [01:59<00:53,  5.29it/s, loss=0.001, v_num=33]
Epoch 7:  69%|██████▉   | 636/919 [01:59<00:53,  5.30it/s, loss=0.001, v_num=33]
Epoch 7:  69%|██████▉   | 638/919 [02:00<00:52,  5.31it/s, loss=0.001, v_num=33]
Epoch 7:  70%|██████▉   | 640/919 [02:00<00:52,  5.32it/s, loss=0.001, v_num=33]
Epoch 7:  70%|██████▉   | 642/919 [02:00<00:51,  5.33it/s, loss=0.001, v_num=33]
Epoch 7:  70%|███████   | 644/919 [02:00<00:51,  5.34it/s, loss=0.001, v_num=33]
Epoch 7:  70%|███████   | 646/919 [02:00<00:51,  5.35it/s, loss=0.001, v_num=33]
Epoch 7:  71%|███████   | 648/919 [02:01<00:50,  5.35it/s, loss=0.001, v_num=33]
Epoch 7:  71%|███████   | 650/919 [02:01<00:50,  5.36it/s, loss=0.001, v_num=33]
Epoch 7:  71%|███████   | 65

INFO:utils:***** Validation results *****
INFO:utils:avg_val_loss = tensor(0.1345, device='cuda:0')

INFO:utils:epoch = 6

INFO:utils:loss = tensor(0.0050, device='cuda:0')



Epoch 7:  94%|█████████▍| 862/919 [02:20<00:09,  6.14it/s, loss=0.001, v_num=33]
Epoch 7: 100%|██████████| 919/919 [02:34<00:00,  5.95it/s, loss=0.001, v_num=33]

INFO:utils:Model checkpointed at step 126


Epoch 8:  22%|██▏       | 200/919 [00:49<02:57,  4.06it/s, loss=0.000, v_num=33]
Validating: 0it [00:00, ?it/s][A
Epoch 8:  22%|██▏       | 202/919 [00:49<02:55,  4.08it/s, loss=0.000, v_num=33]
Epoch 8:  22%|██▏       | 204/919 [00:49<02:54,  4.11it/s, loss=0.000, v_num=33]
Epoch 8:  22%|██▏       | 206/919 [00:49<02:52,  4.13it/s, loss=0.000, v_num=33]
Epoch 8:  23%|██▎       | 208/919 [00:50<02:50,  4.16it/s, loss=0.000, v_num=33]
Epoch 8:  23%|██▎       | 210/919 [00:50<02:49,  4.18it/s, loss=0.000, v_num=33]
Epoch 8:  23%|██▎       | 212/919 [00:50<02:47,  4.21it/s, loss=0.000, v_num=33]
Epoch 8:  23%|██▎       | 214/919 [00:50<02:46,  4.23it/s, loss=0.000, v_num=33]
Epoch 8:  24%|██▎       | 216/919 [00:50<02:45,  4.26it/s, loss=0.000, v_num=33]
Epoch 8:  24%|██▎       | 218/919 [00:50<02:43,  4.28it/s, loss=0.000, v_num=33]
Epoch 8:  24%|██▍       | 220/919 [00:51<02:42,  4.31it/s, loss=0.000, v_num=33]
Epoch 8:  24%|██▍       | 222/919 [00:51<02:40,  4.33it/s, loss=0.000, v_nu

INFO:utils:***** Validation results *****
INFO:utils:avg_val_loss = tensor(0.1421, device='cuda:0')

INFO:utils:epoch = 6

INFO:utils:loss = tensor(0.0003, device='cuda:0')



Epoch 8:  47%|████▋     | 431/919 [01:10<01:19,  6.14it/s, loss=0.000, v_num=33]
Epoch 8:  69%|██████▊   | 631/919 [01:59<00:54,  5.28it/s, loss=0.000, v_num=33]
Epoch 8:  69%|██████▉   | 632/919 [01:59<00:54,  5.28it/s, loss=0.000, v_num=33]
Epoch 8:  69%|██████▉   | 634/919 [01:59<00:53,  5.29it/s, loss=0.000, v_num=33]
Epoch 8:  69%|██████▉   | 636/919 [01:59<00:53,  5.30it/s, loss=0.000, v_num=33]
Epoch 8:  69%|██████▉   | 638/919 [02:00<00:52,  5.31it/s, loss=0.000, v_num=33]
Epoch 8:  70%|██████▉   | 640/919 [02:00<00:52,  5.32it/s, loss=0.000, v_num=33]
Epoch 8:  70%|██████▉   | 642/919 [02:00<00:51,  5.33it/s, loss=0.000, v_num=33]
Epoch 8:  70%|███████   | 644/919 [02:00<00:51,  5.34it/s, loss=0.000, v_num=33]
Epoch 8:  70%|███████   | 646/919 [02:00<00:51,  5.34it/s, loss=0.000, v_num=33]
Epoch 8:  71%|███████   | 648/919 [02:01<00:50,  5.35it/s, loss=0.000, v_num=33]
Epoch 8:  71%|███████   | 650/919 [02:01<00:50,  5.36it/s, loss=0.000, v_num=33]
Epoch 8:  71%|███████   | 65

INFO:utils:***** Validation results *****
INFO:utils:avg_val_loss = tensor(0.1444, device='cuda:0')

INFO:utils:epoch = 7

INFO:utils:loss = tensor(0.0031, device='cuda:0')



Epoch 8:  94%|█████████▍| 862/919 [02:20<00:09,  6.13it/s, loss=0.000, v_num=33]
Epoch 8: 100%|██████████| 919/919 [02:34<00:00,  5.95it/s, loss=0.000, v_num=33]

INFO:utils:Model checkpointed at step 144


Epoch 9:  22%|██▏       | 200/919 [00:49<02:56,  4.06it/s, loss=0.000, v_num=33]
Validating: 0it [00:00, ?it/s][A
Epoch 9:  22%|██▏       | 202/919 [00:49<02:55,  4.09it/s, loss=0.000, v_num=33]
Epoch 9:  22%|██▏       | 204/919 [00:49<02:53,  4.11it/s, loss=0.000, v_num=33]
Epoch 9:  22%|██▏       | 206/919 [00:49<02:52,  4.14it/s, loss=0.000, v_num=33]
Epoch 9:  23%|██▎       | 208/919 [00:49<02:50,  4.16it/s, loss=0.000, v_num=33]
Epoch 9:  23%|██▎       | 210/919 [00:50<02:49,  4.19it/s, loss=0.000, v_num=33]
Epoch 9:  23%|██▎       | 212/919 [00:50<02:47,  4.21it/s, loss=0.000, v_num=33]
Epoch 9:  23%|██▎       | 214/919 [00:50<02:46,  4.24it/s, loss=0.000, v_num=33]
Epoch 9:  24%|██▎       | 216/919 [00:50<02:44,  4.26it/s, loss=0.000, v_num=33]
Epoch 9:  24%|██▎       | 218/919 [00:50<02:43,  4.29it/s, loss=0.000, v_num=33]
Epoch 9:  24%|██▍       | 220/919 [00:51<02:42,  4.31it/s, loss=0.000, v_num=33]
Epoch 9:  24%|██▍       | 222/919 [00:51<02:40,  4.33it/s, loss=0.000, v_nu

INFO:utils:***** Validation results *****
INFO:utils:avg_val_loss = tensor(0.1484, device='cuda:0')

INFO:utils:epoch = 7

INFO:utils:loss = tensor(0.0036, device='cuda:0')



Epoch 9:  47%|████▋     | 431/919 [01:10<01:19,  6.15it/s, loss=0.000, v_num=33]
Epoch 9:  69%|██████▊   | 631/919 [01:59<00:54,  5.28it/s, loss=0.000, v_num=33]
Epoch 9:  69%|██████▉   | 632/919 [01:59<00:54,  5.29it/s, loss=0.000, v_num=33]
Epoch 9:  69%|██████▉   | 634/919 [01:59<00:53,  5.30it/s, loss=0.000, v_num=33]
Epoch 9:  69%|██████▉   | 636/919 [01:59<00:53,  5.31it/s, loss=0.000, v_num=33]
Epoch 9:  69%|██████▉   | 638/919 [02:00<00:52,  5.31it/s, loss=0.000, v_num=33]
Epoch 9:  70%|██████▉   | 640/919 [02:00<00:52,  5.32it/s, loss=0.000, v_num=33]
Epoch 9:  70%|██████▉   | 642/919 [02:00<00:51,  5.33it/s, loss=0.000, v_num=33]
Epoch 9:  70%|███████   | 644/919 [02:00<00:51,  5.34it/s, loss=0.000, v_num=33]
Epoch 9:  70%|███████   | 646/919 [02:00<00:51,  5.35it/s, loss=0.000, v_num=33]
Epoch 9:  71%|███████   | 648/919 [02:00<00:50,  5.36it/s, loss=0.000, v_num=33]
Epoch 9:  71%|███████   | 650/919 [02:01<00:50,  5.37it/s, loss=0.000, v_num=33]
Epoch 9:  71%|███████   | 65

INFO:utils:***** Validation results *****
INFO:utils:avg_val_loss = tensor(0.1500, device='cuda:0')

INFO:utils:epoch = 8

INFO:utils:loss = tensor(0.0035, device='cuda:0')



Epoch 9:  94%|█████████▍| 862/919 [02:20<00:09,  6.14it/s, loss=0.000, v_num=33]
Epoch 9: 100%|██████████| 919/919 [02:34<00:00,  5.95it/s, loss=0.000, v_num=33]

INFO:utils:Model checkpointed at step 162


Epoch 10:  22%|██▏       | 200/919 [00:49<02:56,  4.06it/s, loss=0.000, v_num=33]
Validating: 0it [00:00, ?it/s][A
Epoch 10:  22%|██▏       | 202/919 [00:49<02:55,  4.09it/s, loss=0.000, v_num=33]
Epoch 10:  22%|██▏       | 204/919 [00:49<02:53,  4.11it/s, loss=0.000, v_num=33]
Epoch 10:  22%|██▏       | 206/919 [00:49<02:52,  4.14it/s, loss=0.000, v_num=33]
Epoch 10:  23%|██▎       | 208/919 [00:49<02:50,  4.16it/s, loss=0.000, v_num=33]
Epoch 10:  23%|██▎       | 210/919 [00:50<02:49,  4.19it/s, loss=0.000, v_num=33]
Epoch 10:  23%|██▎       | 212/919 [00:50<02:47,  4.21it/s, loss=0.000, v_num=33]
Epoch 10:  23%|██▎       | 214/919 [00:50<02:46,  4.24it/s, loss=0.000, v_num=33]
Epoch 10:  24%|██▎       | 216/919 [00:50<02:44,  4.26it/s, loss=0.000, v_num=33]
Epoch 10:  24%|██▎       | 218/919 [00:50<02:43,  4.29it/s, loss=0.000, v_num=33]
Epoch 10:  24%|██▍       | 220/919 [00:51<02:42,  4.31it/s, loss=0.000, v_num=33]
Epoch 10:  24%|██▍       | 222/919 [00:51<02:40,  4.34it/s, loss

INFO:utils:***** Validation results *****
INFO:utils:avg_val_loss = tensor(0.1533, device='cuda:0')

INFO:utils:epoch = 8

INFO:utils:loss = tensor(0.0007, device='cuda:0')



Epoch 10:  47%|████▋     | 431/919 [01:10<01:19,  6.14it/s, loss=0.000, v_num=33]
Epoch 10:  69%|██████▊   | 631/919 [01:59<00:54,  5.28it/s, loss=0.000, v_num=33]
Epoch 10:  69%|██████▉   | 632/919 [01:59<00:54,  5.28it/s, loss=0.000, v_num=33]
Epoch 10:  69%|██████▉   | 634/919 [01:59<00:53,  5.29it/s, loss=0.000, v_num=33]
Epoch 10:  69%|██████▉   | 636/919 [01:59<00:53,  5.30it/s, loss=0.000, v_num=33]
Epoch 10:  69%|██████▉   | 638/919 [02:00<00:52,  5.31it/s, loss=0.000, v_num=33]
Epoch 10:  70%|██████▉   | 640/919 [02:00<00:52,  5.32it/s, loss=0.000, v_num=33]
Epoch 10:  70%|██████▉   | 642/919 [02:00<00:52,  5.33it/s, loss=0.000, v_num=33]
Epoch 10:  70%|███████   | 644/919 [02:00<00:51,  5.33it/s, loss=0.000, v_num=33]
Epoch 10:  70%|███████   | 646/919 [02:00<00:51,  5.34it/s, loss=0.000, v_num=33]
Epoch 10:  71%|███████   | 648/919 [02:01<00:50,  5.35it/s, loss=0.000, v_num=33]
Epoch 10:  71%|███████   | 650/919 [02:01<00:50,  5.36it/s, loss=0.000, v_num=33]
Epoch 10:  71%|█

INFO:utils:***** Validation results *****
INFO:utils:avg_val_loss = tensor(0.1551, device='cuda:0')

INFO:utils:epoch = 9

INFO:utils:loss = tensor(0.0004, device='cuda:0')



Epoch 10:  94%|█████████▍| 862/919 [02:20<00:09,  6.13it/s, loss=0.000, v_num=33]
Epoch 10: 100%|██████████| 919/919 [02:34<00:00,  5.95it/s, loss=0.000, v_num=33]

INFO:utils:Model checkpointed at step 180


Epoch 11:  22%|██▏       | 200/919 [00:49<02:56,  4.06it/s, loss=0.000, v_num=33]
Validating: 0it [00:00, ?it/s][A
Epoch 11:  22%|██▏       | 202/919 [00:49<02:55,  4.09it/s, loss=0.000, v_num=33]
Epoch 11:  22%|██▏       | 204/919 [00:49<02:53,  4.11it/s, loss=0.000, v_num=33]
Epoch 11:  22%|██▏       | 206/919 [00:49<02:52,  4.14it/s, loss=0.000, v_num=33]
Epoch 11:  23%|██▎       | 208/919 [00:49<02:50,  4.16it/s, loss=0.000, v_num=33]
Epoch 11:  23%|██▎       | 210/919 [00:50<02:49,  4.19it/s, loss=0.000, v_num=33]
Epoch 11:  23%|██▎       | 212/919 [00:50<02:47,  4.21it/s, loss=0.000, v_num=33]
Epoch 11:  23%|██▎       | 214/919 [00:50<02:46,  4.24it/s, loss=0.000, v_num=33]
Epoch 11:  24%|██▎       | 216/919 [00:50<02:44,  4.26it/s, loss=0.000, v_num=33]
Epoch 11:  24%|██▎       | 218/919 [00:50<02:43,  4.29it/s, loss=0.000, v_num=33]
Epoch 11:  24%|██▍       | 220/919 [00:51<02:42,  4.31it/s, loss=0.000, v_num=33]
Epoch 11:  24%|██▍       | 222/919 [00:51<02:40,  4.33it/s, loss

INFO:utils:***** Validation results *****
INFO:utils:avg_val_loss = tensor(0.1606, device='cuda:0')

INFO:utils:epoch = 9

INFO:utils:loss = tensor(0.0003, device='cuda:0')



Epoch 11:  47%|████▋     | 431/919 [01:10<01:19,  6.15it/s, loss=0.000, v_num=33]
Epoch 11:  69%|██████▊   | 631/919 [01:59<00:54,  5.29it/s, loss=0.000, v_num=33]
Epoch 11:  69%|██████▉   | 632/919 [01:59<00:54,  5.29it/s, loss=0.000, v_num=33]
Epoch 11:  69%|██████▉   | 634/919 [01:59<00:53,  5.30it/s, loss=0.000, v_num=33]
Epoch 11:  69%|██████▉   | 636/919 [01:59<00:53,  5.31it/s, loss=0.000, v_num=33]
Epoch 11:  69%|██████▉   | 638/919 [01:59<00:52,  5.32it/s, loss=0.000, v_num=33]
Epoch 11:  70%|██████▉   | 640/919 [02:00<00:52,  5.33it/s, loss=0.000, v_num=33]
Epoch 11:  70%|██████▉   | 642/919 [02:00<00:51,  5.34it/s, loss=0.000, v_num=33]
Epoch 11:  70%|███████   | 644/919 [02:00<00:51,  5.35it/s, loss=0.000, v_num=33]
Epoch 11:  70%|███████   | 646/919 [02:00<00:50,  5.35it/s, loss=0.000, v_num=33]
Epoch 11:  71%|███████   | 648/919 [02:00<00:50,  5.36it/s, loss=0.000, v_num=33]
Epoch 11:  71%|███████   | 650/919 [02:01<00:50,  5.37it/s, loss=0.000, v_num=33]
Epoch 11:  71%|█

INFO:utils:***** Validation results *****
INFO:utils:avg_val_loss = tensor(0.1628, device='cuda:0')

INFO:utils:epoch = 10

INFO:utils:loss = tensor(0.0330, device='cuda:0')



Epoch 11:  94%|█████████▍| 862/919 [02:20<00:09,  6.14it/s, loss=0.000, v_num=33]
Epoch 11: 100%|██████████| 919/919 [02:34<00:00,  5.95it/s, loss=0.000, v_num=33]

INFO:utils:Model checkpointed at step 198


Epoch 12:  22%|██▏       | 200/919 [00:49<02:56,  4.07it/s, loss=0.000, v_num=33]
Validating: 0it [00:00, ?it/s][A
Epoch 12:  22%|██▏       | 202/919 [00:49<02:54,  4.10it/s, loss=0.000, v_num=33]
Epoch 12:  22%|██▏       | 204/919 [00:49<02:53,  4.12it/s, loss=0.000, v_num=33]
Epoch 12:  22%|██▏       | 206/919 [00:49<02:51,  4.15it/s, loss=0.000, v_num=33]
Epoch 12:  23%|██▎       | 208/919 [00:49<02:50,  4.17it/s, loss=0.000, v_num=33]
Epoch 12:  23%|██▎       | 210/919 [00:50<02:48,  4.20it/s, loss=0.000, v_num=33]
Epoch 12:  23%|██▎       | 212/919 [00:50<02:47,  4.22it/s, loss=0.000, v_num=33]
Epoch 12:  23%|██▎       | 214/919 [00:50<02:45,  4.25it/s, loss=0.000, v_num=33]
Epoch 12:  24%|██▎       | 216/919 [00:50<02:44,  4.27it/s, loss=0.000, v_num=33]
Epoch 12:  24%|██▎       | 218/919 [00:50<02:43,  4.30it/s, loss=0.000, v_num=33]
Epoch 12:  24%|██▍       | 220/919 [00:50<02:41,  4.32it/s, loss=0.000, v_num=33]
Epoch 12:  24%|██▍       | 222/919 [00:51<02:40,  4.34it/s, loss

INFO:utils:***** Validation results *****
INFO:utils:avg_val_loss = tensor(0.1662, device='cuda:0')

INFO:utils:epoch = 10

INFO:utils:loss = tensor(0.0002, device='cuda:0')



Epoch 12:  47%|████▋     | 431/919 [01:09<01:19,  6.16it/s, loss=0.000, v_num=33]
Epoch 12:  69%|██████▊   | 631/919 [01:59<00:54,  5.29it/s, loss=0.000, v_num=33]
Epoch 12:  69%|██████▉   | 632/919 [01:59<00:54,  5.29it/s, loss=0.000, v_num=33]
Epoch 12:  69%|██████▉   | 634/919 [01:59<00:53,  5.30it/s, loss=0.000, v_num=33]
Epoch 12:  69%|██████▉   | 636/919 [01:59<00:53,  5.31it/s, loss=0.000, v_num=33]
Epoch 12:  69%|██████▉   | 638/919 [02:00<00:52,  5.32it/s, loss=0.000, v_num=33]
Epoch 12:  70%|██████▉   | 640/919 [02:00<00:52,  5.33it/s, loss=0.000, v_num=33]
Epoch 12:  70%|██████▉   | 642/919 [02:00<00:51,  5.33it/s, loss=0.000, v_num=33]
Epoch 12:  70%|███████   | 644/919 [02:00<00:51,  5.34it/s, loss=0.000, v_num=33]
Epoch 12:  70%|███████   | 646/919 [02:00<00:51,  5.35it/s, loss=0.000, v_num=33]
Epoch 12:  71%|███████   | 648/919 [02:00<00:50,  5.36it/s, loss=0.000, v_num=33]
Epoch 12:  71%|███████   | 650/919 [02:01<00:50,  5.37it/s, loss=0.000, v_num=33]
Epoch 12:  71%|█

INFO:utils:***** Validation results *****
INFO:utils:avg_val_loss = tensor(0.1690, device='cuda:0')

INFO:utils:epoch = 11

INFO:utils:loss = tensor(0.0004, device='cuda:0')



Epoch 12:  94%|█████████▍| 862/919 [02:20<00:09,  6.14it/s, loss=0.000, v_num=33]
Epoch 12: 100%|██████████| 919/919 [02:34<00:00,  5.95it/s, loss=0.000, v_num=33]

INFO:utils:Model checkpointed at step 216


Epoch 13:  22%|██▏       | 200/919 [00:49<02:56,  4.07it/s, loss=0.000, v_num=33]
Validating: 0it [00:00, ?it/s][A
Epoch 13:  22%|██▏       | 202/919 [00:49<02:55,  4.09it/s, loss=0.000, v_num=33]
Epoch 13:  22%|██▏       | 204/919 [00:49<02:53,  4.12it/s, loss=0.000, v_num=33]
Epoch 13:  22%|██▏       | 206/919 [00:49<02:52,  4.14it/s, loss=0.000, v_num=33]
Epoch 13:  23%|██▎       | 208/919 [00:49<02:50,  4.17it/s, loss=0.000, v_num=33]
Epoch 13:  23%|██▎       | 210/919 [00:50<02:49,  4.19it/s, loss=0.000, v_num=33]
Epoch 13:  23%|██▎       | 212/919 [00:50<02:47,  4.22it/s, loss=0.000, v_num=33]
Epoch 13:  23%|██▎       | 214/919 [00:50<02:46,  4.24it/s, loss=0.000, v_num=33]
Epoch 13:  24%|██▎       | 216/919 [00:50<02:44,  4.26it/s, loss=0.000, v_num=33]
Epoch 13:  24%|██▎       | 218/919 [00:50<02:43,  4.29it/s, loss=0.000, v_num=33]
Epoch 13:  24%|██▍       | 220/919 [00:51<02:42,  4.31it/s, loss=0.000, v_num=33]
Epoch 13:  24%|██▍       | 222/919 [00:51<02:40,  4.34it/s, loss

INFO:utils:***** Validation results *****
INFO:utils:avg_val_loss = tensor(0.1708, device='cuda:0')

INFO:utils:epoch = 11

INFO:utils:loss = tensor(0.0021, device='cuda:0')



Epoch 13:  47%|████▋     | 431/919 [01:10<01:19,  6.15it/s, loss=0.000, v_num=33]
Epoch 13:  69%|██████▊   | 631/919 [01:59<00:54,  5.29it/s, loss=0.000, v_num=33]
Epoch 13:  69%|██████▉   | 632/919 [01:59<00:54,  5.29it/s, loss=0.000, v_num=33]
Epoch 13:  69%|██████▉   | 634/919 [01:59<00:53,  5.30it/s, loss=0.000, v_num=33]
Epoch 13:  69%|██████▉   | 636/919 [01:59<00:53,  5.31it/s, loss=0.000, v_num=33]
Epoch 13:  69%|██████▉   | 638/919 [02:00<00:52,  5.32it/s, loss=0.000, v_num=33]
Epoch 13:  70%|██████▉   | 640/919 [02:00<00:52,  5.32it/s, loss=0.000, v_num=33]
Epoch 13:  70%|██████▉   | 642/919 [02:00<00:51,  5.33it/s, loss=0.000, v_num=33]
Epoch 13:  70%|███████   | 644/919 [02:00<00:51,  5.34it/s, loss=0.000, v_num=33]
Epoch 13:  70%|███████   | 646/919 [02:00<00:51,  5.35it/s, loss=0.000, v_num=33]
Epoch 13:  71%|███████   | 648/919 [02:00<00:50,  5.36it/s, loss=0.000, v_num=33]
Epoch 13:  71%|███████   | 650/919 [02:01<00:50,  5.37it/s, loss=0.000, v_num=33]
Epoch 13:  71%|█

INFO:utils:***** Validation results *****
INFO:utils:avg_val_loss = tensor(0.1715, device='cuda:0')

INFO:utils:epoch = 12

INFO:utils:loss = tensor(0.0002, device='cuda:0')



Epoch 13:  94%|█████████▍| 862/919 [02:20<00:09,  6.14it/s, loss=0.000, v_num=33]
Epoch 13: 100%|██████████| 919/919 [02:34<00:00,  5.95it/s, loss=0.000, v_num=33]

INFO:utils:Model checkpointed at step 234


Epoch 14:  22%|██▏       | 200/919 [00:49<02:56,  4.06it/s, loss=0.000, v_num=33]
Validating: 0it [00:00, ?it/s][A
Epoch 14:  22%|██▏       | 202/919 [00:49<02:55,  4.09it/s, loss=0.000, v_num=33]
Epoch 14:  22%|██▏       | 204/919 [00:49<02:53,  4.11it/s, loss=0.000, v_num=33]
Epoch 14:  22%|██▏       | 206/919 [00:49<02:52,  4.14it/s, loss=0.000, v_num=33]
Epoch 14:  23%|██▎       | 208/919 [00:49<02:50,  4.16it/s, loss=0.000, v_num=33]
Epoch 14:  23%|██▎       | 210/919 [00:50<02:49,  4.19it/s, loss=0.000, v_num=33]
Epoch 14:  23%|██▎       | 212/919 [00:50<02:47,  4.21it/s, loss=0.000, v_num=33]
Epoch 14:  23%|██▎       | 214/919 [00:50<02:46,  4.24it/s, loss=0.000, v_num=33]
Epoch 14:  24%|██▎       | 216/919 [00:50<02:44,  4.26it/s, loss=0.000, v_num=33]
Epoch 14:  24%|██▎       | 218/919 [00:50<02:43,  4.29it/s, loss=0.000, v_num=33]
Epoch 14:  24%|██▍       | 220/919 [00:51<02:42,  4.31it/s, loss=0.000, v_num=33]
Epoch 14:  24%|██▍       | 222/919 [00:51<02:40,  4.33it/s, loss

INFO:utils:***** Validation results *****
INFO:utils:avg_val_loss = tensor(0.1722, device='cuda:0')

INFO:utils:epoch = 12

INFO:utils:loss = tensor(0.0002, device='cuda:0')



Epoch 14:  47%|████▋     | 431/919 [01:10<01:19,  6.15it/s, loss=0.000, v_num=33]
Epoch 14:  69%|██████▊   | 631/919 [01:59<00:54,  5.28it/s, loss=0.000, v_num=33]
Epoch 14:  69%|██████▉   | 632/919 [01:59<00:54,  5.29it/s, loss=0.000, v_num=33]
Epoch 14:  69%|██████▉   | 634/919 [01:59<00:53,  5.30it/s, loss=0.000, v_num=33]
Epoch 14:  69%|██████▉   | 636/919 [01:59<00:53,  5.31it/s, loss=0.000, v_num=33]
Epoch 14:  69%|██████▉   | 638/919 [02:00<00:52,  5.31it/s, loss=0.000, v_num=33]
Epoch 14:  70%|██████▉   | 640/919 [02:00<00:52,  5.32it/s, loss=0.000, v_num=33]
Epoch 14:  70%|██████▉   | 642/919 [02:00<00:51,  5.33it/s, loss=0.000, v_num=33]
Epoch 14:  70%|███████   | 644/919 [02:00<00:51,  5.34it/s, loss=0.000, v_num=33]
Epoch 14:  70%|███████   | 646/919 [02:00<00:51,  5.35it/s, loss=0.000, v_num=33]
Epoch 14:  71%|███████   | 648/919 [02:00<00:50,  5.36it/s, loss=0.000, v_num=33]
Epoch 14:  71%|███████   | 650/919 [02:01<00:50,  5.37it/s, loss=0.000, v_num=33]
Epoch 14:  71%|█

INFO:utils:***** Validation results *****
INFO:utils:avg_val_loss = tensor(0.1746, device='cuda:0')

INFO:utils:epoch = 13

INFO:utils:loss = tensor(8.5159e-05, device='cuda:0')



Epoch 14:  94%|█████████▍| 862/919 [02:20<00:09,  6.14it/s, loss=0.000, v_num=33]
Epoch 14: 100%|██████████| 919/919 [02:34<00:00,  5.95it/s, loss=0.000, v_num=33]

INFO:utils:Model checkpointed at step 252


Epoch 15:  22%|██▏       | 200/919 [00:49<02:56,  4.07it/s, loss=0.000, v_num=33]
Validating: 0it [00:00, ?it/s][A
Epoch 15:  22%|██▏       | 202/919 [00:49<02:55,  4.09it/s, loss=0.000, v_num=33]
Epoch 15:  22%|██▏       | 204/919 [00:49<02:53,  4.12it/s, loss=0.000, v_num=33]
Epoch 15:  22%|██▏       | 206/919 [00:49<02:52,  4.14it/s, loss=0.000, v_num=33]
Epoch 15:  23%|██▎       | 208/919 [00:49<02:50,  4.17it/s, loss=0.000, v_num=33]
Epoch 15:  23%|██▎       | 210/919 [00:50<02:49,  4.19it/s, loss=0.000, v_num=33]
Epoch 15:  23%|██▎       | 212/919 [00:50<02:47,  4.22it/s, loss=0.000, v_num=33]
Epoch 15:  23%|██▎       | 214/919 [00:50<02:46,  4.24it/s, loss=0.000, v_num=33]
Epoch 15:  24%|██▎       | 216/919 [00:50<02:44,  4.27it/s, loss=0.000, v_num=33]
Epoch 15:  24%|██▎       | 218/919 [00:50<02:43,  4.29it/s, loss=0.000, v_num=33]
Epoch 15:  24%|██▍       | 220/919 [00:50<02:42,  4.31it/s, loss=0.000, v_num=33]
Epoch 15:  24%|██▍       | 222/919 [00:51<02:40,  4.34it/s, loss

INFO:utils:***** Validation results *****
INFO:utils:avg_val_loss = tensor(0.1763, device='cuda:0')

INFO:utils:epoch = 13

INFO:utils:loss = tensor(0.0001, device='cuda:0')



Epoch 15:  47%|████▋     | 431/919 [01:10<01:19,  6.15it/s, loss=0.000, v_num=33]
Epoch 15:  69%|██████▊   | 631/919 [01:59<00:54,  5.29it/s, loss=0.000, v_num=33]
Epoch 15:  69%|██████▉   | 632/919 [01:59<00:54,  5.29it/s, loss=0.000, v_num=33]
Epoch 15:  69%|██████▉   | 634/919 [01:59<00:53,  5.30it/s, loss=0.000, v_num=33]
Epoch 15:  69%|██████▉   | 636/919 [01:59<00:53,  5.31it/s, loss=0.000, v_num=33]
Epoch 15:  69%|██████▉   | 638/919 [01:59<00:52,  5.32it/s, loss=0.000, v_num=33]
Epoch 15:  70%|██████▉   | 640/919 [02:00<00:52,  5.33it/s, loss=0.000, v_num=33]
Epoch 15:  70%|██████▉   | 642/919 [02:00<00:51,  5.33it/s, loss=0.000, v_num=33]
Epoch 15:  70%|███████   | 644/919 [02:00<00:51,  5.34it/s, loss=0.000, v_num=33]
Epoch 15:  70%|███████   | 646/919 [02:00<00:51,  5.35it/s, loss=0.000, v_num=33]
Epoch 15:  71%|███████   | 648/919 [02:00<00:50,  5.36it/s, loss=0.000, v_num=33]
Epoch 15:  71%|███████   | 650/919 [02:01<00:50,  5.37it/s, loss=0.000, v_num=33]
Epoch 15:  71%|█

INFO:utils:***** Validation results *****
INFO:utils:avg_val_loss = tensor(0.1779, device='cuda:0')

INFO:utils:epoch = 14

INFO:utils:loss = tensor(0.0003, device='cuda:0')



Epoch 15:  94%|█████████▍| 862/919 [02:20<00:09,  6.14it/s, loss=0.000, v_num=33]
Epoch 15: 100%|██████████| 919/919 [02:34<00:00,  5.95it/s, loss=0.000, v_num=33]

INFO:utils:Model checkpointed at step 270


Epoch 16:  22%|██▏       | 200/919 [00:49<02:56,  4.07it/s, loss=0.000, v_num=33]
Validating: 0it [00:00, ?it/s][A
Epoch 16:  22%|██▏       | 202/919 [00:49<02:55,  4.09it/s, loss=0.000, v_num=33]
Epoch 16:  22%|██▏       | 204/919 [00:49<02:53,  4.12it/s, loss=0.000, v_num=33]
Epoch 16:  22%|██▏       | 206/919 [00:49<02:52,  4.14it/s, loss=0.000, v_num=33]
Epoch 16:  23%|██▎       | 208/919 [00:49<02:50,  4.17it/s, loss=0.000, v_num=33]
Epoch 16:  23%|██▎       | 210/919 [00:50<02:49,  4.19it/s, loss=0.000, v_num=33]
Epoch 16:  23%|██▎       | 212/919 [00:50<02:47,  4.22it/s, loss=0.000, v_num=33]
Epoch 16:  23%|██▎       | 214/919 [00:50<02:46,  4.24it/s, loss=0.000, v_num=33]
Epoch 16:  24%|██▎       | 216/919 [00:50<02:44,  4.27it/s, loss=0.000, v_num=33]
Epoch 16:  24%|██▎       | 218/919 [00:50<02:43,  4.29it/s, loss=0.000, v_num=33]
Epoch 16:  24%|██▍       | 220/919 [00:51<02:42,  4.31it/s, loss=0.000, v_num=33]
Epoch 16:  24%|██▍       | 222/919 [00:51<02:40,  4.34it/s, loss

INFO:utils:***** Validation results *****
INFO:utils:avg_val_loss = tensor(0.1790, device='cuda:0')

INFO:utils:epoch = 14

INFO:utils:loss = tensor(0.0004, device='cuda:0')



Epoch 16:  47%|████▋     | 431/919 [01:10<01:19,  6.14it/s, loss=0.000, v_num=33]
Epoch 16:  69%|██████▊   | 631/919 [01:59<00:54,  5.28it/s, loss=0.000, v_num=33]
Epoch 16:  69%|██████▉   | 632/919 [01:59<00:54,  5.29it/s, loss=0.000, v_num=33]
Epoch 16:  69%|██████▉   | 634/919 [01:59<00:53,  5.30it/s, loss=0.000, v_num=33]
Epoch 16:  69%|██████▉   | 636/919 [01:59<00:53,  5.31it/s, loss=0.000, v_num=33]
Epoch 16:  69%|██████▉   | 638/919 [02:00<00:52,  5.31it/s, loss=0.000, v_num=33]
Epoch 16:  70%|██████▉   | 640/919 [02:00<00:52,  5.32it/s, loss=0.000, v_num=33]
Epoch 16:  70%|██████▉   | 642/919 [02:00<00:51,  5.33it/s, loss=0.000, v_num=33]
Epoch 16:  70%|███████   | 644/919 [02:00<00:51,  5.34it/s, loss=0.000, v_num=33]
Epoch 16:  70%|███████   | 646/919 [02:00<00:51,  5.35it/s, loss=0.000, v_num=33]
Epoch 16:  71%|███████   | 648/919 [02:00<00:50,  5.36it/s, loss=0.000, v_num=33]
Epoch 16:  71%|███████   | 650/919 [02:01<00:50,  5.37it/s, loss=0.000, v_num=33]
Epoch 16:  71%|█

INFO:utils:***** Validation results *****
INFO:utils:avg_val_loss = tensor(0.1794, device='cuda:0')

INFO:utils:epoch = 15

INFO:utils:loss = tensor(0.0004, device='cuda:0')



Epoch 16:  94%|█████████▍| 862/919 [02:20<00:09,  6.14it/s, loss=0.000, v_num=33]
Epoch 16: 100%|██████████| 919/919 [02:34<00:00,  5.95it/s, loss=0.000, v_num=33]

INFO:utils:Model checkpointed at step 288


Epoch 16: 100%|██████████| 919/919 [02:40<00:00,  5.73it/s, loss=0.000, v_num=33]
