In [1]:
!pip install einops

%load_ext autoreload
%autoreload 2

import os
import sys
import glob
import random
import zipfile

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image
from torch.utils.data import DataLoader, Dataset
import torchvision as tv
from tqdm.notebook import tqdm

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com


In [11]:
root_dir = '/home/ubuntu'
data_dir = os.path.join(root_dir, 'CheXpert-v1.0-small')
vit_dir = os.path.join(root_dir, 'vit-pytorch')
os.chdir(vit_dir)
print(f'\nChanged CWD to "{vit_dir}"')
sys.path.append('./vit_pytorch')
from vit_pytorch import ViT


Changed CWD to "/home/ubuntu/vit-pytorch"


In [10]:
print(sys.path)

['/home/ubuntu/vit-pytorch', '/home/ubuntu/anaconda3/envs/pytorch_latest_p37/lib/python37.zip', '/home/ubuntu/anaconda3/envs/pytorch_latest_p37/lib/python3.7', '/home/ubuntu/anaconda3/envs/pytorch_latest_p37/lib/python3.7/lib-dynload', '', '/home/ubuntu/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages', '/home/ubuntu/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/locket-0.2.1-py3.7.egg', '/home/ubuntu/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/IPython/extensions', '/home/ubuntu/.ipython', './vit_pytorch', './chexpert']


In [12]:
import pandas as pd
from PIL import Image


USE_GPU = torch.cuda.is_available()
device = 'cuda' if USE_GPU else 'cpu'

long_type = torch.cuda.LongTensor if USE_GPU else torch.long

# def read_csv(path):
#   with open(path) as f:
#     df = pd.read_csv(f)
#   return df

class CxrDataset(Dataset):
  def __init__(self,
               root_dir,
               csv_path,
               transforms):
    """
    Args:
      project_root_dir: Absolute path to the project directory.
      csv_path: Relative path to data CSV from project_root_dir.
    """
    self.root_dir = root_dir
    raw_df = pd.read_csv(os.path.join(root_dir, csv_path))
    self.df = raw_df.replace(to_replace=-1, value=1).fillna(0)
    self.transforms = transforms

  def __len__(self):
    # return len(df)
    return 64

  def __getitem__(self, idx):
    row = self.df.iloc[idx]
    image = Image.open(os.path.join(root_dir, row.Path))
    image = self.transforms(image)
    label = row['No Finding': 'Support Devices'].values.astype(np.float32)
    # TODO(sahark): Make multi-label, multi-class
    # one_hot_label = F.one_hot(label, num_classes=2).type(torch.float32)
    # torch.FloatTensor(label)
    return image, torch.from_numpy(label)

config = {
    'image_size': 256,
    'num_classes': 14,
}

train_transforms = tv.transforms.Compose(
    [
        tv.transforms.Resize((config['image_size'], config['image_size'])),
        tv.transforms.RandomHorizontalFlip(),
        tv.transforms.ToTensor(),
    ]
)

tune_csv_path = 'CheXpert-v1.0-small/valid.csv'
train_csv_path = 'CheXpert-v1.0-small/train.csv'
train_dataset = CxrDataset(root_dir, train_csv_path, train_transforms)
tune_dataset = CxrDataset(root_dir, tune_csv_path, train_transforms)    

In [13]:
vit_model = ViT(
  image_size = 256,
  patch_size = 16,
  num_classes = config['num_classes'],
  dim = 128,
  depth = 4,
  heads = 8,
  channels = 1,
  mlp_dim = 3,
)

class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(256*256, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, config['num_classes']),
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

vanilla_model = NeuralNetwork()

In [16]:
lr = 3e-5
gamma = 0.7
# https://drive.google.com/corp/drive/u/0/folders/1N5Iir5AEKZO2Lev4nOg0qi96kK9QQrKW
import math

class Trainer():
  def __init__(self,
               model,
               train_dataset,
               tune_dataset,
               **kwargs):
    self.batch_size = kwargs.pop("batch_size", 16)
    self.num_epochs = kwargs.pop("num_epochs", 1)
    self.lr = kwargs.pop("lr")
    self.device = kwargs.pop("device")

    self.train_loader = DataLoader(train_dataset, self.batch_size, shuffle=True)
    self.tune_loader = DataLoader(tune_dataset, self.batch_size * 2)
    self.model = model.to(self.device)

    self.optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    self.scheduler = torch.optim.lr_scheduler.StepLR(
        self.optimizer, step_size=1, gamma=kwargs.pop("gamma"))
    self.loss_fn = torch.nn.BCEWithLogitsLoss()


  def _step(self, image, label):
    image = image.to(self.device)
    label = label.to(self.device)

    model = self.model.to(device)
    predictions = model(image)
    loss = self.loss_fn(predictions, label)

    self.optimizer.zero_grad()
    loss.backward()
    self.optimizer.step()

  def eval_epoch(self):
    eval_loss = 0
    with torch.no_grad():
      for image, label in self.tune_loader:
        image = image.to(device)
        label = label.to(device)
        
        predictions = self.model(image)
        eval_loss += self.loss_fn(predictions, label)
    eval_loss /= self.tune_loader.__len__()
    return eval_loss
    
      
  def train(self):
    for epoch in range(self.num_epochs):
      for image, label in tqdm(self.train_loader):
        self._step(image, label)
      eval_loss = self.eval_epoch()
      print(f'epoch {epoch} eval_loss {eval_loss:.4f}')


trainer = Trainer(
    model=vit_model,
    train_dataset=train_dataset,
    tune_dataset=tune_dataset,
    num_epochs=500,
    batch_size=64,
    lr=lr,
    gamma=gamma,
    device=device,
)
trainer.train()

  0%|          | 0/1 [00:00<?, ?it/s]

epoch 0 eval_loss 0.4072


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 1 eval_loss 0.4070


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 2 eval_loss 0.4065


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 3 eval_loss 0.4059


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 4 eval_loss 0.4057


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 5 eval_loss 0.4057


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 6 eval_loss 0.4059


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 7 eval_loss 0.4062


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 8 eval_loss 0.4064


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 9 eval_loss 0.4064


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 10 eval_loss 0.4064


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 11 eval_loss 0.4062


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 12 eval_loss 0.4059


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 13 eval_loss 0.4058


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 14 eval_loss 0.4058


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 15 eval_loss 0.4058


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 16 eval_loss 0.4060


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 17 eval_loss 0.4061


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 18 eval_loss 0.4061


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 19 eval_loss 0.4061


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 20 eval_loss 0.4060


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 21 eval_loss 0.4059


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 22 eval_loss 0.4059


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 23 eval_loss 0.4058


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 24 eval_loss 0.4059


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 25 eval_loss 0.4059


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 26 eval_loss 0.4059


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 27 eval_loss 0.4059


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 28 eval_loss 0.4059


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 29 eval_loss 0.4059


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 30 eval_loss 0.4059


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 31 eval_loss 0.4059


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 32 eval_loss 0.4059


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 33 eval_loss 0.4059


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 34 eval_loss 0.4059


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 35 eval_loss 0.4059


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 36 eval_loss 0.4058


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 37 eval_loss 0.4058


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 38 eval_loss 0.4058


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 39 eval_loss 0.4058


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 40 eval_loss 0.4059


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 41 eval_loss 0.4059


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 42 eval_loss 0.4058


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 43 eval_loss 0.4058


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 44 eval_loss 0.4058


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 45 eval_loss 0.4057


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 46 eval_loss 0.4058


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 47 eval_loss 0.4058


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 48 eval_loss 0.4058


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 49 eval_loss 0.4058


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 50 eval_loss 0.4058


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 51 eval_loss 0.4058


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 52 eval_loss 0.4057


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 53 eval_loss 0.4057


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 54 eval_loss 0.4057


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 55 eval_loss 0.4057


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 56 eval_loss 0.4057


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 57 eval_loss 0.4056


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 58 eval_loss 0.4055


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 59 eval_loss 0.4056


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 60 eval_loss 0.4056


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 61 eval_loss 0.4055


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 62 eval_loss 0.4054


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 63 eval_loss 0.4054


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 64 eval_loss 0.4055


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 65 eval_loss 0.4055


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 66 eval_loss 0.4054


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 67 eval_loss 0.4053


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 68 eval_loss 0.4051


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 69 eval_loss 0.4052


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 70 eval_loss 0.4053


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 71 eval_loss 0.4051


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 72 eval_loss 0.4050


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 73 eval_loss 0.4049


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 74 eval_loss 0.4049


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 75 eval_loss 0.4049


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 76 eval_loss 0.4048


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 77 eval_loss 0.4045


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 78 eval_loss 0.4044


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 79 eval_loss 0.4047


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 80 eval_loss 0.4043


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 81 eval_loss 0.4041


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 82 eval_loss 0.4038


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 83 eval_loss 0.4037


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 84 eval_loss 0.4035


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 85 eval_loss 0.4037


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 86 eval_loss 0.4030


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 87 eval_loss 0.4048


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 88 eval_loss 0.4034


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 89 eval_loss 0.4034


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 90 eval_loss 0.4040


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 91 eval_loss 0.4030


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 92 eval_loss 0.4037


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 93 eval_loss 0.4042


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 94 eval_loss 0.4043


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 95 eval_loss 0.4021


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 96 eval_loss 0.4051


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 97 eval_loss 0.4050


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 98 eval_loss 0.4060


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 99 eval_loss 0.4083


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 100 eval_loss 0.4080


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 101 eval_loss 0.4037


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 102 eval_loss 0.4164


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 103 eval_loss 0.4101


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 104 eval_loss 0.4098


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 105 eval_loss 0.4082


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 106 eval_loss 0.4157


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 107 eval_loss 0.4104


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 108 eval_loss 0.4069


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 109 eval_loss 0.4084


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 110 eval_loss 0.4126


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 111 eval_loss 0.4176


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 112 eval_loss 0.4124


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 113 eval_loss 0.4104


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 114 eval_loss 0.4104


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 115 eval_loss 0.4151


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 116 eval_loss 0.4171


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 117 eval_loss 0.4098


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 118 eval_loss 0.4144


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 119 eval_loss 0.4123


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 120 eval_loss 0.4187


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 121 eval_loss 0.4161


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 122 eval_loss 0.4105


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 123 eval_loss 0.4166


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 124 eval_loss 0.4153


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 125 eval_loss 0.4215


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 126 eval_loss 0.4174


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 127 eval_loss 0.4136


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 128 eval_loss 0.4199


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 129 eval_loss 0.4171


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 130 eval_loss 0.4218


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 131 eval_loss 0.4185


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 132 eval_loss 0.4182


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 133 eval_loss 0.4160


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 134 eval_loss 0.4180


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 135 eval_loss 0.4193


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 136 eval_loss 0.4170


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 137 eval_loss 0.4110


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 138 eval_loss 0.4150


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 139 eval_loss 0.4206


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 140 eval_loss 0.4181


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 141 eval_loss 0.4185


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 142 eval_loss 0.4195


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 143 eval_loss 0.4204


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 144 eval_loss 0.4184


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 145 eval_loss 0.4192


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 146 eval_loss 0.4156


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 147 eval_loss 0.4182


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 148 eval_loss 0.4132


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 149 eval_loss 0.4189


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 150 eval_loss 0.4222


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 151 eval_loss 0.4196


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 152 eval_loss 0.4210


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 153 eval_loss 0.4197


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 154 eval_loss 0.4268


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 155 eval_loss 0.4164


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 156 eval_loss 0.4130


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 157 eval_loss 0.4223


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 158 eval_loss 0.4127


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 159 eval_loss 0.4116


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 160 eval_loss 0.4151


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 161 eval_loss 0.4202


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 162 eval_loss 0.4153


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 163 eval_loss 0.4205


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 164 eval_loss 0.4211


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 165 eval_loss 0.4213


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 166 eval_loss 0.4213


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 167 eval_loss 0.4170


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 168 eval_loss 0.4122


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 169 eval_loss 0.4198


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 170 eval_loss 0.4228


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 171 eval_loss 0.4246


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 172 eval_loss 0.4174


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 173 eval_loss 0.4229


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 174 eval_loss 0.4221


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 175 eval_loss 0.4194


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 176 eval_loss 0.4276


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 177 eval_loss 0.4238


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 178 eval_loss 0.4130


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 179 eval_loss 0.4221


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 180 eval_loss 0.4268


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 181 eval_loss 0.4193


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 182 eval_loss 0.4213


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 183 eval_loss 0.4208


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 184 eval_loss 0.4204


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 185 eval_loss 0.4213


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 186 eval_loss 0.4235


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 187 eval_loss 0.4219


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 188 eval_loss 0.4216


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 189 eval_loss 0.4224


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 190 eval_loss 0.4257


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 191 eval_loss 0.4212


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 192 eval_loss 0.4209


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 193 eval_loss 0.4188


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 194 eval_loss 0.4253


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 195 eval_loss 0.4198


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 196 eval_loss 0.4177


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 197 eval_loss 0.4157


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 198 eval_loss 0.4225


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 199 eval_loss 0.4284


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 200 eval_loss 0.4244


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 201 eval_loss 0.4196


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 202 eval_loss 0.4262


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 203 eval_loss 0.4232


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 204 eval_loss 0.4297


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 205 eval_loss 0.4145


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 206 eval_loss 0.4185


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 207 eval_loss 0.4316


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 208 eval_loss 0.4240


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 209 eval_loss 0.4172


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 210 eval_loss 0.4259


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 211 eval_loss 0.4170


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 212 eval_loss 0.4137


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 213 eval_loss 0.4269


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 214 eval_loss 0.4182


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 215 eval_loss 0.4179


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 216 eval_loss 0.4214


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 217 eval_loss 0.4294


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 218 eval_loss 0.4219


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 219 eval_loss 0.4159


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 220 eval_loss 0.4240


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 221 eval_loss 0.4181


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 222 eval_loss 0.4112


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 223 eval_loss 0.4227


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 224 eval_loss 0.4285


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 225 eval_loss 0.4168


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 226 eval_loss 0.4135


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 227 eval_loss 0.4242


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 228 eval_loss 0.4291


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 229 eval_loss 0.4196


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 230 eval_loss 0.4230


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 231 eval_loss 0.4264


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 232 eval_loss 0.4234


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 233 eval_loss 0.4217


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 234 eval_loss 0.4186


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 235 eval_loss 0.4277


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 236 eval_loss 0.4204


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 237 eval_loss 0.4195


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 238 eval_loss 0.4205


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 239 eval_loss 0.4346


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 240 eval_loss 0.4280


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 241 eval_loss 0.4236


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 242 eval_loss 0.4267


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 243 eval_loss 0.4204


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 244 eval_loss 0.4274


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 245 eval_loss 0.4273


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 246 eval_loss 0.4269


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 247 eval_loss 0.4175


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 248 eval_loss 0.4201


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 249 eval_loss 0.4313


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 250 eval_loss 0.4244


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 251 eval_loss 0.4278


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 252 eval_loss 0.4271


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 253 eval_loss 0.4266


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 254 eval_loss 0.4237


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 255 eval_loss 0.4270


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 256 eval_loss 0.4225


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 257 eval_loss 0.4229


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 258 eval_loss 0.4323


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 259 eval_loss 0.4246


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 260 eval_loss 0.4174


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 261 eval_loss 0.4259


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 262 eval_loss 0.4267


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 263 eval_loss 0.4268


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 264 eval_loss 0.4263


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 265 eval_loss 0.4190


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 266 eval_loss 0.4264


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 267 eval_loss 0.4329


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 268 eval_loss 0.4219


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 269 eval_loss 0.4236


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 270 eval_loss 0.4283


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 271 eval_loss 0.4254


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 272 eval_loss 0.4246


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 273 eval_loss 0.4281


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 274 eval_loss 0.4181


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 275 eval_loss 0.4274


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 276 eval_loss 0.4263


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 277 eval_loss 0.4304


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 278 eval_loss 0.4286


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 279 eval_loss 0.4263


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 280 eval_loss 0.4265


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 281 eval_loss 0.4315


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 282 eval_loss 0.4293


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 283 eval_loss 0.4248


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 284 eval_loss 0.4208


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 285 eval_loss 0.4247


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 286 eval_loss 0.4180


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 287 eval_loss 0.4235


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 288 eval_loss 0.4293


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 289 eval_loss 0.4165


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 290 eval_loss 0.4220


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 291 eval_loss 0.4350


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 292 eval_loss 0.4197


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 293 eval_loss 0.4194


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 294 eval_loss 0.4317


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 295 eval_loss 0.4200


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 296 eval_loss 0.4243


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 297 eval_loss 0.4328


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 298 eval_loss 0.4251


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 299 eval_loss 0.4161


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 300 eval_loss 0.4272


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 301 eval_loss 0.4160


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 302 eval_loss 0.4238


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 303 eval_loss 0.4292


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 304 eval_loss 0.4299


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 305 eval_loss 0.4289


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 306 eval_loss 0.4277


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 307 eval_loss 0.4183


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 308 eval_loss 0.4198


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 309 eval_loss 0.4293


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 310 eval_loss 0.4288


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 311 eval_loss 0.4271


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 312 eval_loss 0.4281


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 313 eval_loss 0.4232


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 314 eval_loss 0.4231


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 315 eval_loss 0.4336


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 316 eval_loss 0.4367


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 317 eval_loss 0.4221


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 318 eval_loss 0.4172


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 319 eval_loss 0.4306


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 320 eval_loss 0.4276


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 321 eval_loss 0.4224


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 322 eval_loss 0.4280


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 323 eval_loss 0.4290


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 324 eval_loss 0.4324


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 325 eval_loss 0.4345


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 326 eval_loss 0.4275


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 327 eval_loss 0.4256


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 328 eval_loss 0.4282


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 329 eval_loss 0.4266


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 330 eval_loss 0.4290


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 331 eval_loss 0.4251


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 332 eval_loss 0.4281


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 333 eval_loss 0.4247


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 334 eval_loss 0.4224


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 335 eval_loss 0.4242


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 336 eval_loss 0.4329


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 337 eval_loss 0.4235


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 338 eval_loss 0.4234


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 339 eval_loss 0.4301


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 340 eval_loss 0.4213


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 341 eval_loss 0.4280


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 342 eval_loss 0.4314


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 343 eval_loss 0.4273


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 344 eval_loss 0.4335


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 345 eval_loss 0.4276


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 346 eval_loss 0.4261


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 347 eval_loss 0.4312


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 348 eval_loss 0.4267


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 349 eval_loss 0.4271


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 350 eval_loss 0.4225


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 351 eval_loss 0.4224


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 352 eval_loss 0.4261


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 353 eval_loss 0.4329


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 354 eval_loss 0.4268


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 355 eval_loss 0.4297


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 356 eval_loss 0.4294


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 357 eval_loss 0.4184


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 358 eval_loss 0.4217


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 359 eval_loss 0.4396


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 360 eval_loss 0.4188


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 361 eval_loss 0.4164


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 362 eval_loss 0.4181


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 363 eval_loss 0.4361


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 364 eval_loss 0.4253


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 365 eval_loss 0.4223


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 366 eval_loss 0.4314


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 367 eval_loss 0.4210


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 368 eval_loss 0.4162


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 369 eval_loss 0.4238


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 370 eval_loss 0.4378


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 371 eval_loss 0.4272


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 372 eval_loss 0.4201


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 373 eval_loss 0.4240


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 374 eval_loss 0.4397


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 375 eval_loss 0.4282


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 376 eval_loss 0.4290


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 377 eval_loss 0.4361


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 378 eval_loss 0.4235


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 379 eval_loss 0.4275


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 380 eval_loss 0.4322


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 381 eval_loss 0.4301


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 382 eval_loss 0.4201


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 383 eval_loss 0.4271


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 384 eval_loss 0.4333


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 385 eval_loss 0.4303


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 386 eval_loss 0.4305


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 387 eval_loss 0.4302


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 388 eval_loss 0.4304


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 389 eval_loss 0.4285


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 390 eval_loss 0.4396


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 391 eval_loss 0.4269


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 392 eval_loss 0.4304


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 393 eval_loss 0.4357


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 394 eval_loss 0.4266


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 395 eval_loss 0.4337


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 396 eval_loss 0.4266


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 397 eval_loss 0.4265


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 398 eval_loss 0.4233


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 399 eval_loss 0.4255


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 400 eval_loss 0.4301


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 401 eval_loss 0.4323


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 402 eval_loss 0.4283


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 403 eval_loss 0.4280


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 404 eval_loss 0.4359


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 405 eval_loss 0.4346


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 406 eval_loss 0.4324


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 407 eval_loss 0.4333


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 408 eval_loss 0.4311


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 409 eval_loss 0.4336


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 410 eval_loss 0.4344


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 411 eval_loss 0.4335


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 412 eval_loss 0.4303


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 413 eval_loss 0.4251


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 414 eval_loss 0.4334


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 415 eval_loss 0.4304


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 416 eval_loss 0.4293


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 417 eval_loss 0.4281


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 418 eval_loss 0.4327


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 419 eval_loss 0.4302


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 420 eval_loss 0.4264


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 421 eval_loss 0.4312


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 422 eval_loss 0.4329


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 423 eval_loss 0.4264


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 424 eval_loss 0.4413


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 425 eval_loss 0.4341


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 426 eval_loss 0.4258


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 427 eval_loss 0.4389


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 428 eval_loss 0.4234


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 429 eval_loss 0.4257


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 430 eval_loss 0.4375


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 431 eval_loss 0.4338


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 432 eval_loss 0.4273


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 433 eval_loss 0.4372


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 434 eval_loss 0.4336


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 435 eval_loss 0.4311


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 436 eval_loss 0.4318


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 437 eval_loss 0.4266


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 438 eval_loss 0.4357


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 439 eval_loss 0.4331


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 440 eval_loss 0.4346


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 441 eval_loss 0.4245


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 442 eval_loss 0.4248


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 443 eval_loss 0.4364


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 444 eval_loss 0.4494


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 445 eval_loss 0.4286


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 446 eval_loss 0.4286


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 447 eval_loss 0.4243


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 448 eval_loss 0.4279


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 449 eval_loss 0.4359


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 450 eval_loss 0.4320


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 451 eval_loss 0.4298


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 452 eval_loss 0.4387


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 453 eval_loss 0.4258


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 454 eval_loss 0.4280


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 455 eval_loss 0.4319


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 456 eval_loss 0.4401


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 457 eval_loss 0.4494


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 458 eval_loss 0.4374


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 459 eval_loss 0.4250


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 460 eval_loss 0.4293


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 461 eval_loss 0.4267


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 462 eval_loss 0.4425


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 463 eval_loss 0.4392


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 464 eval_loss 0.4292


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 465 eval_loss 0.4307


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 466 eval_loss 0.4310


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 467 eval_loss 0.4399


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 468 eval_loss 0.4365


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 469 eval_loss 0.4317


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 470 eval_loss 0.4287


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 471 eval_loss 0.4361


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 472 eval_loss 0.4328


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 473 eval_loss 0.4216


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 474 eval_loss 0.4339


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 475 eval_loss 0.4398


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 476 eval_loss 0.4316


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 477 eval_loss 0.4350


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 478 eval_loss 0.4339


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 479 eval_loss 0.4249


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 480 eval_loss 0.4402


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 481 eval_loss 0.4343


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 482 eval_loss 0.4272


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 483 eval_loss 0.4350


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 484 eval_loss 0.4296


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 485 eval_loss 0.4357


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 486 eval_loss 0.4349


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 487 eval_loss 0.4309


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 488 eval_loss 0.4330


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 489 eval_loss 0.4385


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 490 eval_loss 0.4372


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 491 eval_loss 0.4332


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 492 eval_loss 0.4338


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 493 eval_loss 0.4420


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 494 eval_loss 0.4408


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 495 eval_loss 0.4210


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 496 eval_loss 0.4406


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 497 eval_loss 0.4316


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 498 eval_loss 0.4374


  0%|          | 0/1 [00:00<?, ?it/s]

epoch 499 eval_loss 0.4355
