In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt

import torch
from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader

import torchmetrics
from torchmetrics.functional import accuracy

import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import ModelCheckpoint

# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cuda


## Model Setup 

In [2]:
# Fix the global random seed through Lightning (it reports "Global seed set to 42").
# workers=True is passed so that dataloader worker processes are seeded as well;
# see the Lightning `seed_everything` docs for the exact set of RNGs covered.
pl.seed_everything(42, workers=True)

class DrowsyDataset(Dataset):
    """Map-style dataset over a list of ``(features, label)`` pairs.

    Each element of ``sequences`` is a 2-tuple: the first item must expose
    ``to_numpy()`` (the notebook passes per-group DataFrame slices) and the
    second item is the class label as an integer.
    """

    def __init__(self, sequences):
        self.sequences = sequences

    def __len__(self):
        """Return the number of stored ``(features, label)`` pairs."""
        return len(self.sequences)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict with ``"sequence"`` and ``"label"`` tensors."""
        features, target = self.sequences[idx]
        sample = {}
        # torch.Tensor(...) builds a tensor of the default floating dtype.
        sample["sequence"] = torch.Tensor(features.to_numpy())
        # Labels are converted to int64 tensors.
        sample["label"] = torch.tensor(target).long()
        return sample
    
class DrowsyDataModule(pl.LightningDataModule):
    """Lightning data module serving pre-split train/val/test sequence lists.

    Args:
        train_sequences: list of ``(features, label)`` pairs used for training.
        val_sequences: list of ``(features, label)`` pairs used for validation.
        test_sequences: list of ``(features, label)`` pairs used for testing.
        batch_size: batch size shared by all three dataloaders.
        num_workers: number of DataLoader worker processes. When ``None``
            (the default) ``os.cpu_count()`` is used, falling back to ``0``
            if the CPU count cannot be determined.
    """

    def __init__(self, train_sequences, val_sequences, test_sequences, batch_size, num_workers=None):
        super().__init__()
        self.train_sequences = train_sequences
        self.val_sequences = val_sequences
        self.test_sequences = test_sequences
        self.batch_size = batch_size
        if num_workers is None:
            # os.cpu_count() may return None; DataLoader needs an integer.
            num_workers = os.cpu_count() or 0
        self.num_workers = num_workers

    def setup(self, stage=None):
        """Wrap each sequence list in a ``DrowsyDataset`` (``stage`` is ignored)."""
        self.train_dataset = DrowsyDataset(self.train_sequences)
        self.val_dataset = DrowsyDataset(self.val_sequences)
        self.test_dataset = DrowsyDataset(self.test_sequences)

    def _make_loader(self, dataset, shuffle):
        """Build a DataLoader with the module-wide batch size and worker count."""
        return DataLoader(
            dataset,
            batch_size=self.batch_size,
            shuffle=shuffle,
            num_workers=self.num_workers
        )

    def train_dataloader(self):
        """Return the shuffled training dataloader."""
        return self._make_loader(self.train_dataset, shuffle=True)

    def val_dataloader(self):
        """Return the validation dataloader (no shuffling)."""
        return self._make_loader(self.val_dataset, shuffle=False)

    def test_dataloader(self):
        """Return the test dataloader (no shuffling)."""
        return self._make_loader(self.test_dataset, shuffle=False)

class DrowsyModel(nn.Module):
    """Stacked LSTM followed by a linear classification head.

    Args:
        n_features: size of each time step's feature vector (LSTM input size).
        n_classes: number of output logits.
        n_hidden: LSTM hidden size.
        n_layers: number of stacked LSTM layers.
        dropout: dropout probability applied between stacked LSTM layers.
            Defaults to the previously hard-coded 0.75.
    """

    def __init__(self, n_features, n_classes, n_hidden=256, n_layers=3, dropout=0.75):
        super().__init__()

        self.n_hidden = n_hidden

        self.lstm = nn.LSTM(
            input_size=n_features,
            hidden_size=n_hidden,
            num_layers=n_layers,
            batch_first=True,
            # Inter-layer dropout only exists when layers are stacked; passing a
            # non-zero value with a single layer has no effect and only warns.
            dropout=dropout if n_layers > 1 else 0.0
        )

        self.classifier = nn.Linear(n_hidden, n_classes)

    def forward(self, x):
        """Return class logits for a batch-first input of shape (batch, seq, n_features)."""
        self.lstm.flatten_parameters()
        _, (hidden, _) = self.lstm(x)

        # hidden[-1] is the final hidden state of the last LSTM layer.
        out = hidden[-1]
        return self.classifier(out)
    
class DrowsyPredictor(pl.LightningModule):
    """LightningModule wrapping ``DrowsyModel`` with a cross-entropy objective.

    Args:
        n_features: number of input features per time step.
        n_classes: number of target classes.
        learning_rate: Adam learning rate. Stored as ``self.learning_rate``,
            which is the attribute name Lightning's learning-rate finder is
            documented to look for (relevant to ``auto_lr_find=True``).
    """

    def __init__(self, n_features: int, n_classes: int, learning_rate: float = 0.0001):
        super().__init__()
        self.model = DrowsyModel(n_features, n_classes)
        self.criterion = nn.CrossEntropyLoss()
        self.learning_rate = learning_rate

    def forward(self, x, label=None):
        """Return ``(loss, logits)``; ``loss`` is ``0`` when no label is given."""
        output = self.model(x)
        loss = 0
        if label is not None:
            loss = self.criterion(output, label)
        return loss, output

    def _shared_step(self, batch, stage):
        """Run one batch and log ``{stage}_loss`` / ``{stage}_accuracy``.

        ``batch`` is a dict with ``"sequence"`` and ``"label"`` entries;
        ``stage`` is the metric-name prefix (``"train"``, ``"val"`` or ``"test"``).
        """
        sequences = batch["sequence"]
        labels = batch["label"]
        loss, outputs = self(sequences, labels)
        predictions = torch.argmax(outputs, dim=1)
        step_accuracy = accuracy(predictions, labels)

        self.log(f"{stage}_loss", loss, prog_bar=True, logger=True)
        self.log(f"{stage}_accuracy", step_accuracy, prog_bar=True, logger=True)

        return {"loss": loss, "accuracy": step_accuracy}

    def training_step(self, batch, batch_idx):
        """Compute and log training loss/accuracy for one batch."""
        return self._shared_step(batch, "train")

    def validation_step(self, batch, batch_idx):
        """Compute and log validation loss/accuracy for one batch."""
        return self._shared_step(batch, "val")

    def test_step(self, batch, batch_idx):
        """Compute and log test loss/accuracy for one batch."""
        return self._shared_step(batch, "test")

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters."""
        return optim.Adam(self.parameters(), lr=self.learning_rate)

Global seed set to 42


## 參數設置 (Parameter Settings)

In [3]:
# Load the TensorBoard notebook extension, then reload it in case it was
# already loaded by an earlier run of this cell.
%load_ext tensorboard
%reload_ext tensorboard
# Launch TensorBoard on ./lightning_logs, the directory name the TensorBoardLogger
# in the main cell writes to.
# NOTE(review): --host 0.0.0.0 and port 8888 look like they expose the UI on all
# interfaces, on a port Jupyter often uses itself — confirm this is intended.
%tensorboard --logdir ./lightning_logs --host 0.0.0.0 --port=8888

In [4]:
# Load the full dataset. Despite the name, this frame is split into
# train/validation/test sequences later in the main cell.
X_train = pd.read_csv('../data2.csv')

In [5]:
# Training configuration: maximum number of epochs passed to the Trainer and
# the batch size passed to the data module.
N_EPOCHS = 200
BATCH_SIZE = 50

# Feature column names in a fixed, apparently pre-shuffled order; the main cell
# uses a tail slice of this list as the model's input features.
ORG_FEATURE_COLUMNS = ['FZ', 'CZ', 'O2', 'T4', 'FT7', 'CPZ', 'F8', 'TP8', 'VEOU', 'P4', 'VEOL', 'C4', 'PO1', 'TP7', 'C3', 'FT10', 'O1', 'T6', 'T3', 'OZ', 'FP1', 'P3', 'F7', 'FT8', 'A2', 'FP2', 'FT9', 'HEOL', 'T5', 'FC4', 'FC3', 'HEOR', 'PZ', 'PO2', 'F4', 'F3', 'CP4', 'CP3', 'FCZ', 'A1']
# Rows grouped by the "group" column (one group per sequence sample).
g = X_train.groupby("group")

In [6]:
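# Kept for reference only: one-off snippet that shuffles the feature-column list
# (apparently how the fixed order of ORG_FEATURE_COLUMNS was produced).
# import random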
# ORG_FEATURE_COLUMNS = ['TP8', 'FP2', 'FCZ', 'FT10', 'O2', 'O1', 'FT7', 'F4', 'TP7', 'C3', 'C4', 'F3', 'FT8', 'T6', 'HEOR', 'T5', 'VEOL', 'F7', 'FZ', 'VEOU', 'A1', 'P3', 'PZ', 'CP3', 'P4', 'CPZ', 'A2', 'HEOL', 'CP4', 'FT9', 'F8', 'OZ', 'CZ', 'FC4', 'FC3', 'FP1', 'PO1', 'T3', 'T4', 'PO2']
# random.shuffle(ORG_FEATURE_COLUMNS)
# print(ORG_FEATURE_COLUMNS)

## Main

In [7]:
# Round counter: round n trains on ORG_FEATURE_COLUMNS[n:], and one leading
# column is dropped after every round.
n = 20
FEATURE_COLUMNS = ORG_FEATURE_COLUMNS[n:]

# Seed for the train/val/test split. A dedicated RandomState keeps the split
# identical on every run of this cell instead of depending on how much of
# NumPy's global RNG stream earlier cells have consumed.
SPLIT_SEED = 42

# Round that resumes from an existing checkpoint, and the checkpoint to use.
RESUME_ROUND = 20
RESUME_CKPT_PATH = "/Workspace/code/checkpoints/best-checkpoint-corr-random-20-v6.ckpt"

# Encode the string states as integer labels once; this does not depend on the
# feature subset, so it is done outside the round loop.
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(X_train.state)
X_train['label'] = encoded_labels

while n < 21:
    print("------------------ Round: " + str(n) + " ------------------")
    print(FEATURE_COLUMNS)
    print("Len:", len(FEATURE_COLUMNS))

    # Prepare data ###########################################
    # Group here, after the 'label' column exists, rather than relying on a
    # groupby object created before the column was added.
    sequences = []

    for name, group in X_train.groupby("group"):
        sequence_features = group[FEATURE_COLUMNS]
        # Every row of a group is assumed to carry the same label; the first is used.
        label = group.label.iloc[0]

        sequences.append((sequence_features, label))

    # Setting up train, test, val group #######################
    # 80% train, then the 20% hold-out is halved into validation and test.
    # Both calls share one seeded generator so the second split continues the
    # same random stream as the first.
    split_rng = np.random.RandomState(SPLIT_SEED)
    train_sequences, holdout_sequences = train_test_split(
        sequences, test_size=0.2, random_state=split_rng
    )
    val_sequences, test_sequences = train_test_split(
        holdout_sequences, test_size=0.5, random_state=split_rng
    )

    # Setting up data module ##################################
    data_module = DrowsyDataModule(
        train_sequences, val_sequences, test_sequences, BATCH_SIZE
    )

    model = DrowsyPredictor(
        n_features=len(FEATURE_COLUMNS),
        n_classes=len(label_encoder.classes_)
    )

    # Check points and logger #################################
    checkpoint_callback = ModelCheckpoint(
        dirpath="checkpoints",
        filename="best-checkpoint-corr-random-" + str(n),
        save_top_k=2,
        verbose=True,
        monitor="val_loss",
        mode="min"
    )

    logger = TensorBoardLogger("lightning_logs", name="Drowsy_Random" + str(n))

    trainer = pl.Trainer(
        logger=logger,
        callbacks=[checkpoint_callback],
        max_epochs=N_EPOCHS,
        gpus=[2],
        # gpus=2,
        # auto_select_gpus=True,
        # NOTE(review): no trainer.tune(...) call is visible in this cell, so this
        # flag probably has no effect here — confirm.
        auto_lr_find=True,
        # check_val_every_n_epoch=10
        # refresh_rate=20,
    )

    # Training start
    # Resume the designated round from its checkpoint when the file is present;
    # otherwise train from scratch instead of failing on a machine-specific path.
    resume_path = None
    if n == RESUME_ROUND:
        if os.path.isfile(RESUME_CKPT_PATH):
            resume_path = RESUME_CKPT_PATH
        else:
            print("Checkpoint not found, training from scratch:", RESUME_CKPT_PATH)
    trainer.fit(model, data_module, ckpt_path=resume_path)

    # NOTE(review): this evaluates the weights held in `model` after fit, which
    # may not be the best checkpoint by val_loss — confirm that is intended.
    trainer.test(model, data_module)

    FEATURE_COLUMNS.pop(0)
    n = n + 1

------------------ Round: 20 ------------------
['FP1', 'P3', 'F7', 'FT8', 'A2', 'FP2', 'FT9', 'HEOL', 'T5', 'FC4', 'FC3', 'HEOR', 'PZ', 'PO2', 'F4', 'F3', 'CP4', 'CP3', 'FCZ', 'A1']
Len: 20


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
Restoring states from the checkpoint path at /Workspace/code/checkpoints/best-checkpoint-corr-random-20-v6.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]
  "You're resuming from a checkpoint that ended mid-epoch."
Restored all states from the checkpoint file at /Workspace/code/checkpoints/best-checkpoint-corr-random-20-v6.ckpt

  | Name      | Type             | Params
-----------------------------------------------
0 | model     | DrowsyModel      | 1.3 M 
1 | criterion | CrossEntropyLoss | 0     
-----------------------------------------------
1.3 M     Trainable params
0         Non-trainable params
1.3 M     Total params
5.351     Total estimated model params size (MB)
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")


                                                                      

Global seed set to 42


Epoch 85:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.13, v_num=1, train_loss=0.0412, train_accuracy=1.000] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 85:  89%|████████▉ | 228/255 [01:24<00:09,  2.71it/s, loss=0.13, v_num=1, train_loss=0.0412, train_accuracy=1.000]
Epoch 85:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.13, v_num=1, train_loss=0.0412, train_accuracy=1.000]
Epoch 85:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.13, v_num=1, train_loss=0.0412, train_accuracy=1.000]
Epoch 85:  92%|█████████▏| 234/255 [01:24<00:07,  2.76it/s, loss=0.13, v_num=1, train_loss=0.0412, train_accuracy=1.000]
Epoch 85:  93%|█████████▎| 236/255 [01:24<00:06,  2.78it/s, loss=0.13, v_num=1, train_loss=0.0412, train_accuracy=1.000]
Epoch 85:  93%|█████████▎| 238/255 [01:25<00:06,  2.79it/s, loss=0.13, v_num=1, train_loss=0.0412, train_accuracy=1.000]
Epoch 85:  94%|█████████▍| 240/255 [01:25<00:05,  2.81it/s, loss

Epoch 85, global step 19435: val_loss was not in top 2


Epoch 86:  89%|████████▊ | 226/255 [01:20<00:10,  2.80it/s, loss=0.132, v_num=1, train_loss=0.109, train_accuracy=0.944, val_loss=0.237, val_accuracy=0.908] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 86:  89%|████████▉ | 228/255 [01:22<00:09,  2.76it/s, loss=0.132, v_num=1, train_loss=0.109, train_accuracy=0.944, val_loss=0.237, val_accuracy=0.908]
Epoch 86:  90%|█████████ | 230/255 [01:22<00:09,  2.77it/s, loss=0.132, v_num=1, train_loss=0.109, train_accuracy=0.944, val_loss=0.237, val_accuracy=0.908]
Epoch 86:  91%|█████████ | 232/255 [01:23<00:08,  2.79it/s, loss=0.132, v_num=1, train_loss=0.109, train_accuracy=0.944, val_loss=0.237, val_accuracy=0.908]
Epoch 86:  92%|█████████▏| 234/255 [01:23<00:07,  2.81it/s, loss=0.132, v_num=1, train_loss=0.109, train_accuracy=0.944, val_loss=0.237, val_accuracy=0.908]
Epoch 86:  93%|█████████▎| 236/255 [01:23<00:06,  2.83it/s, loss=0.132, v_num=1, train_loss=0.109, train_accuracy=0.944, val_l

Epoch 86, global step 19661: val_loss reached 0.20019 (best 0.19623), saving model to "/Workspace/code/checkpoints/best-checkpoint-corr-random-20-v6.ckpt" as top 2


Epoch 87:  89%|████████▊ | 226/255 [01:21<00:10,  2.76it/s, loss=0.118, v_num=1, train_loss=0.0978, train_accuracy=0.944, val_loss=0.200, val_accuracy=0.928]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 87:  89%|████████▉ | 228/255 [01:23<00:09,  2.72it/s, loss=0.118, v_num=1, train_loss=0.0978, train_accuracy=0.944, val_loss=0.200, val_accuracy=0.928]
Epoch 87:  90%|█████████ | 230/255 [01:24<00:09,  2.73it/s, loss=0.118, v_num=1, train_loss=0.0978, train_accuracy=0.944, val_loss=0.200, val_accuracy=0.928]
Epoch 87:  91%|█████████ | 232/255 [01:24<00:08,  2.75it/s, loss=0.118, v_num=1, train_loss=0.0978, train_accuracy=0.944, val_loss=0.200, val_accuracy=0.928]
Epoch 87:  92%|█████████▏| 234/255 [01:24<00:07,  2.77it/s, loss=0.118, v_num=1, train_loss=0.0978, train_accuracy=0.944, val_loss=0.200, val_accuracy=0.928]
Epoch 87:  93%|█████████▎| 236/255 [01:24<00:06,  2.79it/s, loss=0.118, v_num=1, train_loss=0.0978, train_accuracy=0.944, 

Epoch 87, global step 19887: val_loss reached 0.19600 (best 0.19600), saving model to "/Workspace/code/checkpoints/best-checkpoint-corr-random-20-v6.ckpt" as top 2


Epoch 88:  89%|████████▊ | 226/255 [01:21<00:10,  2.77it/s, loss=0.117, v_num=1, train_loss=0.278, train_accuracy=0.833, val_loss=0.196, val_accuracy=0.930] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 88:  89%|████████▉ | 228/255 [01:23<00:09,  2.72it/s, loss=0.117, v_num=1, train_loss=0.278, train_accuracy=0.833, val_loss=0.196, val_accuracy=0.930]
Epoch 88:  90%|█████████ | 230/255 [01:24<00:09,  2.74it/s, loss=0.117, v_num=1, train_loss=0.278, train_accuracy=0.833, val_loss=0.196, val_accuracy=0.930]
Epoch 88:  91%|█████████ | 232/255 [01:24<00:08,  2.75it/s, loss=0.117, v_num=1, train_loss=0.278, train_accuracy=0.833, val_loss=0.196, val_accuracy=0.930]
Epoch 88:  92%|█████████▏| 234/255 [01:24<00:07,  2.77it/s, loss=0.117, v_num=1, train_loss=0.278, train_accuracy=0.833, val_loss=0.196, val_accuracy=0.930]
Epoch 88:  93%|█████████▎| 236/255 [01:24<00:06,  2.79it/s, loss=0.117, v_num=1, train_loss=0.278, train_accuracy=0.833, val_l

Epoch 88, global step 20113: val_loss reached 0.19561 (best 0.19561), saving model to "/Workspace/code/checkpoints/best-checkpoint-corr-random-20-v5.ckpt" as top 2


Epoch 89:  89%|████████▊ | 226/255 [01:21<00:10,  2.76it/s, loss=0.181, v_num=1, train_loss=0.367, train_accuracy=0.889, val_loss=0.196, val_accuracy=0.928] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 89:  89%|████████▉ | 228/255 [01:24<00:09,  2.71it/s, loss=0.181, v_num=1, train_loss=0.367, train_accuracy=0.889, val_loss=0.196, val_accuracy=0.928]
Epoch 89:  90%|█████████ | 230/255 [01:24<00:09,  2.73it/s, loss=0.181, v_num=1, train_loss=0.367, train_accuracy=0.889, val_loss=0.196, val_accuracy=0.928]
Epoch 89:  91%|█████████ | 232/255 [01:24<00:08,  2.75it/s, loss=0.181, v_num=1, train_loss=0.367, train_accuracy=0.889, val_loss=0.196, val_accuracy=0.928]
Epoch 89:  92%|█████████▏| 234/255 [01:24<00:07,  2.77it/s, loss=0.181, v_num=1, train_loss=0.367, train_accuracy=0.889, val_loss=0.196, val_accuracy=0.928]
Epoch 89:  93%|█████████▎| 236/255 [01:24<00:06,  2.78it/s, loss=0.181, v_num=1, train_loss=0.367, train_accuracy=0.889, val_l

Epoch 89, global step 20339: val_loss reached 0.19512 (best 0.19512), saving model to "/Workspace/code/checkpoints/best-checkpoint-corr-random-20-v6.ckpt" as top 2


Epoch 90:  89%|████████▊ | 226/255 [01:21<00:10,  2.76it/s, loss=0.126, v_num=1, train_loss=0.110, train_accuracy=0.889, val_loss=0.195, val_accuracy=0.928]  
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 90:  89%|████████▉ | 228/255 [01:24<00:09,  2.71it/s, loss=0.126, v_num=1, train_loss=0.110, train_accuracy=0.889, val_loss=0.195, val_accuracy=0.928]
Epoch 90:  90%|█████████ | 230/255 [01:24<00:09,  2.73it/s, loss=0.126, v_num=1, train_loss=0.110, train_accuracy=0.889, val_loss=0.195, val_accuracy=0.928]
Epoch 90:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.126, v_num=1, train_loss=0.110, train_accuracy=0.889, val_loss=0.195, val_accuracy=0.928]
Epoch 90:  92%|█████████▏| 234/255 [01:24<00:07,  2.76it/s, loss=0.126, v_num=1, train_loss=0.110, train_accuracy=0.889, val_loss=0.195, val_accuracy=0.928]
Epoch 90:  93%|█████████▎| 236/255 [01:24<00:06,  2.78it/s, loss=0.126, v_num=1, train_loss=0.110, train_accuracy=0.889, val_

Epoch 90, global step 20565: val_loss was not in top 2


Epoch 91:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.172, v_num=1, train_loss=0.142, train_accuracy=0.944, val_loss=0.202, val_accuracy=0.929] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 91:  89%|████████▉ | 228/255 [01:24<00:09,  2.70it/s, loss=0.172, v_num=1, train_loss=0.142, train_accuracy=0.944, val_loss=0.202, val_accuracy=0.929]
Epoch 91:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.172, v_num=1, train_loss=0.142, train_accuracy=0.944, val_loss=0.202, val_accuracy=0.929]
Epoch 91:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.172, v_num=1, train_loss=0.142, train_accuracy=0.944, val_loss=0.202, val_accuracy=0.929]
Epoch 91:  92%|█████████▏| 234/255 [01:24<00:07,  2.76it/s, loss=0.172, v_num=1, train_loss=0.142, train_accuracy=0.944, val_loss=0.202, val_accuracy=0.929]
Epoch 91:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.172, v_num=1, train_loss=0.142, train_accuracy=0.944, val_l

Epoch 91, global step 20791: val_loss was not in top 2


Epoch 92:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.111, v_num=1, train_loss=0.0529, train_accuracy=0.944, val_loss=0.205, val_accuracy=0.923]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 92:  89%|████████▉ | 228/255 [01:24<00:09,  2.70it/s, loss=0.111, v_num=1, train_loss=0.0529, train_accuracy=0.944, val_loss=0.205, val_accuracy=0.923]
Epoch 92:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.111, v_num=1, train_loss=0.0529, train_accuracy=0.944, val_loss=0.205, val_accuracy=0.923]
Epoch 92:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.111, v_num=1, train_loss=0.0529, train_accuracy=0.944, val_loss=0.205, val_accuracy=0.923]
Epoch 92:  92%|█████████▏| 234/255 [01:24<00:07,  2.76it/s, loss=0.111, v_num=1, train_loss=0.0529, train_accuracy=0.944, val_loss=0.205, val_accuracy=0.923]
Epoch 92:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.111, v_num=1, train_loss=0.0529, train_accuracy=0.944, 

Epoch 92, global step 21017: val_loss reached 0.18965 (best 0.18965), saving model to "/Workspace/code/checkpoints/best-checkpoint-corr-random-20-v5.ckpt" as top 2


Epoch 93:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.12, v_num=1, train_loss=0.0854, train_accuracy=0.944, val_loss=0.190, val_accuracy=0.936] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 93:  89%|████████▉ | 228/255 [01:24<00:09,  2.70it/s, loss=0.12, v_num=1, train_loss=0.0854, train_accuracy=0.944, val_loss=0.190, val_accuracy=0.936]
Epoch 93:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.12, v_num=1, train_loss=0.0854, train_accuracy=0.944, val_loss=0.190, val_accuracy=0.936]
Epoch 93:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.12, v_num=1, train_loss=0.0854, train_accuracy=0.944, val_loss=0.190, val_accuracy=0.936]
Epoch 93:  92%|█████████▏| 234/255 [01:24<00:07,  2.76it/s, loss=0.12, v_num=1, train_loss=0.0854, train_accuracy=0.944, val_loss=0.190, val_accuracy=0.936]
Epoch 93:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.12, v_num=1, train_loss=0.0854, train_accuracy=0.944, val_l

Epoch 93, global step 21243: val_loss was not in top 2


Epoch 94:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.132, v_num=1, train_loss=0.136, train_accuracy=0.944, val_loss=0.233, val_accuracy=0.912]  
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 94:  89%|████████▉ | 228/255 [01:24<00:09,  2.70it/s, loss=0.132, v_num=1, train_loss=0.136, train_accuracy=0.944, val_loss=0.233, val_accuracy=0.912]
Epoch 94:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.132, v_num=1, train_loss=0.136, train_accuracy=0.944, val_loss=0.233, val_accuracy=0.912]
Epoch 94:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.132, v_num=1, train_loss=0.136, train_accuracy=0.944, val_loss=0.233, val_accuracy=0.912]
Epoch 94:  92%|█████████▏| 234/255 [01:24<00:07,  2.76it/s, loss=0.132, v_num=1, train_loss=0.136, train_accuracy=0.944, val_loss=0.233, val_accuracy=0.912]
Epoch 94:  93%|█████████▎| 236/255 [01:24<00:06,  2.78it/s, loss=0.132, v_num=1, train_loss=0.136, train_accuracy=0.944, val_

Epoch 94, global step 21469: val_loss was not in top 2


Epoch 95:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.139, v_num=1, train_loss=0.141, train_accuracy=0.944, val_loss=0.195, val_accuracy=0.935] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 95:  89%|████████▉ | 228/255 [01:24<00:10,  2.70it/s, loss=0.139, v_num=1, train_loss=0.141, train_accuracy=0.944, val_loss=0.195, val_accuracy=0.935]
Epoch 95:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.139, v_num=1, train_loss=0.141, train_accuracy=0.944, val_loss=0.195, val_accuracy=0.935]
Epoch 95:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.139, v_num=1, train_loss=0.141, train_accuracy=0.944, val_loss=0.195, val_accuracy=0.935]
Validating:  21%|██        | 6/29 [00:02<00:06,  3.59it/s][A
Epoch 95:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.139, v_num=1, train_loss=0.141, train_accuracy=0.944, val_loss=0.195, val_accuracy=0.935]
Epoch 95:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss

Epoch 95, global step 21695: val_loss was not in top 2


Epoch 96:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.136, v_num=1, train_loss=0.402, train_accuracy=0.833, val_loss=0.274, val_accuracy=0.898] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 96:  89%|████████▉ | 228/255 [01:24<00:09,  2.70it/s, loss=0.136, v_num=1, train_loss=0.402, train_accuracy=0.833, val_loss=0.274, val_accuracy=0.898]
Epoch 96:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.136, v_num=1, train_loss=0.402, train_accuracy=0.833, val_loss=0.274, val_accuracy=0.898]
Epoch 96:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.136, v_num=1, train_loss=0.402, train_accuracy=0.833, val_loss=0.274, val_accuracy=0.898]
Epoch 96:  92%|█████████▏| 234/255 [01:24<00:07,  2.76it/s, loss=0.136, v_num=1, train_loss=0.402, train_accuracy=0.833, val_loss=0.274, val_accuracy=0.898]
Epoch 96:  93%|█████████▎| 236/255 [01:25<00:06,  2.78it/s, loss=0.136, v_num=1, train_loss=0.402, train_accuracy=0.833, val_l

Epoch 96, global step 21921: val_loss was not in top 2


Epoch 97:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.136, v_num=1, train_loss=0.145, train_accuracy=0.944, val_loss=0.279, val_accuracy=0.908] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 97:  89%|████████▉ | 228/255 [01:24<00:09,  2.70it/s, loss=0.136, v_num=1, train_loss=0.145, train_accuracy=0.944, val_loss=0.279, val_accuracy=0.908]
Epoch 97:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.136, v_num=1, train_loss=0.145, train_accuracy=0.944, val_loss=0.279, val_accuracy=0.908]
Epoch 97:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.136, v_num=1, train_loss=0.145, train_accuracy=0.944, val_loss=0.279, val_accuracy=0.908]
Epoch 97:  92%|█████████▏| 234/255 [01:24<00:07,  2.76it/s, loss=0.136, v_num=1, train_loss=0.145, train_accuracy=0.944, val_loss=0.279, val_accuracy=0.908]
Epoch 97:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.136, v_num=1, train_loss=0.145, train_accuracy=0.944, val_l

Epoch 97, global step 22147: val_loss reached 0.19110 (best 0.18965), saving model to "/Workspace/code/checkpoints/best-checkpoint-corr-random-20-v6.ckpt" as top 2


Epoch 98:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.144, v_num=1, train_loss=0.200, train_accuracy=0.944, val_loss=0.191, val_accuracy=0.932] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 98:  89%|████████▉ | 228/255 [01:24<00:09,  2.71it/s, loss=0.144, v_num=1, train_loss=0.200, train_accuracy=0.944, val_loss=0.191, val_accuracy=0.932]
Epoch 98:  90%|█████████ | 230/255 [01:24<00:09,  2.73it/s, loss=0.144, v_num=1, train_loss=0.200, train_accuracy=0.944, val_loss=0.191, val_accuracy=0.932]
Epoch 98:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.144, v_num=1, train_loss=0.200, train_accuracy=0.944, val_loss=0.191, val_accuracy=0.932]
Epoch 98:  92%|█████████▏| 234/255 [01:24<00:07,  2.76it/s, loss=0.144, v_num=1, train_loss=0.200, train_accuracy=0.944, val_loss=0.191, val_accuracy=0.932]
Epoch 98:  93%|█████████▎| 236/255 [01:24<00:06,  2.78it/s, loss=0.144, v_num=1, train_loss=0.200, train_accuracy=0.944, val_l

Epoch 98, global step 22373: val_loss was not in top 2


Epoch 99:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.107, v_num=1, train_loss=0.236, train_accuracy=0.889, val_loss=0.212, val_accuracy=0.925]  
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 99:  89%|████████▉ | 228/255 [01:24<00:10,  2.70it/s, loss=0.107, v_num=1, train_loss=0.236, train_accuracy=0.889, val_loss=0.212, val_accuracy=0.925]
Epoch 99:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.107, v_num=1, train_loss=0.236, train_accuracy=0.889, val_loss=0.212, val_accuracy=0.925]
Validating:  14%|█▍        | 4/29 [00:02<00:10,  2.45it/s][A
Epoch 99:  91%|█████████ | 232/255 [01:24<00:08,  2.73it/s, loss=0.107, v_num=1, train_loss=0.236, train_accuracy=0.889, val_loss=0.212, val_accuracy=0.925]
Epoch 99:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.107, v_num=1, train_loss=0.236, train_accuracy=0.889, val_loss=0.212, val_accuracy=0.925]
Epoch 99:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, los

Epoch 99, global step 22599: val_loss reached 0.18601 (best 0.18601), saving model to "/Workspace/code/checkpoints/best-checkpoint-corr-random-20-v6.ckpt" as top 2


Epoch 100:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.112, v_num=1, train_loss=0.105, train_accuracy=0.944, val_loss=0.186, val_accuracy=0.935]  
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 100:  89%|████████▉ | 228/255 [01:24<00:10,  2.70it/s, loss=0.112, v_num=1, train_loss=0.105, train_accuracy=0.944, val_loss=0.186, val_accuracy=0.935]
Epoch 100:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.112, v_num=1, train_loss=0.105, train_accuracy=0.944, val_loss=0.186, val_accuracy=0.935]
Epoch 100:  91%|█████████ | 232/255 [01:24<00:08,  2.73it/s, loss=0.112, v_num=1, train_loss=0.105, train_accuracy=0.944, val_loss=0.186, val_accuracy=0.935]
Epoch 100:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.112, v_num=1, train_loss=0.105, train_accuracy=0.944, val_loss=0.186, val_accuracy=0.935]
Epoch 100:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.112, v_num=1, train_loss=0.105, train_accuracy=0.944

Epoch 100, global step 22825: val_loss was not in top 2


Epoch 101:  89%|████████▊ | 226/255 [01:22<00:10,  2.76it/s, loss=0.108, v_num=1, train_loss=0.0607, train_accuracy=1.000, val_loss=0.192, val_accuracy=0.938] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 101:  89%|████████▉ | 228/255 [01:24<00:09,  2.71it/s, loss=0.108, v_num=1, train_loss=0.0607, train_accuracy=1.000, val_loss=0.192, val_accuracy=0.938]
Epoch 101:  90%|█████████ | 230/255 [01:24<00:09,  2.73it/s, loss=0.108, v_num=1, train_loss=0.0607, train_accuracy=1.000, val_loss=0.192, val_accuracy=0.938]
Epoch 101:  91%|█████████ | 232/255 [01:24<00:08,  2.75it/s, loss=0.108, v_num=1, train_loss=0.0607, train_accuracy=1.000, val_loss=0.192, val_accuracy=0.938]
Epoch 101:  92%|█████████▏| 234/255 [01:24<00:07,  2.76it/s, loss=0.108, v_num=1, train_loss=0.0607, train_accuracy=1.000, val_loss=0.192, val_accuracy=0.938]
Epoch 101:  93%|█████████▎| 236/255 [01:24<00:06,  2.78it/s, loss=0.108, v_num=1, train_loss=0.0607, train_accuracy=

Epoch 101, global step 23051: val_loss was not in top 2


Epoch 102:  89%|████████▊ | 226/255 [01:22<00:10,  2.76it/s, loss=0.109, v_num=1, train_loss=0.202, train_accuracy=0.889, val_loss=0.195, val_accuracy=0.929]  
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 102:  89%|████████▉ | 228/255 [01:24<00:09,  2.71it/s, loss=0.109, v_num=1, train_loss=0.202, train_accuracy=0.889, val_loss=0.195, val_accuracy=0.929]
Validating:   7%|▋         | 2/29 [00:02<00:25,  1.06it/s][A
Epoch 102:  90%|█████████ | 230/255 [01:24<00:09,  2.73it/s, loss=0.109, v_num=1, train_loss=0.202, train_accuracy=0.889, val_loss=0.195, val_accuracy=0.929]
Epoch 102:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.109, v_num=1, train_loss=0.202, train_accuracy=0.889, val_loss=0.195, val_accuracy=0.929]
Epoch 102:  92%|█████████▏| 234/255 [01:24<00:07,  2.76it/s, loss=0.109, v_num=1, train_loss=0.202, train_accuracy=0.889, val_loss=0.195, val_accuracy=0.929]
Epoch 102:  93%|█████████▎| 236/255 [01:24<00:06,  2.78it/

Epoch 102, global step 23277: val_loss was not in top 2


Epoch 103:  89%|████████▊ | 226/255 [01:22<00:10,  2.74it/s, loss=0.11, v_num=1, train_loss=0.222, train_accuracy=0.944, val_loss=0.206, val_accuracy=0.931]   
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 103:  89%|████████▉ | 228/255 [01:24<00:10,  2.69it/s, loss=0.11, v_num=1, train_loss=0.222, train_accuracy=0.944, val_loss=0.206, val_accuracy=0.931]
Epoch 103:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.11, v_num=1, train_loss=0.222, train_accuracy=0.944, val_loss=0.206, val_accuracy=0.931]
Epoch 103:  91%|█████████ | 232/255 [01:24<00:08,  2.73it/s, loss=0.11, v_num=1, train_loss=0.222, train_accuracy=0.944, val_loss=0.206, val_accuracy=0.931]
Epoch 103:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.11, v_num=1, train_loss=0.222, train_accuracy=0.944, val_loss=0.206, val_accuracy=0.931]
Epoch 103:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.11, v_num=1, train_loss=0.222, train_accuracy=0.944, val

Epoch 103, global step 23503: val_loss reached 0.16747 (best 0.16747), saving model to "/Workspace/code/checkpoints/best-checkpoint-corr-random-20-v5.ckpt" as top 2


Epoch 104:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.105, v_num=1, train_loss=0.0277, train_accuracy=1.000, val_loss=0.167, val_accuracy=0.945] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 104:  89%|████████▉ | 228/255 [01:24<00:09,  2.70it/s, loss=0.105, v_num=1, train_loss=0.0277, train_accuracy=1.000, val_loss=0.167, val_accuracy=0.945]
Epoch 104:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.105, v_num=1, train_loss=0.0277, train_accuracy=1.000, val_loss=0.167, val_accuracy=0.945]
Epoch 104:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.105, v_num=1, train_loss=0.0277, train_accuracy=1.000, val_loss=0.167, val_accuracy=0.945]
Epoch 104:  92%|█████████▏| 234/255 [01:24<00:07,  2.75it/s, loss=0.105, v_num=1, train_loss=0.0277, train_accuracy=1.000, val_loss=0.167, val_accuracy=0.945]
Epoch 104:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.105, v_num=1, train_loss=0.0277, train_accuracy=

Epoch 104, global step 23729: val_loss was not in top 2


Epoch 105:  89%|████████▊ | 226/255 [01:21<00:10,  2.76it/s, loss=0.135, v_num=1, train_loss=0.207, train_accuracy=0.944, val_loss=0.195, val_accuracy=0.935] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 105:  89%|████████▉ | 228/255 [01:24<00:09,  2.71it/s, loss=0.135, v_num=1, train_loss=0.207, train_accuracy=0.944, val_loss=0.195, val_accuracy=0.935]
Epoch 105:  90%|█████████ | 230/255 [01:24<00:09,  2.73it/s, loss=0.135, v_num=1, train_loss=0.207, train_accuracy=0.944, val_loss=0.195, val_accuracy=0.935]
Epoch 105:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.135, v_num=1, train_loss=0.207, train_accuracy=0.944, val_loss=0.195, val_accuracy=0.935]
Epoch 105:  92%|█████████▏| 234/255 [01:24<00:07,  2.76it/s, loss=0.135, v_num=1, train_loss=0.207, train_accuracy=0.944, val_loss=0.195, val_accuracy=0.935]
Epoch 105:  93%|█████████▎| 236/255 [01:24<00:06,  2.78it/s, loss=0.135, v_num=1, train_loss=0.207, train_accuracy=0.944,

Epoch 105, global step 23955: val_loss was not in top 2


Epoch 106:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.129, v_num=1, train_loss=0.194, train_accuracy=0.944, val_loss=0.263, val_accuracy=0.917]  
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 106:  89%|████████▉ | 228/255 [01:24<00:09,  2.70it/s, loss=0.129, v_num=1, train_loss=0.194, train_accuracy=0.944, val_loss=0.263, val_accuracy=0.917]
Epoch 106:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.129, v_num=1, train_loss=0.194, train_accuracy=0.944, val_loss=0.263, val_accuracy=0.917]
Epoch 106:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.129, v_num=1, train_loss=0.194, train_accuracy=0.944, val_loss=0.263, val_accuracy=0.917]
Epoch 106:  92%|█████████▏| 234/255 [01:24<00:07,  2.76it/s, loss=0.129, v_num=1, train_loss=0.194, train_accuracy=0.944, val_loss=0.263, val_accuracy=0.917]
Epoch 106:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.129, v_num=1, train_loss=0.194, train_accuracy=0.944

Epoch 106, global step 24181: val_loss was not in top 2


Epoch 107:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.106, v_num=1, train_loss=0.233, train_accuracy=0.944, val_loss=0.235, val_accuracy=0.901]  
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 107:  89%|████████▉ | 228/255 [01:24<00:09,  2.71it/s, loss=0.106, v_num=1, train_loss=0.233, train_accuracy=0.944, val_loss=0.235, val_accuracy=0.901]
Epoch 107:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.106, v_num=1, train_loss=0.233, train_accuracy=0.944, val_loss=0.235, val_accuracy=0.901]
Epoch 107:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.106, v_num=1, train_loss=0.233, train_accuracy=0.944, val_loss=0.235, val_accuracy=0.901]
Epoch 107:  92%|█████████▏| 234/255 [01:24<00:07,  2.76it/s, loss=0.106, v_num=1, train_loss=0.233, train_accuracy=0.944, val_loss=0.235, val_accuracy=0.901]
Epoch 107:  93%|█████████▎| 236/255 [01:24<00:06,  2.78it/s, loss=0.106, v_num=1, train_loss=0.233, train_accuracy=0.944

Epoch 107, global step 24407: val_loss reached 0.17917 (best 0.16747), saving model to "/Workspace/code/checkpoints/best-checkpoint-corr-random-20-v6.ckpt" as top 2


Epoch 108:  89%|████████▊ | 226/255 [01:22<00:10,  2.74it/s, loss=0.115, v_num=1, train_loss=0.256, train_accuracy=0.889, val_loss=0.179, val_accuracy=0.939]  
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 108:  89%|████████▉ | 228/255 [01:24<00:10,  2.70it/s, loss=0.115, v_num=1, train_loss=0.256, train_accuracy=0.889, val_loss=0.179, val_accuracy=0.939]
Epoch 108:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.115, v_num=1, train_loss=0.256, train_accuracy=0.889, val_loss=0.179, val_accuracy=0.939]
Epoch 108:  91%|█████████ | 232/255 [01:24<00:08,  2.73it/s, loss=0.115, v_num=1, train_loss=0.256, train_accuracy=0.889, val_loss=0.179, val_accuracy=0.939]
Epoch 108:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.115, v_num=1, train_loss=0.256, train_accuracy=0.889, val_loss=0.179, val_accuracy=0.939]
Epoch 108:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.115, v_num=1, train_loss=0.256, train_accuracy=0.889

Epoch 108, global step 24633: val_loss was not in top 2


Epoch 109:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.132, v_num=1, train_loss=0.145, train_accuracy=0.944, val_loss=0.232, val_accuracy=0.924] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 109:  89%|████████▉ | 228/255 [01:24<00:10,  2.70it/s, loss=0.132, v_num=1, train_loss=0.145, train_accuracy=0.944, val_loss=0.232, val_accuracy=0.924]
Epoch 109:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.132, v_num=1, train_loss=0.145, train_accuracy=0.944, val_loss=0.232, val_accuracy=0.924]
Epoch 109:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.132, v_num=1, train_loss=0.145, train_accuracy=0.944, val_loss=0.232, val_accuracy=0.924]
Epoch 109:  92%|█████████▏| 234/255 [01:24<00:07,  2.75it/s, loss=0.132, v_num=1, train_loss=0.145, train_accuracy=0.944, val_loss=0.232, val_accuracy=0.924]
Epoch 109:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.132, v_num=1, train_loss=0.145, train_accuracy=0.944,

Epoch 109, global step 24859: val_loss was not in top 2


Epoch 110:  89%|████████▊ | 226/255 [01:22<00:10,  2.74it/s, loss=0.104, v_num=1, train_loss=0.121, train_accuracy=0.944, val_loss=0.214, val_accuracy=0.930]  
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 110:  89%|████████▉ | 228/255 [01:24<00:10,  2.70it/s, loss=0.104, v_num=1, train_loss=0.121, train_accuracy=0.944, val_loss=0.214, val_accuracy=0.930]
Epoch 110:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.104, v_num=1, train_loss=0.121, train_accuracy=0.944, val_loss=0.214, val_accuracy=0.930]
Epoch 110:  91%|█████████ | 232/255 [01:24<00:08,  2.73it/s, loss=0.104, v_num=1, train_loss=0.121, train_accuracy=0.944, val_loss=0.214, val_accuracy=0.930]
Epoch 110:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.104, v_num=1, train_loss=0.121, train_accuracy=0.944, val_loss=0.214, val_accuracy=0.930]
Epoch 110:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.104, v_num=1, train_loss=0.121, train_accuracy=0.944

Epoch 110, global step 25085: val_loss was not in top 2


Epoch 111:  89%|████████▊ | 226/255 [01:22<00:10,  2.74it/s, loss=0.0713, v_num=1, train_loss=0.0425, train_accuracy=1.000, val_loss=0.201, val_accuracy=0.931]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 111:  89%|████████▉ | 228/255 [01:24<00:10,  2.69it/s, loss=0.0713, v_num=1, train_loss=0.0425, train_accuracy=1.000, val_loss=0.201, val_accuracy=0.931]
Validating:   7%|▋         | 2/29 [00:02<00:26,  1.01it/s][A
Epoch 111:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.0713, v_num=1, train_loss=0.0425, train_accuracy=1.000, val_loss=0.201, val_accuracy=0.931]
Epoch 111:  91%|█████████ | 232/255 [01:24<00:08,  2.73it/s, loss=0.0713, v_num=1, train_loss=0.0425, train_accuracy=1.000, val_loss=0.201, val_accuracy=0.931]
Epoch 111:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.0713, v_num=1, train_loss=0.0425, train_accuracy=1.000, val_loss=0.201, val_accuracy=0.931]
Epoch 111:  93%|█████████▎| 236/255 [01:25<00:06, 

Epoch 111, global step 25311: val_loss was not in top 2


Epoch 112:  89%|████████▊ | 226/255 [01:22<00:10,  2.74it/s, loss=0.118, v_num=1, train_loss=0.199, train_accuracy=0.889, val_loss=0.192, val_accuracy=0.936]   
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 112:  89%|████████▉ | 228/255 [01:24<00:10,  2.69it/s, loss=0.118, v_num=1, train_loss=0.199, train_accuracy=0.889, val_loss=0.192, val_accuracy=0.936]
Epoch 112:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.118, v_num=1, train_loss=0.199, train_accuracy=0.889, val_loss=0.192, val_accuracy=0.936]
Epoch 112:  91%|█████████ | 232/255 [01:25<00:08,  2.73it/s, loss=0.118, v_num=1, train_loss=0.199, train_accuracy=0.889, val_loss=0.192, val_accuracy=0.936]
Epoch 112:  92%|█████████▏| 234/255 [01:25<00:07,  2.74it/s, loss=0.118, v_num=1, train_loss=0.199, train_accuracy=0.889, val_loss=0.192, val_accuracy=0.936]
Epoch 112:  93%|█████████▎| 236/255 [01:25<00:06,  2.76it/s, loss=0.118, v_num=1, train_loss=0.199, train_accuracy=0.88

Epoch 112, global step 25537: val_loss was not in top 2


Epoch 113:  89%|████████▊ | 226/255 [01:22<00:10,  2.74it/s, loss=0.0966, v_num=1, train_loss=0.0417, train_accuracy=1.000, val_loss=0.194, val_accuracy=0.936]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 113:  89%|████████▉ | 228/255 [01:24<00:10,  2.70it/s, loss=0.0966, v_num=1, train_loss=0.0417, train_accuracy=1.000, val_loss=0.194, val_accuracy=0.936]
Epoch 113:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.0966, v_num=1, train_loss=0.0417, train_accuracy=1.000, val_loss=0.194, val_accuracy=0.936]
Validating:  14%|█▍        | 4/29 [00:02<00:11,  2.17it/s][A
Epoch 113:  91%|█████████ | 232/255 [01:25<00:08,  2.73it/s, loss=0.0966, v_num=1, train_loss=0.0417, train_accuracy=1.000, val_loss=0.194, val_accuracy=0.936]
Epoch 113:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.0966, v_num=1, train_loss=0.0417, train_accuracy=1.000, val_loss=0.194, val_accuracy=0.936]
Epoch 113:  93%|█████████▎| 236/255 [01:25<00:06, 

Epoch 113, global step 25763: val_loss was not in top 2


Epoch 114:  89%|████████▊ | 226/255 [01:22<00:10,  2.72it/s, loss=0.113, v_num=1, train_loss=0.153, train_accuracy=0.944, val_loss=0.187, val_accuracy=0.938]  
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 114:  89%|████████▉ | 228/255 [01:25<00:10,  2.68it/s, loss=0.113, v_num=1, train_loss=0.153, train_accuracy=0.944, val_loss=0.187, val_accuracy=0.938]
Epoch 114:  90%|█████████ | 230/255 [01:25<00:09,  2.69it/s, loss=0.113, v_num=1, train_loss=0.153, train_accuracy=0.944, val_loss=0.187, val_accuracy=0.938]
Epoch 114:  91%|█████████ | 232/255 [01:25<00:08,  2.71it/s, loss=0.113, v_num=1, train_loss=0.153, train_accuracy=0.944, val_loss=0.187, val_accuracy=0.938]
Epoch 114:  92%|█████████▏| 234/255 [01:25<00:07,  2.73it/s, loss=0.113, v_num=1, train_loss=0.153, train_accuracy=0.944, val_loss=0.187, val_accuracy=0.938]
Epoch 114:  93%|█████████▎| 236/255 [01:25<00:06,  2.75it/s, loss=0.113, v_num=1, train_loss=0.153, train_accuracy=0.944

Epoch 114, global step 25989: val_loss was not in top 2


Epoch 115:  89%|████████▊ | 226/255 [01:22<00:10,  2.74it/s, loss=0.0843, v_num=1, train_loss=0.0236, train_accuracy=1.000, val_loss=0.211, val_accuracy=0.942]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 115:  89%|████████▉ | 228/255 [01:24<00:10,  2.68it/s, loss=0.0843, v_num=1, train_loss=0.0236, train_accuracy=1.000, val_loss=0.211, val_accuracy=0.942]
Epoch 115:  90%|█████████ | 230/255 [01:25<00:09,  2.70it/s, loss=0.0843, v_num=1, train_loss=0.0236, train_accuracy=1.000, val_loss=0.211, val_accuracy=0.942]
Epoch 115:  91%|█████████ | 232/255 [01:25<00:08,  2.72it/s, loss=0.0843, v_num=1, train_loss=0.0236, train_accuracy=1.000, val_loss=0.211, val_accuracy=0.942]
Epoch 115:  92%|█████████▏| 234/255 [01:25<00:07,  2.74it/s, loss=0.0843, v_num=1, train_loss=0.0236, train_accuracy=1.000, val_loss=0.211, val_accuracy=0.942]
Epoch 115:  93%|█████████▎| 236/255 [01:25<00:06,  2.76it/s, loss=0.0843, v_num=1, train_loss=0.0236, train_accu

Epoch 115, global step 26215: val_loss was not in top 2


Epoch 116:  89%|████████▊ | 226/255 [01:22<00:10,  2.74it/s, loss=0.124, v_num=1, train_loss=0.0859, train_accuracy=0.944, val_loss=0.189, val_accuracy=0.943]  
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 116:  89%|████████▉ | 228/255 [01:24<00:10,  2.70it/s, loss=0.124, v_num=1, train_loss=0.0859, train_accuracy=0.944, val_loss=0.189, val_accuracy=0.943]
Epoch 116:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.124, v_num=1, train_loss=0.0859, train_accuracy=0.944, val_loss=0.189, val_accuracy=0.943]
Epoch 116:  91%|█████████ | 232/255 [01:24<00:08,  2.73it/s, loss=0.124, v_num=1, train_loss=0.0859, train_accuracy=0.944, val_loss=0.189, val_accuracy=0.943]
Epoch 116:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.124, v_num=1, train_loss=0.0859, train_accuracy=0.944, val_loss=0.189, val_accuracy=0.943]
Epoch 116:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.124, v_num=1, train_loss=0.0859, train_accuracy

Epoch 116, global step 26441: val_loss was not in top 2


Epoch 117:  89%|████████▊ | 226/255 [01:21<00:10,  2.76it/s, loss=0.0667, v_num=1, train_loss=0.0676, train_accuracy=1.000, val_loss=0.191, val_accuracy=0.937]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 117:  89%|████████▉ | 228/255 [01:24<00:09,  2.71it/s, loss=0.0667, v_num=1, train_loss=0.0676, train_accuracy=1.000, val_loss=0.191, val_accuracy=0.937]
Epoch 117:  90%|█████████ | 230/255 [01:24<00:09,  2.73it/s, loss=0.0667, v_num=1, train_loss=0.0676, train_accuracy=1.000, val_loss=0.191, val_accuracy=0.937]
Epoch 117:  91%|█████████ | 232/255 [01:24<00:08,  2.75it/s, loss=0.0667, v_num=1, train_loss=0.0676, train_accuracy=1.000, val_loss=0.191, val_accuracy=0.937]
Epoch 117:  92%|█████████▏| 234/255 [01:24<00:07,  2.76it/s, loss=0.0667, v_num=1, train_loss=0.0676, train_accuracy=1.000, val_loss=0.191, val_accuracy=0.937]
Epoch 117:  93%|█████████▎| 236/255 [01:24<00:06,  2.78it/s, loss=0.0667, v_num=1, train_loss=0.0676, train_accu

Epoch 117, global step 26667: val_loss was not in top 2


Epoch 118:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.13, v_num=1, train_loss=0.0911, train_accuracy=0.944, val_loss=0.230, val_accuracy=0.938]  
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 118:  89%|████████▉ | 228/255 [01:24<00:09,  2.70it/s, loss=0.13, v_num=1, train_loss=0.0911, train_accuracy=0.944, val_loss=0.230, val_accuracy=0.938]
Epoch 118:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.13, v_num=1, train_loss=0.0911, train_accuracy=0.944, val_loss=0.230, val_accuracy=0.938]
Epoch 118:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.13, v_num=1, train_loss=0.0911, train_accuracy=0.944, val_loss=0.230, val_accuracy=0.938]
Epoch 118:  92%|█████████▏| 234/255 [01:24<00:07,  2.76it/s, loss=0.13, v_num=1, train_loss=0.0911, train_accuracy=0.944, val_loss=0.230, val_accuracy=0.938]
Epoch 118:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.13, v_num=1, train_loss=0.0911, train_accuracy=0.944

Epoch 118, global step 26893: val_loss was not in top 2


Epoch 119:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.0746, v_num=1, train_loss=0.0831, train_accuracy=0.944, val_loss=0.206, val_accuracy=0.922]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 119:  89%|████████▉ | 228/255 [01:24<00:09,  2.70it/s, loss=0.0746, v_num=1, train_loss=0.0831, train_accuracy=0.944, val_loss=0.206, val_accuracy=0.922]
Epoch 119:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.0746, v_num=1, train_loss=0.0831, train_accuracy=0.944, val_loss=0.206, val_accuracy=0.922]
Epoch 119:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.0746, v_num=1, train_loss=0.0831, train_accuracy=0.944, val_loss=0.206, val_accuracy=0.922]
Epoch 119:  92%|█████████▏| 234/255 [01:24<00:07,  2.76it/s, loss=0.0746, v_num=1, train_loss=0.0831, train_accuracy=0.944, val_loss=0.206, val_accuracy=0.922]
Epoch 119:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.0746, v_num=1, train_loss=0.0831, train_accu

Epoch 119, global step 27119: val_loss was not in top 2


Epoch 120:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.0881, v_num=1, train_loss=0.0601, train_accuracy=1.000, val_loss=0.207, val_accuracy=0.945] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 120:  89%|████████▉ | 228/255 [01:24<00:09,  2.70it/s, loss=0.0881, v_num=1, train_loss=0.0601, train_accuracy=1.000, val_loss=0.207, val_accuracy=0.945]
Epoch 120:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.0881, v_num=1, train_loss=0.0601, train_accuracy=1.000, val_loss=0.207, val_accuracy=0.945]
Epoch 120:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.0881, v_num=1, train_loss=0.0601, train_accuracy=1.000, val_loss=0.207, val_accuracy=0.945]
Epoch 120:  92%|█████████▏| 234/255 [01:24<00:07,  2.76it/s, loss=0.0881, v_num=1, train_loss=0.0601, train_accuracy=1.000, val_loss=0.207, val_accuracy=0.945]
Epoch 120:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.0881, v_num=1, train_loss=0.0601, train_acc

Epoch 120, global step 27345: val_loss was not in top 2


Epoch 121:  89%|████████▊ | 226/255 [01:22<00:10,  2.74it/s, loss=0.0906, v_num=1, train_loss=0.0495, train_accuracy=1.000, val_loss=0.198, val_accuracy=0.939]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 121:  89%|████████▉ | 228/255 [01:24<00:10,  2.69it/s, loss=0.0906, v_num=1, train_loss=0.0495, train_accuracy=1.000, val_loss=0.198, val_accuracy=0.939]
Epoch 121:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.0906, v_num=1, train_loss=0.0495, train_accuracy=1.000, val_loss=0.198, val_accuracy=0.939]
Epoch 121:  91%|█████████ | 232/255 [01:25<00:08,  2.73it/s, loss=0.0906, v_num=1, train_loss=0.0495, train_accuracy=1.000, val_loss=0.198, val_accuracy=0.939]
Epoch 121:  92%|█████████▏| 234/255 [01:25<00:07,  2.74it/s, loss=0.0906, v_num=1, train_loss=0.0495, train_accuracy=1.000, val_loss=0.198, val_accuracy=0.939]
Epoch 121:  93%|█████████▎| 236/255 [01:25<00:06,  2.76it/s, loss=0.0906, v_num=1, train_loss=0.0495, train_accu

Epoch 121, global step 27571: val_loss was not in top 2


Epoch 122:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.253, v_num=1, train_loss=0.195, train_accuracy=0.944, val_loss=0.207, val_accuracy=0.940]   
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 122:  89%|████████▉ | 228/255 [01:24<00:10,  2.70it/s, loss=0.253, v_num=1, train_loss=0.195, train_accuracy=0.944, val_loss=0.207, val_accuracy=0.940]
Epoch 122:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.253, v_num=1, train_loss=0.195, train_accuracy=0.944, val_loss=0.207, val_accuracy=0.940]
Epoch 122:  91%|█████████ | 232/255 [01:24<00:08,  2.73it/s, loss=0.253, v_num=1, train_loss=0.195, train_accuracy=0.944, val_loss=0.207, val_accuracy=0.940]
Epoch 122:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.253, v_num=1, train_loss=0.195, train_accuracy=0.944, val_loss=0.207, val_accuracy=0.940]
Epoch 122:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.253, v_num=1, train_loss=0.195, train_accuracy=0.94

Epoch 122, global step 27797: val_loss was not in top 2


Epoch 123:  89%|████████▊ | 226/255 [01:22<00:10,  2.74it/s, loss=0.184, v_num=1, train_loss=0.211, train_accuracy=0.889, val_loss=0.296, val_accuracy=0.871] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 123:  89%|████████▉ | 228/255 [01:24<00:10,  2.69it/s, loss=0.184, v_num=1, train_loss=0.211, train_accuracy=0.889, val_loss=0.296, val_accuracy=0.871]
Epoch 123:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.184, v_num=1, train_loss=0.211, train_accuracy=0.889, val_loss=0.296, val_accuracy=0.871]
Epoch 123:  91%|█████████ | 232/255 [01:25<00:08,  2.73it/s, loss=0.184, v_num=1, train_loss=0.211, train_accuracy=0.889, val_loss=0.296, val_accuracy=0.871]
Epoch 123:  92%|█████████▏| 234/255 [01:25<00:07,  2.74it/s, loss=0.184, v_num=1, train_loss=0.211, train_accuracy=0.889, val_loss=0.296, val_accuracy=0.871]
Epoch 123:  93%|█████████▎| 236/255 [01:25<00:06,  2.76it/s, loss=0.184, v_num=1, train_loss=0.211, train_accuracy=0.889,

Epoch 123, global step 28023: val_loss was not in top 2


Epoch 124:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.182, v_num=1, train_loss=0.186, train_accuracy=0.889, val_loss=0.253, val_accuracy=0.878] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 124:  89%|████████▉ | 228/255 [01:24<00:10,  2.69it/s, loss=0.182, v_num=1, train_loss=0.186, train_accuracy=0.889, val_loss=0.253, val_accuracy=0.878]
Epoch 124:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.182, v_num=1, train_loss=0.186, train_accuracy=0.889, val_loss=0.253, val_accuracy=0.878]
Epoch 124:  91%|█████████ | 232/255 [01:25<00:08,  2.73it/s, loss=0.182, v_num=1, train_loss=0.186, train_accuracy=0.889, val_loss=0.253, val_accuracy=0.878]
Epoch 124:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.182, v_num=1, train_loss=0.186, train_accuracy=0.889, val_loss=0.253, val_accuracy=0.878]
Epoch 124:  93%|█████████▎| 236/255 [01:25<00:06,  2.76it/s, loss=0.182, v_num=1, train_loss=0.186, train_accuracy=0.889,

Epoch 124, global step 28249: val_loss was not in top 2


Epoch 125:  89%|████████▊ | 226/255 [01:22<00:10,  2.74it/s, loss=0.182, v_num=1, train_loss=0.141, train_accuracy=1.000, val_loss=0.267, val_accuracy=0.894] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 125:  89%|████████▉ | 228/255 [01:24<00:10,  2.69it/s, loss=0.182, v_num=1, train_loss=0.141, train_accuracy=1.000, val_loss=0.267, val_accuracy=0.894]
Epoch 125:  90%|█████████ | 230/255 [01:25<00:09,  2.71it/s, loss=0.182, v_num=1, train_loss=0.141, train_accuracy=1.000, val_loss=0.267, val_accuracy=0.894]
Epoch 125:  91%|█████████ | 232/255 [01:25<00:08,  2.72it/s, loss=0.182, v_num=1, train_loss=0.141, train_accuracy=1.000, val_loss=0.267, val_accuracy=0.894]
Epoch 125:  92%|█████████▏| 234/255 [01:25<00:07,  2.74it/s, loss=0.182, v_num=1, train_loss=0.141, train_accuracy=1.000, val_loss=0.267, val_accuracy=0.894]
Epoch 125:  93%|█████████▎| 236/255 [01:25<00:06,  2.76it/s, loss=0.182, v_num=1, train_loss=0.141, train_accuracy=1.000,

Epoch 125, global step 28475: val_loss was not in top 2


Epoch 126:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.172, v_num=1, train_loss=0.122, train_accuracy=0.944, val_loss=0.295, val_accuracy=0.900] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 126:  89%|████████▉ | 228/255 [01:24<00:09,  2.70it/s, loss=0.172, v_num=1, train_loss=0.122, train_accuracy=0.944, val_loss=0.295, val_accuracy=0.900]
Epoch 126:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.172, v_num=1, train_loss=0.122, train_accuracy=0.944, val_loss=0.295, val_accuracy=0.900]
Epoch 126:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.172, v_num=1, train_loss=0.122, train_accuracy=0.944, val_loss=0.295, val_accuracy=0.900]
Epoch 126:  92%|█████████▏| 234/255 [01:24<00:07,  2.76it/s, loss=0.172, v_num=1, train_loss=0.122, train_accuracy=0.944, val_loss=0.295, val_accuracy=0.900]
Epoch 126:  93%|█████████▎| 236/255 [01:25<00:06,  2.78it/s, loss=0.172, v_num=1, train_loss=0.122, train_accuracy=0.944,

Epoch 126, global step 28701: val_loss was not in top 2


Epoch 127:  89%|████████▊ | 226/255 [01:22<00:10,  2.74it/s, loss=0.164, v_num=1, train_loss=0.301, train_accuracy=0.889, val_loss=0.283, val_accuracy=0.905] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 127:  89%|████████▉ | 228/255 [01:24<00:10,  2.69it/s, loss=0.164, v_num=1, train_loss=0.301, train_accuracy=0.889, val_loss=0.283, val_accuracy=0.905]
Epoch 127:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.164, v_num=1, train_loss=0.301, train_accuracy=0.889, val_loss=0.283, val_accuracy=0.905]
Epoch 127:  91%|█████████ | 232/255 [01:25<00:08,  2.73it/s, loss=0.164, v_num=1, train_loss=0.301, train_accuracy=0.889, val_loss=0.283, val_accuracy=0.905]
Epoch 127:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.164, v_num=1, train_loss=0.301, train_accuracy=0.889, val_loss=0.283, val_accuracy=0.905]
Epoch 127:  93%|█████████▎| 236/255 [01:25<00:06,  2.76it/s, loss=0.164, v_num=1, train_loss=0.301, train_accuracy=0.889,

Epoch 127, global step 28927: val_loss was not in top 2


Epoch 128:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.161, v_num=1, train_loss=0.0744, train_accuracy=1.000, val_loss=0.276, val_accuracy=0.911]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 128:  89%|████████▉ | 228/255 [01:24<00:09,  2.70it/s, loss=0.161, v_num=1, train_loss=0.0744, train_accuracy=1.000, val_loss=0.276, val_accuracy=0.911]
Epoch 128:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.161, v_num=1, train_loss=0.0744, train_accuracy=1.000, val_loss=0.276, val_accuracy=0.911]
Epoch 128:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.161, v_num=1, train_loss=0.0744, train_accuracy=1.000, val_loss=0.276, val_accuracy=0.911]
Epoch 128:  92%|█████████▏| 234/255 [01:24<00:07,  2.75it/s, loss=0.161, v_num=1, train_loss=0.0744, train_accuracy=1.000, val_loss=0.276, val_accuracy=0.911]
Epoch 128:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.161, v_num=1, train_loss=0.0744, train_accuracy=1

Epoch 128, global step 29153: val_loss was not in top 2


Epoch 129:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.202, v_num=1, train_loss=0.514, train_accuracy=0.889, val_loss=0.290, val_accuracy=0.917] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 129:  89%|████████▉ | 228/255 [01:24<00:10,  2.69it/s, loss=0.202, v_num=1, train_loss=0.514, train_accuracy=0.889, val_loss=0.290, val_accuracy=0.917]
Epoch 129:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.202, v_num=1, train_loss=0.514, train_accuracy=0.889, val_loss=0.290, val_accuracy=0.917]
Epoch 129:  91%|█████████ | 232/255 [01:24<00:08,  2.73it/s, loss=0.202, v_num=1, train_loss=0.514, train_accuracy=0.889, val_loss=0.290, val_accuracy=0.917]
Epoch 129:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.202, v_num=1, train_loss=0.514, train_accuracy=0.889, val_loss=0.290, val_accuracy=0.917]
Epoch 129:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.202, v_num=1, train_loss=0.514, train_accuracy=0.889,

Epoch 129, global step 29379: val_loss was not in top 2


Epoch 130:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.202, v_num=1, train_loss=0.163, train_accuracy=0.889, val_loss=0.266, val_accuracy=0.918]  
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 130:  89%|████████▉ | 228/255 [01:24<00:10,  2.70it/s, loss=0.202, v_num=1, train_loss=0.163, train_accuracy=0.889, val_loss=0.266, val_accuracy=0.918]
Epoch 130:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.202, v_num=1, train_loss=0.163, train_accuracy=0.889, val_loss=0.266, val_accuracy=0.918]
Epoch 130:  91%|█████████ | 232/255 [01:24<00:08,  2.73it/s, loss=0.202, v_num=1, train_loss=0.163, train_accuracy=0.889, val_loss=0.266, val_accuracy=0.918]
Epoch 130:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.202, v_num=1, train_loss=0.163, train_accuracy=0.889, val_loss=0.266, val_accuracy=0.918]
Epoch 130:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.202, v_num=1, train_loss=0.163, train_accuracy=0.889

Epoch 130, global step 29605: val_loss was not in top 2


Epoch 131:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.13, v_num=1, train_loss=0.142, train_accuracy=0.889, val_loss=0.387, val_accuracy=0.818]  
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 131:  89%|████████▉ | 228/255 [01:24<00:10,  2.69it/s, loss=0.13, v_num=1, train_loss=0.142, train_accuracy=0.889, val_loss=0.387, val_accuracy=0.818]
Epoch 131:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.13, v_num=1, train_loss=0.142, train_accuracy=0.889, val_loss=0.387, val_accuracy=0.818]
Epoch 131:  91%|█████████ | 232/255 [01:24<00:08,  2.73it/s, loss=0.13, v_num=1, train_loss=0.142, train_accuracy=0.889, val_loss=0.387, val_accuracy=0.818]
Epoch 131:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.13, v_num=1, train_loss=0.142, train_accuracy=0.889, val_loss=0.387, val_accuracy=0.818]
Epoch 131:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.13, v_num=1, train_loss=0.142, train_accuracy=0.889, val_

Epoch 131, global step 29831: val_loss was not in top 2


Epoch 132:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.127, v_num=1, train_loss=0.0592, train_accuracy=1.000, val_loss=0.242, val_accuracy=0.913]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 132:  89%|████████▉ | 228/255 [01:24<00:10,  2.70it/s, loss=0.127, v_num=1, train_loss=0.0592, train_accuracy=1.000, val_loss=0.242, val_accuracy=0.913]
Epoch 132:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.127, v_num=1, train_loss=0.0592, train_accuracy=1.000, val_loss=0.242, val_accuracy=0.913]
Epoch 132:  91%|█████████ | 232/255 [01:24<00:08,  2.73it/s, loss=0.127, v_num=1, train_loss=0.0592, train_accuracy=1.000, val_loss=0.242, val_accuracy=0.913]
Validating:  21%|██        | 6/29 [00:02<00:06,  3.44it/s][A
Epoch 132:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.127, v_num=1, train_loss=0.0592, train_accuracy=1.000, val_loss=0.242, val_accuracy=0.913]
Epoch 132:  93%|█████████▎| 236/255 [01:25<00:06,  2.77

Epoch 132, global step 30057: val_loss was not in top 2


Epoch 133:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.114, v_num=1, train_loss=0.0834, train_accuracy=1.000, val_loss=0.257, val_accuracy=0.924] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 133:  89%|████████▉ | 228/255 [01:24<00:10,  2.70it/s, loss=0.114, v_num=1, train_loss=0.0834, train_accuracy=1.000, val_loss=0.257, val_accuracy=0.924]
Epoch 133:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.114, v_num=1, train_loss=0.0834, train_accuracy=1.000, val_loss=0.257, val_accuracy=0.924]
Epoch 133:  91%|█████████ | 232/255 [01:24<00:08,  2.73it/s, loss=0.114, v_num=1, train_loss=0.0834, train_accuracy=1.000, val_loss=0.257, val_accuracy=0.924]
Epoch 133:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.114, v_num=1, train_loss=0.0834, train_accuracy=1.000, val_loss=0.257, val_accuracy=0.924]
Epoch 133:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.114, v_num=1, train_loss=0.0834, train_accuracy=

Epoch 133, global step 30283: val_loss was not in top 2


Epoch 134:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.0924, v_num=1, train_loss=0.0832, train_accuracy=1.000, val_loss=0.242, val_accuracy=0.927]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 134:  89%|████████▉ | 228/255 [01:24<00:10,  2.70it/s, loss=0.0924, v_num=1, train_loss=0.0832, train_accuracy=1.000, val_loss=0.242, val_accuracy=0.927]
Epoch 134:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.0924, v_num=1, train_loss=0.0832, train_accuracy=1.000, val_loss=0.242, val_accuracy=0.927]
Epoch 134:  91%|█████████ | 232/255 [01:24<00:08,  2.73it/s, loss=0.0924, v_num=1, train_loss=0.0832, train_accuracy=1.000, val_loss=0.242, val_accuracy=0.927]
Epoch 134:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.0924, v_num=1, train_loss=0.0832, train_accuracy=1.000, val_loss=0.242, val_accuracy=0.927]
Epoch 134:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.0924, v_num=1, train_loss=0.0832, train_accu

Epoch 134, global step 30509: val_loss was not in top 2


Epoch 135:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.135, v_num=1, train_loss=0.116, train_accuracy=0.944, val_loss=0.210, val_accuracy=0.931]  
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 135:  89%|████████▉ | 228/255 [01:24<00:10,  2.70it/s, loss=0.135, v_num=1, train_loss=0.116, train_accuracy=0.944, val_loss=0.210, val_accuracy=0.931]
Epoch 135:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.135, v_num=1, train_loss=0.116, train_accuracy=0.944, val_loss=0.210, val_accuracy=0.931]
Epoch 135:  91%|█████████ | 232/255 [01:24<00:08,  2.73it/s, loss=0.135, v_num=1, train_loss=0.116, train_accuracy=0.944, val_loss=0.210, val_accuracy=0.931]
Epoch 135:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.135, v_num=1, train_loss=0.116, train_accuracy=0.944, val_loss=0.210, val_accuracy=0.931]
Epoch 135:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.135, v_num=1, train_loss=0.116, train_accuracy=0.944

Epoch 135, global step 30735: val_loss was not in top 2


Epoch 136:  89%|████████▊ | 226/255 [01:22<00:10,  2.73it/s, loss=0.229, v_num=1, train_loss=0.0715, train_accuracy=1.000, val_loss=0.289, val_accuracy=0.922] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 136:  89%|████████▉ | 228/255 [01:24<00:10,  2.68it/s, loss=0.229, v_num=1, train_loss=0.0715, train_accuracy=1.000, val_loss=0.289, val_accuracy=0.922]
Epoch 136:  90%|█████████ | 230/255 [01:25<00:09,  2.70it/s, loss=0.229, v_num=1, train_loss=0.0715, train_accuracy=1.000, val_loss=0.289, val_accuracy=0.922]
Epoch 136:  91%|█████████ | 232/255 [01:25<00:08,  2.72it/s, loss=0.229, v_num=1, train_loss=0.0715, train_accuracy=1.000, val_loss=0.289, val_accuracy=0.922]
Epoch 136:  92%|█████████▏| 234/255 [01:25<00:07,  2.74it/s, loss=0.229, v_num=1, train_loss=0.0715, train_accuracy=1.000, val_loss=0.289, val_accuracy=0.922]
Epoch 136:  93%|█████████▎| 236/255 [01:25<00:06,  2.75it/s, loss=0.229, v_num=1, train_loss=0.0715, train_accuracy=

Epoch 136, global step 30961: val_loss was not in top 2


Epoch 137:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.083, v_num=1, train_loss=0.00713, train_accuracy=1.000, val_loss=0.366, val_accuracy=0.898]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 137:  89%|████████▉ | 228/255 [01:24<00:10,  2.70it/s, loss=0.083, v_num=1, train_loss=0.00713, train_accuracy=1.000, val_loss=0.366, val_accuracy=0.898]
Epoch 137:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.083, v_num=1, train_loss=0.00713, train_accuracy=1.000, val_loss=0.366, val_accuracy=0.898]
Epoch 137:  91%|█████████ | 232/255 [01:24<00:08,  2.73it/s, loss=0.083, v_num=1, train_loss=0.00713, train_accuracy=1.000, val_loss=0.366, val_accuracy=0.898]
Epoch 137:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.083, v_num=1, train_loss=0.00713, train_accuracy=1.000, val_loss=0.366, val_accuracy=0.898]
Epoch 137:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.083, v_num=1, train_loss=0.00713, train_accu

Epoch 137, global step 31187: val_loss was not in top 2


Epoch 138:  89%|████████▊ | 226/255 [01:22<00:10,  2.74it/s, loss=0.0714, v_num=1, train_loss=0.0941, train_accuracy=0.944, val_loss=0.193, val_accuracy=0.935]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 138:  89%|████████▉ | 228/255 [01:24<00:10,  2.69it/s, loss=0.0714, v_num=1, train_loss=0.0941, train_accuracy=0.944, val_loss=0.193, val_accuracy=0.935]
Epoch 138:  90%|█████████ | 230/255 [01:25<00:09,  2.71it/s, loss=0.0714, v_num=1, train_loss=0.0941, train_accuracy=0.944, val_loss=0.193, val_accuracy=0.935]
Epoch 138:  91%|█████████ | 232/255 [01:25<00:08,  2.72it/s, loss=0.0714, v_num=1, train_loss=0.0941, train_accuracy=0.944, val_loss=0.193, val_accuracy=0.935]
Epoch 138:  92%|█████████▏| 234/255 [01:25<00:07,  2.74it/s, loss=0.0714, v_num=1, train_loss=0.0941, train_accuracy=0.944, val_loss=0.193, val_accuracy=0.935]
Epoch 138:  93%|█████████▎| 236/255 [01:25<00:06,  2.76it/s, loss=0.0714, v_num=1, train_loss=0.0941, train_accu

Epoch 138, global step 31413: val_loss was not in top 2


Epoch 139:  89%|████████▊ | 226/255 [01:22<00:10,  2.74it/s, loss=0.108, v_num=1, train_loss=0.0681, train_accuracy=0.944, val_loss=0.199, val_accuracy=0.936] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 139:  89%|████████▉ | 228/255 [01:24<00:10,  2.69it/s, loss=0.108, v_num=1, train_loss=0.0681, train_accuracy=0.944, val_loss=0.199, val_accuracy=0.936]
Epoch 139:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.108, v_num=1, train_loss=0.0681, train_accuracy=0.944, val_loss=0.199, val_accuracy=0.936]
Epoch 139:  91%|█████████ | 232/255 [01:25<00:08,  2.73it/s, loss=0.108, v_num=1, train_loss=0.0681, train_accuracy=0.944, val_loss=0.199, val_accuracy=0.936]
Epoch 139:  92%|█████████▏| 234/255 [01:25<00:07,  2.74it/s, loss=0.108, v_num=1, train_loss=0.0681, train_accuracy=0.944, val_loss=0.199, val_accuracy=0.936]
Epoch 139:  93%|█████████▎| 236/255 [01:25<00:06,  2.76it/s, loss=0.108, v_num=1, train_loss=0.0681, train_accuracy=

Epoch 139, global step 31639: val_loss was not in top 2


Epoch 140:  89%|████████▊ | 226/255 [01:22<00:10,  2.74it/s, loss=0.0879, v_num=1, train_loss=0.0781, train_accuracy=1.000, val_loss=0.202, val_accuracy=0.934] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 140:  89%|████████▉ | 228/255 [01:24<00:10,  2.68it/s, loss=0.0879, v_num=1, train_loss=0.0781, train_accuracy=1.000, val_loss=0.202, val_accuracy=0.934]
Epoch 140:  90%|█████████ | 230/255 [01:25<00:09,  2.70it/s, loss=0.0879, v_num=1, train_loss=0.0781, train_accuracy=1.000, val_loss=0.202, val_accuracy=0.934]
Epoch 140:  91%|█████████ | 232/255 [01:25<00:08,  2.72it/s, loss=0.0879, v_num=1, train_loss=0.0781, train_accuracy=1.000, val_loss=0.202, val_accuracy=0.934]
Epoch 140:  92%|█████████▏| 234/255 [01:25<00:07,  2.74it/s, loss=0.0879, v_num=1, train_loss=0.0781, train_accuracy=1.000, val_loss=0.202, val_accuracy=0.934]
Epoch 140:  93%|█████████▎| 236/255 [01:25<00:06,  2.76it/s, loss=0.0879, v_num=1, train_loss=0.0781, train_acc

Epoch 140, global step 31865: val_loss was not in top 2


Epoch 141:  89%|████████▊ | 226/255 [01:22<00:10,  2.74it/s, loss=0.16, v_num=1, train_loss=0.119, train_accuracy=0.944, val_loss=0.247, val_accuracy=0.921]   
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 141:  89%|████████▉ | 228/255 [01:24<00:10,  2.69it/s, loss=0.16, v_num=1, train_loss=0.119, train_accuracy=0.944, val_loss=0.247, val_accuracy=0.921]
Epoch 141:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.16, v_num=1, train_loss=0.119, train_accuracy=0.944, val_loss=0.247, val_accuracy=0.921]
Epoch 141:  91%|█████████ | 232/255 [01:25<00:08,  2.72it/s, loss=0.16, v_num=1, train_loss=0.119, train_accuracy=0.944, val_loss=0.247, val_accuracy=0.921]
Epoch 141:  92%|█████████▏| 234/255 [01:25<00:07,  2.74it/s, loss=0.16, v_num=1, train_loss=0.119, train_accuracy=0.944, val_loss=0.247, val_accuracy=0.921]
Epoch 141:  93%|█████████▎| 236/255 [01:25<00:06,  2.76it/s, loss=0.16, v_num=1, train_loss=0.119, train_accuracy=0.944, val

Epoch 141, global step 32091: val_loss was not in top 2


Epoch 142:  89%|████████▊ | 226/255 [01:23<00:10,  2.69it/s, loss=0.109, v_num=1, train_loss=0.157, train_accuracy=0.944, val_loss=0.339, val_accuracy=0.889]  
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 142:  89%|████████▉ | 228/255 [01:28<00:10,  2.56it/s, loss=0.109, v_num=1, train_loss=0.157, train_accuracy=0.944, val_loss=0.339, val_accuracy=0.889]
Validating:   7%|▋         | 2/29 [00:05<00:58,  2.16s/it][A
Epoch 142:  90%|█████████ | 230/255 [01:29<00:09,  2.58it/s, loss=0.109, v_num=1, train_loss=0.157, train_accuracy=0.944, val_loss=0.339, val_accuracy=0.889]
Validating:  14%|█▍        | 4/29 [00:05<00:20,  1.24it/s][A
Epoch 142:  91%|█████████ | 232/255 [01:29<00:08,  2.59it/s, loss=0.109, v_num=1, train_loss=0.157, train_accuracy=0.944, val_loss=0.339, val_accuracy=0.889]
Validating:  21%|██        | 6/29 [00:05<00:09,  2.37it/s][A
Epoch 142:  92%|█████████▏| 234/255 [01:29<00:08,  2.61it/s, loss=0.109, v_num=1, train_loss

Epoch 142, global step 32317: val_loss was not in top 2


Epoch 143:  89%|████████▊ | 226/255 [01:27<00:11,  2.58it/s, loss=0.0635, v_num=1, train_loss=0.0112, train_accuracy=1.000, val_loss=0.256, val_accuracy=0.930] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 143:  89%|████████▉ | 228/255 [01:29<00:10,  2.54it/s, loss=0.0635, v_num=1, train_loss=0.0112, train_accuracy=1.000, val_loss=0.256, val_accuracy=0.930]
Epoch 143:  90%|█████████ | 230/255 [01:29<00:09,  2.56it/s, loss=0.0635, v_num=1, train_loss=0.0112, train_accuracy=1.000, val_loss=0.256, val_accuracy=0.930]
Epoch 143:  91%|█████████ | 232/255 [01:30<00:08,  2.57it/s, loss=0.0635, v_num=1, train_loss=0.0112, train_accuracy=1.000, val_loss=0.256, val_accuracy=0.930]
Epoch 143:  92%|█████████▏| 234/255 [01:30<00:08,  2.59it/s, loss=0.0635, v_num=1, train_loss=0.0112, train_accuracy=1.000, val_loss=0.256, val_accuracy=0.930]
Epoch 143:  93%|█████████▎| 236/255 [01:30<00:07,  2.61it/s, loss=0.0635, v_num=1, train_loss=0.0112, train_acc

Epoch 143, global step 32543: val_loss was not in top 2


Epoch 144:  89%|████████▊ | 226/255 [01:22<00:10,  2.74it/s, loss=0.0531, v_num=1, train_loss=0.0197, train_accuracy=1.000, val_loss=0.208, val_accuracy=0.932] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 144:  89%|████████▉ | 228/255 [01:24<00:10,  2.69it/s, loss=0.0531, v_num=1, train_loss=0.0197, train_accuracy=1.000, val_loss=0.208, val_accuracy=0.932]
Epoch 144:  90%|█████████ | 230/255 [01:25<00:09,  2.70it/s, loss=0.0531, v_num=1, train_loss=0.0197, train_accuracy=1.000, val_loss=0.208, val_accuracy=0.932]
Epoch 144:  91%|█████████ | 232/255 [01:25<00:08,  2.72it/s, loss=0.0531, v_num=1, train_loss=0.0197, train_accuracy=1.000, val_loss=0.208, val_accuracy=0.932]
Epoch 144:  92%|█████████▏| 234/255 [01:25<00:07,  2.74it/s, loss=0.0531, v_num=1, train_loss=0.0197, train_accuracy=1.000, val_loss=0.208, val_accuracy=0.932]
Epoch 144:  93%|█████████▎| 236/255 [01:25<00:06,  2.76it/s, loss=0.0531, v_num=1, train_loss=0.0197, train_acc

Epoch 144, global step 32769: val_loss was not in top 2


Epoch 145:  89%|████████▊ | 226/255 [01:22<00:10,  2.74it/s, loss=0.0906, v_num=1, train_loss=0.362, train_accuracy=0.889, val_loss=0.208, val_accuracy=0.942]  
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 145:  89%|████████▉ | 228/255 [01:24<00:10,  2.69it/s, loss=0.0906, v_num=1, train_loss=0.362, train_accuracy=0.889, val_loss=0.208, val_accuracy=0.942]
Epoch 145:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.0906, v_num=1, train_loss=0.362, train_accuracy=0.889, val_loss=0.208, val_accuracy=0.942]
Epoch 145:  91%|█████████ | 232/255 [01:25<00:08,  2.73it/s, loss=0.0906, v_num=1, train_loss=0.362, train_accuracy=0.889, val_loss=0.208, val_accuracy=0.942]
Epoch 145:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.0906, v_num=1, train_loss=0.362, train_accuracy=0.889, val_loss=0.208, val_accuracy=0.942]
Epoch 145:  93%|█████████▎| 236/255 [01:25<00:06,  2.76it/s, loss=0.0906, v_num=1, train_loss=0.362, train_accuracy

Epoch 145, global step 32995: val_loss was not in top 2


Epoch 146:  89%|████████▊ | 226/255 [01:22<00:10,  2.73it/s, loss=0.113, v_num=1, train_loss=0.0755, train_accuracy=0.944, val_loss=0.211, val_accuracy=0.943] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 146:  89%|████████▉ | 228/255 [01:25<00:10,  2.68it/s, loss=0.113, v_num=1, train_loss=0.0755, train_accuracy=0.944, val_loss=0.211, val_accuracy=0.943]
Epoch 146:  90%|█████████ | 230/255 [01:25<00:09,  2.70it/s, loss=0.113, v_num=1, train_loss=0.0755, train_accuracy=0.944, val_loss=0.211, val_accuracy=0.943]
Epoch 146:  91%|█████████ | 232/255 [01:25<00:08,  2.72it/s, loss=0.113, v_num=1, train_loss=0.0755, train_accuracy=0.944, val_loss=0.211, val_accuracy=0.943]
Epoch 146:  92%|█████████▏| 234/255 [01:25<00:07,  2.73it/s, loss=0.113, v_num=1, train_loss=0.0755, train_accuracy=0.944, val_loss=0.211, val_accuracy=0.943]
Epoch 146:  93%|█████████▎| 236/255 [01:25<00:06,  2.75it/s, loss=0.113, v_num=1, train_loss=0.0755, train_accuracy=

Epoch 146, global step 33221: val_loss was not in top 2


Epoch 147:  89%|████████▊ | 226/255 [01:22<00:10,  2.74it/s, loss=0.0955, v_num=1, train_loss=0.124, train_accuracy=0.944, val_loss=0.235, val_accuracy=0.912] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 147:  89%|████████▉ | 228/255 [01:24<00:10,  2.69it/s, loss=0.0955, v_num=1, train_loss=0.124, train_accuracy=0.944, val_loss=0.235, val_accuracy=0.912]
Epoch 147:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.0955, v_num=1, train_loss=0.124, train_accuracy=0.944, val_loss=0.235, val_accuracy=0.912]
Epoch 147:  91%|█████████ | 232/255 [01:25<00:08,  2.73it/s, loss=0.0955, v_num=1, train_loss=0.124, train_accuracy=0.944, val_loss=0.235, val_accuracy=0.912]
Epoch 147:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.0955, v_num=1, train_loss=0.124, train_accuracy=0.944, val_loss=0.235, val_accuracy=0.912]
Epoch 147:  93%|█████████▎| 236/255 [01:25<00:06,  2.76it/s, loss=0.0955, v_num=1, train_loss=0.124, train_accuracy=

Epoch 147, global step 33447: val_loss was not in top 2


Epoch 148:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.0773, v_num=1, train_loss=0.0118, train_accuracy=1.000, val_loss=0.220, val_accuracy=0.931] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 148:  89%|████████▉ | 228/255 [01:24<00:10,  2.70it/s, loss=0.0773, v_num=1, train_loss=0.0118, train_accuracy=1.000, val_loss=0.220, val_accuracy=0.931]
Epoch 148:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.0773, v_num=1, train_loss=0.0118, train_accuracy=1.000, val_loss=0.220, val_accuracy=0.931]
Epoch 148:  91%|█████████ | 232/255 [01:24<00:08,  2.73it/s, loss=0.0773, v_num=1, train_loss=0.0118, train_accuracy=1.000, val_loss=0.220, val_accuracy=0.931]
Epoch 148:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.0773, v_num=1, train_loss=0.0118, train_accuracy=1.000, val_loss=0.220, val_accuracy=0.931]
Epoch 148:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.0773, v_num=1, train_loss=0.0118, train_acc

Epoch 148, global step 33673: val_loss was not in top 2


Epoch 149:  89%|████████▊ | 226/255 [01:22<00:10,  2.74it/s, loss=0.0885, v_num=1, train_loss=0.0527, train_accuracy=1.000, val_loss=0.189, val_accuracy=0.949]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 149:  89%|████████▉ | 228/255 [01:24<00:10,  2.69it/s, loss=0.0885, v_num=1, train_loss=0.0527, train_accuracy=1.000, val_loss=0.189, val_accuracy=0.949]
Epoch 149:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.0885, v_num=1, train_loss=0.0527, train_accuracy=1.000, val_loss=0.189, val_accuracy=0.949]
Epoch 149:  91%|█████████ | 232/255 [01:25<00:08,  2.73it/s, loss=0.0885, v_num=1, train_loss=0.0527, train_accuracy=1.000, val_loss=0.189, val_accuracy=0.949]
Epoch 149:  92%|█████████▏| 234/255 [01:25<00:07,  2.74it/s, loss=0.0885, v_num=1, train_loss=0.0527, train_accuracy=1.000, val_loss=0.189, val_accuracy=0.949]
Epoch 149:  93%|█████████▎| 236/255 [01:25<00:06,  2.76it/s, loss=0.0885, v_num=1, train_loss=0.0527, train_accu

Epoch 149, global step 33899: val_loss was not in top 2


Epoch 150:  89%|████████▊ | 226/255 [01:21<00:10,  2.76it/s, loss=0.0613, v_num=1, train_loss=0.0831, train_accuracy=0.944, val_loss=0.193, val_accuracy=0.937] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 150:  89%|████████▉ | 228/255 [01:24<00:09,  2.71it/s, loss=0.0613, v_num=1, train_loss=0.0831, train_accuracy=0.944, val_loss=0.193, val_accuracy=0.937]
Epoch 150:  90%|█████████ | 230/255 [01:24<00:09,  2.73it/s, loss=0.0613, v_num=1, train_loss=0.0831, train_accuracy=0.944, val_loss=0.193, val_accuracy=0.937]
Epoch 150:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.0613, v_num=1, train_loss=0.0831, train_accuracy=0.944, val_loss=0.193, val_accuracy=0.937]
Epoch 150:  92%|█████████▏| 234/255 [01:24<00:07,  2.76it/s, loss=0.0613, v_num=1, train_loss=0.0831, train_accuracy=0.944, val_loss=0.193, val_accuracy=0.937]
Epoch 150:  93%|█████████▎| 236/255 [01:24<00:06,  2.78it/s, loss=0.0613, v_num=1, train_loss=0.0831, train_acc

Epoch 150, global step 34125: val_loss was not in top 2


Epoch 151:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.0744, v_num=1, train_loss=0.0617, train_accuracy=0.944, val_loss=0.188, val_accuracy=0.938] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 151:  89%|████████▉ | 228/255 [01:24<00:10,  2.69it/s, loss=0.0744, v_num=1, train_loss=0.0617, train_accuracy=0.944, val_loss=0.188, val_accuracy=0.938]
Epoch 151:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.0744, v_num=1, train_loss=0.0617, train_accuracy=0.944, val_loss=0.188, val_accuracy=0.938]
Epoch 151:  91%|█████████ | 232/255 [01:25<00:08,  2.73it/s, loss=0.0744, v_num=1, train_loss=0.0617, train_accuracy=0.944, val_loss=0.188, val_accuracy=0.938]
Epoch 151:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.0744, v_num=1, train_loss=0.0617, train_accuracy=0.944, val_loss=0.188, val_accuracy=0.938]
Epoch 151:  93%|█████████▎| 236/255 [01:25<00:06,  2.76it/s, loss=0.0744, v_num=1, train_loss=0.0617, train_acc

Epoch 151, global step 34351: val_loss was not in top 2


Epoch 152:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.0623, v_num=1, train_loss=0.045, train_accuracy=1.000, val_loss=0.192, val_accuracy=0.945]  
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 152:  89%|████████▉ | 228/255 [01:24<00:09,  2.70it/s, loss=0.0623, v_num=1, train_loss=0.045, train_accuracy=1.000, val_loss=0.192, val_accuracy=0.945]
Epoch 152:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.0623, v_num=1, train_loss=0.045, train_accuracy=1.000, val_loss=0.192, val_accuracy=0.945]
Epoch 152:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.0623, v_num=1, train_loss=0.045, train_accuracy=1.000, val_loss=0.192, val_accuracy=0.945]
Epoch 152:  92%|█████████▏| 234/255 [01:24<00:07,  2.76it/s, loss=0.0623, v_num=1, train_loss=0.045, train_accuracy=1.000, val_loss=0.192, val_accuracy=0.945]
Epoch 152:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.0623, v_num=1, train_loss=0.045, train_accuracy

Epoch 152, global step 34577: val_loss was not in top 2


Epoch 153:  89%|████████▊ | 226/255 [01:21<00:10,  2.76it/s, loss=0.153, v_num=1, train_loss=0.138, train_accuracy=0.944, val_loss=0.199, val_accuracy=0.940]   
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 153:  89%|████████▉ | 228/255 [01:24<00:09,  2.70it/s, loss=0.153, v_num=1, train_loss=0.138, train_accuracy=0.944, val_loss=0.199, val_accuracy=0.940]
Epoch 153:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.153, v_num=1, train_loss=0.138, train_accuracy=0.944, val_loss=0.199, val_accuracy=0.940]
Epoch 153:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.153, v_num=1, train_loss=0.138, train_accuracy=0.944, val_loss=0.199, val_accuracy=0.940]
Epoch 153:  92%|█████████▏| 234/255 [01:24<00:07,  2.76it/s, loss=0.153, v_num=1, train_loss=0.138, train_accuracy=0.944, val_loss=0.199, val_accuracy=0.940]
Epoch 153:  93%|█████████▎| 236/255 [01:25<00:06,  2.78it/s, loss=0.153, v_num=1, train_loss=0.138, train_accuracy=0.94

Epoch 153, global step 34803: val_loss was not in top 2


Epoch 154:  89%|████████▊ | 226/255 [01:21<00:10,  2.76it/s, loss=0.0548, v_num=1, train_loss=0.00459, train_accuracy=1.000, val_loss=0.235, val_accuracy=0.939]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 154:  89%|████████▉ | 228/255 [01:24<00:09,  2.70it/s, loss=0.0548, v_num=1, train_loss=0.00459, train_accuracy=1.000, val_loss=0.235, val_accuracy=0.939]
Epoch 154:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.0548, v_num=1, train_loss=0.00459, train_accuracy=1.000, val_loss=0.235, val_accuracy=0.939]
Epoch 154:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.0548, v_num=1, train_loss=0.00459, train_accuracy=1.000, val_loss=0.235, val_accuracy=0.939]
Epoch 154:  92%|█████████▏| 234/255 [01:24<00:07,  2.76it/s, loss=0.0548, v_num=1, train_loss=0.00459, train_accuracy=1.000, val_loss=0.235, val_accuracy=0.939]
Epoch 154:  93%|█████████▎| 236/255 [01:25<00:06,  2.78it/s, loss=0.0548, v_num=1, train_loss=0.00459, trai

Epoch 154, global step 35029: val_loss was not in top 2


Epoch 155:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.0585, v_num=1, train_loss=0.00521, train_accuracy=1.000, val_loss=0.189, val_accuracy=0.940]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 155:  89%|████████▉ | 228/255 [01:24<00:10,  2.70it/s, loss=0.0585, v_num=1, train_loss=0.00521, train_accuracy=1.000, val_loss=0.189, val_accuracy=0.940]
Epoch 155:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.0585, v_num=1, train_loss=0.00521, train_accuracy=1.000, val_loss=0.189, val_accuracy=0.940]
Validating:  14%|█▍        | 4/29 [00:02<00:11,  2.11it/s][A
Epoch 155:  91%|█████████ | 232/255 [01:24<00:08,  2.73it/s, loss=0.0585, v_num=1, train_loss=0.00521, train_accuracy=1.000, val_loss=0.189, val_accuracy=0.940]
Epoch 155:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.0585, v_num=1, train_loss=0.00521, train_accuracy=1.000, val_loss=0.189, val_accuracy=0.940]
Epoch 155:  93%|█████████▎| 236/255 [01:25<00

Epoch 155, global step 35255: val_loss was not in top 2


Epoch 156:  89%|████████▊ | 226/255 [01:21<00:10,  2.76it/s, loss=0.0559, v_num=1, train_loss=0.0077, train_accuracy=1.000, val_loss=0.187, val_accuracy=0.945] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 156:  89%|████████▉ | 228/255 [01:24<00:09,  2.71it/s, loss=0.0559, v_num=1, train_loss=0.0077, train_accuracy=1.000, val_loss=0.187, val_accuracy=0.945]
Epoch 156:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.0559, v_num=1, train_loss=0.0077, train_accuracy=1.000, val_loss=0.187, val_accuracy=0.945]
Epoch 156:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.0559, v_num=1, train_loss=0.0077, train_accuracy=1.000, val_loss=0.187, val_accuracy=0.945]
Epoch 156:  92%|█████████▏| 234/255 [01:24<00:07,  2.76it/s, loss=0.0559, v_num=1, train_loss=0.0077, train_accuracy=1.000, val_loss=0.187, val_accuracy=0.945]
Epoch 156:  93%|█████████▎| 236/255 [01:24<00:06,  2.78it/s, loss=0.0559, v_num=1, train_loss=0.0077, train_acc

Epoch 156, global step 35481: val_loss was not in top 2


Epoch 157:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.0496, v_num=1, train_loss=0.0798, train_accuracy=0.944, val_loss=0.254, val_accuracy=0.934] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 157:  89%|████████▉ | 228/255 [01:24<00:10,  2.70it/s, loss=0.0496, v_num=1, train_loss=0.0798, train_accuracy=0.944, val_loss=0.254, val_accuracy=0.934]
Epoch 157:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.0496, v_num=1, train_loss=0.0798, train_accuracy=0.944, val_loss=0.254, val_accuracy=0.934]
Epoch 157:  91%|█████████ | 232/255 [01:24<00:08,  2.73it/s, loss=0.0496, v_num=1, train_loss=0.0798, train_accuracy=0.944, val_loss=0.254, val_accuracy=0.934]
Epoch 157:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.0496, v_num=1, train_loss=0.0798, train_accuracy=0.944, val_loss=0.254, val_accuracy=0.934]
Epoch 157:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.0496, v_num=1, train_loss=0.0798, train_acc

Epoch 157, global step 35707: val_loss was not in top 2


Epoch 158:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.0671, v_num=1, train_loss=0.0105, train_accuracy=1.000, val_loss=0.182, val_accuracy=0.946] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 158:  89%|████████▉ | 228/255 [01:24<00:09,  2.70it/s, loss=0.0671, v_num=1, train_loss=0.0105, train_accuracy=1.000, val_loss=0.182, val_accuracy=0.946]
Epoch 158:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.0671, v_num=1, train_loss=0.0105, train_accuracy=1.000, val_loss=0.182, val_accuracy=0.946]
Epoch 158:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.0671, v_num=1, train_loss=0.0105, train_accuracy=1.000, val_loss=0.182, val_accuracy=0.946]
Epoch 158:  92%|█████████▏| 234/255 [01:24<00:07,  2.75it/s, loss=0.0671, v_num=1, train_loss=0.0105, train_accuracy=1.000, val_loss=0.182, val_accuracy=0.946]
Epoch 158:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.0671, v_num=1, train_loss=0.0105, train_acc

Epoch 158, global step 35933: val_loss reached 0.16947 (best 0.16747), saving model to "/Workspace/code/checkpoints/best-checkpoint-corr-random-20-v6.ckpt" as top 2


Epoch 159:  89%|████████▊ | 226/255 [01:22<00:10,  2.74it/s, loss=0.0462, v_num=1, train_loss=0.00455, train_accuracy=1.000, val_loss=0.169, val_accuracy=0.946]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 159:  89%|████████▉ | 228/255 [01:24<00:10,  2.69it/s, loss=0.0462, v_num=1, train_loss=0.00455, train_accuracy=1.000, val_loss=0.169, val_accuracy=0.946]
Epoch 159:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.0462, v_num=1, train_loss=0.00455, train_accuracy=1.000, val_loss=0.169, val_accuracy=0.946]
Epoch 159:  91%|█████████ | 232/255 [01:25<00:08,  2.73it/s, loss=0.0462, v_num=1, train_loss=0.00455, train_accuracy=1.000, val_loss=0.169, val_accuracy=0.946]
Epoch 159:  92%|█████████▏| 234/255 [01:25<00:07,  2.74it/s, loss=0.0462, v_num=1, train_loss=0.00455, train_accuracy=1.000, val_loss=0.169, val_accuracy=0.946]
Epoch 159:  93%|█████████▎| 236/255 [01:25<00:06,  2.76it/s, loss=0.0462, v_num=1, train_loss=0.00455, trai

Epoch 159, global step 36159: val_loss was not in top 2


Epoch 160:  89%|████████▊ | 226/255 [01:22<00:10,  2.74it/s, loss=0.056, v_num=1, train_loss=0.0337, train_accuracy=1.000, val_loss=0.179, val_accuracy=0.948]  
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 160:  89%|████████▉ | 228/255 [01:24<00:10,  2.69it/s, loss=0.056, v_num=1, train_loss=0.0337, train_accuracy=1.000, val_loss=0.179, val_accuracy=0.948]
Epoch 160:  90%|█████████ | 230/255 [01:25<00:09,  2.70it/s, loss=0.056, v_num=1, train_loss=0.0337, train_accuracy=1.000, val_loss=0.179, val_accuracy=0.948]
Epoch 160:  91%|█████████ | 232/255 [01:25<00:08,  2.72it/s, loss=0.056, v_num=1, train_loss=0.0337, train_accuracy=1.000, val_loss=0.179, val_accuracy=0.948]
Epoch 160:  92%|█████████▏| 234/255 [01:25<00:07,  2.74it/s, loss=0.056, v_num=1, train_loss=0.0337, train_accuracy=1.000, val_loss=0.179, val_accuracy=0.948]
Epoch 160:  93%|█████████▎| 236/255 [01:25<00:06,  2.76it/s, loss=0.056, v_num=1, train_loss=0.0337, train_accuracy

Epoch 160, global step 36385: val_loss reached 0.16892 (best 0.16747), saving model to "/Workspace/code/checkpoints/best-checkpoint-corr-random-20-v6.ckpt" as top 2


Epoch 161:  89%|████████▊ | 226/255 [01:22<00:10,  2.74it/s, loss=0.0687, v_num=1, train_loss=0.0107, train_accuracy=1.000, val_loss=0.169, val_accuracy=0.950] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 161:  89%|████████▉ | 228/255 [01:24<00:10,  2.69it/s, loss=0.0687, v_num=1, train_loss=0.0107, train_accuracy=1.000, val_loss=0.169, val_accuracy=0.950]
Epoch 161:  90%|█████████ | 230/255 [01:25<00:09,  2.71it/s, loss=0.0687, v_num=1, train_loss=0.0107, train_accuracy=1.000, val_loss=0.169, val_accuracy=0.950]
Epoch 161:  91%|█████████ | 232/255 [01:25<00:08,  2.72it/s, loss=0.0687, v_num=1, train_loss=0.0107, train_accuracy=1.000, val_loss=0.169, val_accuracy=0.950]
Epoch 161:  92%|█████████▏| 234/255 [01:25<00:07,  2.74it/s, loss=0.0687, v_num=1, train_loss=0.0107, train_accuracy=1.000, val_loss=0.169, val_accuracy=0.950]
Epoch 161:  93%|█████████▎| 236/255 [01:25<00:06,  2.76it/s, loss=0.0687, v_num=1, train_loss=0.0107, train_acc

Epoch 161, global step 36611: val_loss was not in top 2


Epoch 162:  89%|████████▊ | 226/255 [01:22<00:10,  2.73it/s, loss=0.0811, v_num=1, train_loss=0.131, train_accuracy=0.944, val_loss=0.178, val_accuracy=0.950]  
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 162:  89%|████████▉ | 228/255 [01:25<00:10,  2.68it/s, loss=0.0811, v_num=1, train_loss=0.131, train_accuracy=0.944, val_loss=0.178, val_accuracy=0.950]
Epoch 162:  90%|█████████ | 230/255 [01:25<00:09,  2.70it/s, loss=0.0811, v_num=1, train_loss=0.131, train_accuracy=0.944, val_loss=0.178, val_accuracy=0.950]
Epoch 162:  91%|█████████ | 232/255 [01:25<00:08,  2.71it/s, loss=0.0811, v_num=1, train_loss=0.131, train_accuracy=0.944, val_loss=0.178, val_accuracy=0.950]
Epoch 162:  92%|█████████▏| 234/255 [01:25<00:07,  2.73it/s, loss=0.0811, v_num=1, train_loss=0.131, train_accuracy=0.944, val_loss=0.178, val_accuracy=0.950]
Epoch 162:  93%|█████████▎| 236/255 [01:25<00:06,  2.75it/s, loss=0.0811, v_num=1, train_loss=0.131, train_accuracy

Epoch 162, global step 36837: val_loss was not in top 2


Epoch 163:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.162, v_num=1, train_loss=0.0461, train_accuracy=1.000, val_loss=0.235, val_accuracy=0.935]  
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 163:  89%|████████▉ | 228/255 [01:24<00:10,  2.69it/s, loss=0.162, v_num=1, train_loss=0.0461, train_accuracy=1.000, val_loss=0.235, val_accuracy=0.935]
Epoch 163:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.162, v_num=1, train_loss=0.0461, train_accuracy=1.000, val_loss=0.235, val_accuracy=0.935]
Epoch 163:  91%|█████████ | 232/255 [01:24<00:08,  2.73it/s, loss=0.162, v_num=1, train_loss=0.0461, train_accuracy=1.000, val_loss=0.235, val_accuracy=0.935]
Epoch 163:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.162, v_num=1, train_loss=0.0461, train_accuracy=1.000, val_loss=0.235, val_accuracy=0.935]
Epoch 163:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.162, v_num=1, train_loss=0.0461, train_accuracy

Epoch 163, global step 37063: val_loss was not in top 2


Epoch 164:  89%|████████▊ | 226/255 [01:22<00:10,  2.74it/s, loss=0.305, v_num=1, train_loss=0.312, train_accuracy=0.833, val_loss=0.276, val_accuracy=0.921]   
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 164:  89%|████████▉ | 228/255 [01:24<00:10,  2.69it/s, loss=0.305, v_num=1, train_loss=0.312, train_accuracy=0.833, val_loss=0.276, val_accuracy=0.921]
Epoch 164:  90%|█████████ | 230/255 [01:25<00:09,  2.70it/s, loss=0.305, v_num=1, train_loss=0.312, train_accuracy=0.833, val_loss=0.276, val_accuracy=0.921]
Epoch 164:  91%|█████████ | 232/255 [01:25<00:08,  2.72it/s, loss=0.305, v_num=1, train_loss=0.312, train_accuracy=0.833, val_loss=0.276, val_accuracy=0.921]
Epoch 164:  92%|█████████▏| 234/255 [01:25<00:07,  2.74it/s, loss=0.305, v_num=1, train_loss=0.312, train_accuracy=0.833, val_loss=0.276, val_accuracy=0.921]
Epoch 164:  93%|█████████▎| 236/255 [01:25<00:06,  2.76it/s, loss=0.305, v_num=1, train_loss=0.312, train_accuracy=0.83

Epoch 164, global step 37289: val_loss was not in top 2


Epoch 165:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.0729, v_num=1, train_loss=0.0192, train_accuracy=1.000, val_loss=0.264, val_accuracy=0.881]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 165:  89%|████████▉ | 228/255 [01:24<00:10,  2.69it/s, loss=0.0729, v_num=1, train_loss=0.0192, train_accuracy=1.000, val_loss=0.264, val_accuracy=0.881]
Epoch 165:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.0729, v_num=1, train_loss=0.0192, train_accuracy=1.000, val_loss=0.264, val_accuracy=0.881]
Epoch 165:  91%|█████████ | 232/255 [01:24<00:08,  2.73it/s, loss=0.0729, v_num=1, train_loss=0.0192, train_accuracy=1.000, val_loss=0.264, val_accuracy=0.881]
Epoch 165:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.0729, v_num=1, train_loss=0.0192, train_accuracy=1.000, val_loss=0.264, val_accuracy=0.881]
Epoch 165:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.0729, v_num=1, train_loss=0.0192, train_accu

Epoch 165, global step 37515: val_loss was not in top 2


Epoch 166:  89%|████████▊ | 226/255 [01:22<00:10,  2.74it/s, loss=0.0649, v_num=1, train_loss=0.0905, train_accuracy=0.944, val_loss=0.216, val_accuracy=0.925]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 166:  89%|████████▉ | 228/255 [01:24<00:10,  2.69it/s, loss=0.0649, v_num=1, train_loss=0.0905, train_accuracy=0.944, val_loss=0.216, val_accuracy=0.925]
Epoch 166:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.0649, v_num=1, train_loss=0.0905, train_accuracy=0.944, val_loss=0.216, val_accuracy=0.925]
Epoch 166:  91%|█████████ | 232/255 [01:25<00:08,  2.72it/s, loss=0.0649, v_num=1, train_loss=0.0905, train_accuracy=0.944, val_loss=0.216, val_accuracy=0.925]
Epoch 166:  92%|█████████▏| 234/255 [01:25<00:07,  2.74it/s, loss=0.0649, v_num=1, train_loss=0.0905, train_accuracy=0.944, val_loss=0.216, val_accuracy=0.925]
Epoch 166:  93%|█████████▎| 236/255 [01:25<00:06,  2.76it/s, loss=0.0649, v_num=1, train_loss=0.0905, train_accu

Epoch 166, global step 37741: val_loss was not in top 2


Epoch 167:  89%|████████▊ | 226/255 [01:22<00:10,  2.74it/s, loss=0.0474, v_num=1, train_loss=0.121, train_accuracy=0.944, val_loss=0.207, val_accuracy=0.933]  
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 167:  89%|████████▉ | 228/255 [01:24<00:10,  2.69it/s, loss=0.0474, v_num=1, train_loss=0.121, train_accuracy=0.944, val_loss=0.207, val_accuracy=0.933]
Epoch 167:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.0474, v_num=1, train_loss=0.121, train_accuracy=0.944, val_loss=0.207, val_accuracy=0.933]
Epoch 167:  91%|█████████ | 232/255 [01:25<00:08,  2.73it/s, loss=0.0474, v_num=1, train_loss=0.121, train_accuracy=0.944, val_loss=0.207, val_accuracy=0.933]
Epoch 167:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.0474, v_num=1, train_loss=0.121, train_accuracy=0.944, val_loss=0.207, val_accuracy=0.933]
Epoch 167:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.0474, v_num=1, train_loss=0.121, train_accuracy

Epoch 167, global step 37967: val_loss was not in top 2


Epoch 168:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.0997, v_num=1, train_loss=0.00585, train_accuracy=1.000, val_loss=0.240, val_accuracy=0.932]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 168:  89%|████████▉ | 228/255 [01:24<00:10,  2.69it/s, loss=0.0997, v_num=1, train_loss=0.00585, train_accuracy=1.000, val_loss=0.240, val_accuracy=0.932]
Epoch 168:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.0997, v_num=1, train_loss=0.00585, train_accuracy=1.000, val_loss=0.240, val_accuracy=0.932]
Epoch 168:  91%|█████████ | 232/255 [01:25<00:08,  2.73it/s, loss=0.0997, v_num=1, train_loss=0.00585, train_accuracy=1.000, val_loss=0.240, val_accuracy=0.932]
Epoch 168:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.0997, v_num=1, train_loss=0.00585, train_accuracy=1.000, val_loss=0.240, val_accuracy=0.932]
Epoch 168:  93%|█████████▎| 236/255 [01:25<00:06,  2.76it/s, loss=0.0997, v_num=1, train_loss=0.00585, trai

Epoch 168, global step 38193: val_loss was not in top 2


Epoch 169:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.0472, v_num=1, train_loss=0.00254, train_accuracy=1.000, val_loss=0.191, val_accuracy=0.942]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 169:  89%|████████▉ | 228/255 [01:24<00:10,  2.69it/s, loss=0.0472, v_num=1, train_loss=0.00254, train_accuracy=1.000, val_loss=0.191, val_accuracy=0.942]
Epoch 169:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.0472, v_num=1, train_loss=0.00254, train_accuracy=1.000, val_loss=0.191, val_accuracy=0.942]
Epoch 169:  91%|█████████ | 232/255 [01:25<00:08,  2.73it/s, loss=0.0472, v_num=1, train_loss=0.00254, train_accuracy=1.000, val_loss=0.191, val_accuracy=0.942]
Epoch 169:  92%|█████████▏| 234/255 [01:25<00:07,  2.74it/s, loss=0.0472, v_num=1, train_loss=0.00254, train_accuracy=1.000, val_loss=0.191, val_accuracy=0.942]
Epoch 169:  93%|█████████▎| 236/255 [01:25<00:06,  2.76it/s, loss=0.0472, v_num=1, train_loss=0.00254, trai

Epoch 169, global step 38419: val_loss was not in top 2


Epoch 170:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.0502, v_num=1, train_loss=0.0209, train_accuracy=1.000, val_loss=0.240, val_accuracy=0.938] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 170:  89%|████████▉ | 228/255 [01:24<00:09,  2.70it/s, loss=0.0502, v_num=1, train_loss=0.0209, train_accuracy=1.000, val_loss=0.240, val_accuracy=0.938]
Epoch 170:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.0502, v_num=1, train_loss=0.0209, train_accuracy=1.000, val_loss=0.240, val_accuracy=0.938]
Epoch 170:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.0502, v_num=1, train_loss=0.0209, train_accuracy=1.000, val_loss=0.240, val_accuracy=0.938]
Epoch 170:  92%|█████████▏| 234/255 [01:24<00:07,  2.76it/s, loss=0.0502, v_num=1, train_loss=0.0209, train_accuracy=1.000, val_loss=0.240, val_accuracy=0.938]
Epoch 170:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.0502, v_num=1, train_loss=0.0209, train_acc

Epoch 170, global step 38645: val_loss was not in top 2


Epoch 171:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.0706, v_num=1, train_loss=0.0346, train_accuracy=1.000, val_loss=0.175, val_accuracy=0.948] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 171:  89%|████████▉ | 228/255 [01:24<00:10,  2.70it/s, loss=0.0706, v_num=1, train_loss=0.0346, train_accuracy=1.000, val_loss=0.175, val_accuracy=0.948]
Epoch 171:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.0706, v_num=1, train_loss=0.0346, train_accuracy=1.000, val_loss=0.175, val_accuracy=0.948]
Epoch 171:  91%|█████████ | 232/255 [01:24<00:08,  2.73it/s, loss=0.0706, v_num=1, train_loss=0.0346, train_accuracy=1.000, val_loss=0.175, val_accuracy=0.948]
Epoch 171:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.0706, v_num=1, train_loss=0.0346, train_accuracy=1.000, val_loss=0.175, val_accuracy=0.948]
Epoch 171:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.0706, v_num=1, train_loss=0.0346, train_acc

Epoch 171, global step 38871: val_loss reached 0.16827 (best 0.16747), saving model to "/Workspace/code/checkpoints/best-checkpoint-corr-random-20-v6.ckpt" as top 2


Epoch 172:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.0478, v_num=1, train_loss=0.00789, train_accuracy=1.000, val_loss=0.168, val_accuracy=0.941]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 172:  89%|████████▉ | 228/255 [01:24<00:10,  2.70it/s, loss=0.0478, v_num=1, train_loss=0.00789, train_accuracy=1.000, val_loss=0.168, val_accuracy=0.941]
Epoch 172:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.0478, v_num=1, train_loss=0.00789, train_accuracy=1.000, val_loss=0.168, val_accuracy=0.941]
Epoch 172:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.0478, v_num=1, train_loss=0.00789, train_accuracy=1.000, val_loss=0.168, val_accuracy=0.941]
Epoch 172:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.0478, v_num=1, train_loss=0.00789, train_accuracy=1.000, val_loss=0.168, val_accuracy=0.941]
Epoch 172:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.0478, v_num=1, train_loss=0.00789, trai

Epoch 172, global step 39097: val_loss was not in top 2


Epoch 173:  89%|████████▊ | 226/255 [01:22<00:10,  2.74it/s, loss=0.0693, v_num=1, train_loss=0.191, train_accuracy=0.889, val_loss=0.195, val_accuracy=0.947]  
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 173:  89%|████████▉ | 228/255 [01:24<00:10,  2.69it/s, loss=0.0693, v_num=1, train_loss=0.191, train_accuracy=0.889, val_loss=0.195, val_accuracy=0.947]
Epoch 173:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.0693, v_num=1, train_loss=0.191, train_accuracy=0.889, val_loss=0.195, val_accuracy=0.947]
Epoch 173:  91%|█████████ | 232/255 [01:25<00:08,  2.73it/s, loss=0.0693, v_num=1, train_loss=0.191, train_accuracy=0.889, val_loss=0.195, val_accuracy=0.947]
Epoch 173:  92%|█████████▏| 234/255 [01:25<00:07,  2.74it/s, loss=0.0693, v_num=1, train_loss=0.191, train_accuracy=0.889, val_loss=0.195, val_accuracy=0.947]
Epoch 173:  93%|█████████▎| 236/255 [01:25<00:06,  2.76it/s, loss=0.0693, v_num=1, train_loss=0.191, train_accuracy

Epoch 173, global step 39323: val_loss was not in top 2


Epoch 174:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.0815, v_num=1, train_loss=0.0435, train_accuracy=1.000, val_loss=0.231, val_accuracy=0.930] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 174:  89%|████████▉ | 228/255 [01:24<00:10,  2.70it/s, loss=0.0815, v_num=1, train_loss=0.0435, train_accuracy=1.000, val_loss=0.231, val_accuracy=0.930]
Epoch 174:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.0815, v_num=1, train_loss=0.0435, train_accuracy=1.000, val_loss=0.231, val_accuracy=0.930]
Epoch 174:  91%|█████████ | 232/255 [01:24<00:08,  2.73it/s, loss=0.0815, v_num=1, train_loss=0.0435, train_accuracy=1.000, val_loss=0.231, val_accuracy=0.930]
Epoch 174:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.0815, v_num=1, train_loss=0.0435, train_accuracy=1.000, val_loss=0.231, val_accuracy=0.930]
Epoch 174:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.0815, v_num=1, train_loss=0.0435, train_acc

Epoch 174, global step 39549: val_loss was not in top 2


Epoch 175:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.0387, v_num=1, train_loss=0.0127, train_accuracy=1.000, val_loss=0.174, val_accuracy=0.944] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 175:  89%|████████▉ | 228/255 [01:24<00:10,  2.69it/s, loss=0.0387, v_num=1, train_loss=0.0127, train_accuracy=1.000, val_loss=0.174, val_accuracy=0.944]
Epoch 175:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.0387, v_num=1, train_loss=0.0127, train_accuracy=1.000, val_loss=0.174, val_accuracy=0.944]
Epoch 175:  91%|█████████ | 232/255 [01:24<00:08,  2.73it/s, loss=0.0387, v_num=1, train_loss=0.0127, train_accuracy=1.000, val_loss=0.174, val_accuracy=0.944]
Epoch 175:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.0387, v_num=1, train_loss=0.0127, train_accuracy=1.000, val_loss=0.174, val_accuracy=0.944]
Epoch 175:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.0387, v_num=1, train_loss=0.0127, train_acc

Epoch 175, global step 39775: val_loss was not in top 2


Epoch 176:  89%|████████▊ | 226/255 [01:22<00:10,  2.74it/s, loss=0.0453, v_num=1, train_loss=0.0062, train_accuracy=1.000, val_loss=0.188, val_accuracy=0.947] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 176:  89%|████████▉ | 228/255 [01:24<00:10,  2.69it/s, loss=0.0453, v_num=1, train_loss=0.0062, train_accuracy=1.000, val_loss=0.188, val_accuracy=0.947]
Epoch 176:  90%|█████████ | 230/255 [01:25<00:09,  2.71it/s, loss=0.0453, v_num=1, train_loss=0.0062, train_accuracy=1.000, val_loss=0.188, val_accuracy=0.947]
Epoch 176:  91%|█████████ | 232/255 [01:25<00:08,  2.72it/s, loss=0.0453, v_num=1, train_loss=0.0062, train_accuracy=1.000, val_loss=0.188, val_accuracy=0.947]
Epoch 176:  92%|█████████▏| 234/255 [01:25<00:07,  2.74it/s, loss=0.0453, v_num=1, train_loss=0.0062, train_accuracy=1.000, val_loss=0.188, val_accuracy=0.947]
Epoch 176:  93%|█████████▎| 236/255 [01:25<00:06,  2.76it/s, loss=0.0453, v_num=1, train_loss=0.0062, train_acc

Epoch 176, global step 40001: val_loss reached 0.16676 (best 0.16676), saving model to "/Workspace/code/checkpoints/best-checkpoint-corr-random-20-v6.ckpt" as top 2


Epoch 177:  89%|████████▊ | 226/255 [01:22<00:10,  2.74it/s, loss=0.0368, v_num=1, train_loss=0.0887, train_accuracy=0.944, val_loss=0.167, val_accuracy=0.947] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 177:  89%|████████▉ | 228/255 [01:24<00:10,  2.68it/s, loss=0.0368, v_num=1, train_loss=0.0887, train_accuracy=0.944, val_loss=0.167, val_accuracy=0.947]
Epoch 177:  90%|█████████ | 230/255 [01:25<00:09,  2.70it/s, loss=0.0368, v_num=1, train_loss=0.0887, train_accuracy=0.944, val_loss=0.167, val_accuracy=0.947]
Epoch 177:  91%|█████████ | 232/255 [01:25<00:08,  2.72it/s, loss=0.0368, v_num=1, train_loss=0.0887, train_accuracy=0.944, val_loss=0.167, val_accuracy=0.947]
Epoch 177:  92%|█████████▏| 234/255 [01:25<00:07,  2.74it/s, loss=0.0368, v_num=1, train_loss=0.0887, train_accuracy=0.944, val_loss=0.167, val_accuracy=0.947]
Epoch 177:  93%|█████████▎| 236/255 [01:25<00:06,  2.76it/s, loss=0.0368, v_num=1, train_loss=0.0887, train_acc

Epoch 177, global step 40227: val_loss reached 0.16703 (best 0.16676), saving model to "/Workspace/code/checkpoints/best-checkpoint-corr-random-20-v5.ckpt" as top 2


Epoch 178:  89%|████████▊ | 226/255 [01:22<00:10,  2.74it/s, loss=0.0391, v_num=1, train_loss=0.0107, train_accuracy=1.000, val_loss=0.167, val_accuracy=0.950] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 178:  89%|████████▉ | 228/255 [01:24<00:10,  2.68it/s, loss=0.0391, v_num=1, train_loss=0.0107, train_accuracy=1.000, val_loss=0.167, val_accuracy=0.950]
Epoch 178:  90%|█████████ | 230/255 [01:25<00:09,  2.70it/s, loss=0.0391, v_num=1, train_loss=0.0107, train_accuracy=1.000, val_loss=0.167, val_accuracy=0.950]
Epoch 178:  91%|█████████ | 232/255 [01:25<00:08,  2.72it/s, loss=0.0391, v_num=1, train_loss=0.0107, train_accuracy=1.000, val_loss=0.167, val_accuracy=0.950]
Epoch 178:  92%|█████████▏| 234/255 [01:25<00:07,  2.74it/s, loss=0.0391, v_num=1, train_loss=0.0107, train_accuracy=1.000, val_loss=0.167, val_accuracy=0.950]
Epoch 178:  93%|█████████▎| 236/255 [01:25<00:06,  2.76it/s, loss=0.0391, v_num=1, train_loss=0.0107, train_acc

Epoch 178, global step 40453: val_loss was not in top 2


Epoch 179:  89%|████████▊ | 226/255 [01:22<00:10,  2.73it/s, loss=0.0969, v_num=1, train_loss=0.220, train_accuracy=0.889, val_loss=0.175, val_accuracy=0.951]  
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 179:  89%|████████▉ | 228/255 [01:25<00:10,  2.67it/s, loss=0.0969, v_num=1, train_loss=0.220, train_accuracy=0.889, val_loss=0.175, val_accuracy=0.951]
Epoch 179:  90%|█████████ | 230/255 [01:25<00:09,  2.69it/s, loss=0.0969, v_num=1, train_loss=0.220, train_accuracy=0.889, val_loss=0.175, val_accuracy=0.951]
Epoch 179:  91%|█████████ | 232/255 [01:25<00:08,  2.71it/s, loss=0.0969, v_num=1, train_loss=0.220, train_accuracy=0.889, val_loss=0.175, val_accuracy=0.951]
Epoch 179:  92%|█████████▏| 234/255 [01:25<00:07,  2.73it/s, loss=0.0969, v_num=1, train_loss=0.220, train_accuracy=0.889, val_loss=0.175, val_accuracy=0.951]
Epoch 179:  93%|█████████▎| 236/255 [01:25<00:06,  2.75it/s, loss=0.0969, v_num=1, train_loss=0.220, train_accuracy

Epoch 179, global step 40679: val_loss was not in top 2


Epoch 180:  89%|████████▊ | 226/255 [01:22<00:10,  2.73it/s, loss=0.0534, v_num=1, train_loss=0.0106, train_accuracy=1.000, val_loss=0.210, val_accuracy=0.939] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 180:  89%|████████▉ | 228/255 [01:25<00:10,  2.68it/s, loss=0.0534, v_num=1, train_loss=0.0106, train_accuracy=1.000, val_loss=0.210, val_accuracy=0.939]
Validating:   7%|▋         | 2/29 [00:02<00:28,  1.05s/it][A
Epoch 180:  90%|█████████ | 230/255 [01:25<00:09,  2.69it/s, loss=0.0534, v_num=1, train_loss=0.0106, train_accuracy=1.000, val_loss=0.210, val_accuracy=0.939]
Epoch 180:  91%|█████████ | 232/255 [01:25<00:08,  2.71it/s, loss=0.0534, v_num=1, train_loss=0.0106, train_accuracy=1.000, val_loss=0.210, val_accuracy=0.939]
Epoch 180:  92%|█████████▏| 234/255 [01:25<00:07,  2.73it/s, loss=0.0534, v_num=1, train_loss=0.0106, train_accuracy=1.000, val_loss=0.210, val_accuracy=0.939]
Epoch 180:  93%|█████████▎| 236/255 [01:25<00:06,

Epoch 180, global step 40905: val_loss was not in top 2


Epoch 181:  89%|████████▊ | 226/255 [01:22<00:10,  2.74it/s, loss=0.049, v_num=1, train_loss=0.0371, train_accuracy=1.000, val_loss=0.177, val_accuracy=0.945]  
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 181:  89%|████████▉ | 228/255 [01:25<00:10,  2.68it/s, loss=0.049, v_num=1, train_loss=0.0371, train_accuracy=1.000, val_loss=0.177, val_accuracy=0.945]
Epoch 181:  90%|█████████ | 230/255 [01:25<00:09,  2.70it/s, loss=0.049, v_num=1, train_loss=0.0371, train_accuracy=1.000, val_loss=0.177, val_accuracy=0.945]
Epoch 181:  91%|█████████ | 232/255 [01:25<00:08,  2.72it/s, loss=0.049, v_num=1, train_loss=0.0371, train_accuracy=1.000, val_loss=0.177, val_accuracy=0.945]
Epoch 181:  92%|█████████▏| 234/255 [01:25<00:07,  2.74it/s, loss=0.049, v_num=1, train_loss=0.0371, train_accuracy=1.000, val_loss=0.177, val_accuracy=0.945]
Epoch 181:  93%|█████████▎| 236/255 [01:25<00:06,  2.75it/s, loss=0.049, v_num=1, train_loss=0.0371, train_accuracy

Epoch 181, global step 41131: val_loss was not in top 2


Epoch 182:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.0431, v_num=1, train_loss=0.223, train_accuracy=0.944, val_loss=0.200, val_accuracy=0.945]  
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 182:  89%|████████▉ | 228/255 [01:24<00:10,  2.70it/s, loss=0.0431, v_num=1, train_loss=0.223, train_accuracy=0.944, val_loss=0.200, val_accuracy=0.945]
Epoch 182:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.0431, v_num=1, train_loss=0.223, train_accuracy=0.944, val_loss=0.200, val_accuracy=0.945]
Epoch 182:  91%|█████████ | 232/255 [01:24<00:08,  2.73it/s, loss=0.0431, v_num=1, train_loss=0.223, train_accuracy=0.944, val_loss=0.200, val_accuracy=0.945]
Epoch 182:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.0431, v_num=1, train_loss=0.223, train_accuracy=0.944, val_loss=0.200, val_accuracy=0.945]
Epoch 182:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.0431, v_num=1, train_loss=0.223, train_accuracy

Epoch 182, global step 41357: val_loss was not in top 2


Epoch 183:  89%|████████▊ | 226/255 [01:22<00:10,  2.76it/s, loss=0.0782, v_num=1, train_loss=0.270, train_accuracy=0.944, val_loss=0.203, val_accuracy=0.951]  
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 183:  89%|████████▉ | 228/255 [01:24<00:10,  2.70it/s, loss=0.0782, v_num=1, train_loss=0.270, train_accuracy=0.944, val_loss=0.203, val_accuracy=0.951]
Epoch 183:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.0782, v_num=1, train_loss=0.270, train_accuracy=0.944, val_loss=0.203, val_accuracy=0.951]
Epoch 183:  91%|█████████ | 232/255 [01:24<00:08,  2.73it/s, loss=0.0782, v_num=1, train_loss=0.270, train_accuracy=0.944, val_loss=0.203, val_accuracy=0.951]
Epoch 183:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.0782, v_num=1, train_loss=0.270, train_accuracy=0.944, val_loss=0.203, val_accuracy=0.951]
Epoch 183:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.0782, v_num=1, train_loss=0.270, train_accuracy

Epoch 183, global step 41583: val_loss was not in top 2


Epoch 184:  89%|████████▊ | 226/255 [01:21<00:10,  2.76it/s, loss=0.0371, v_num=1, train_loss=0.0261, train_accuracy=1.000, val_loss=0.172, val_accuracy=0.945] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 184:  89%|████████▉ | 228/255 [01:24<00:09,  2.71it/s, loss=0.0371, v_num=1, train_loss=0.0261, train_accuracy=1.000, val_loss=0.172, val_accuracy=0.945]
Epoch 184:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.0371, v_num=1, train_loss=0.0261, train_accuracy=1.000, val_loss=0.172, val_accuracy=0.945]
Epoch 184:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.0371, v_num=1, train_loss=0.0261, train_accuracy=1.000, val_loss=0.172, val_accuracy=0.945]
Epoch 184:  92%|█████████▏| 234/255 [01:24<00:07,  2.76it/s, loss=0.0371, v_num=1, train_loss=0.0261, train_accuracy=1.000, val_loss=0.172, val_accuracy=0.945]
Epoch 184:  93%|█████████▎| 236/255 [01:24<00:06,  2.78it/s, loss=0.0371, v_num=1, train_loss=0.0261, train_acc

Epoch 184, global step 41809: val_loss reached 0.14425 (best 0.14425), saving model to "/Workspace/code/checkpoints/best-checkpoint-corr-random-20-v5.ckpt" as top 2


Epoch 185:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.0318, v_num=1, train_loss=0.026, train_accuracy=1.000, val_loss=0.144, val_accuracy=0.955]  
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 185:  89%|████████▉ | 228/255 [01:24<00:09,  2.70it/s, loss=0.0318, v_num=1, train_loss=0.026, train_accuracy=1.000, val_loss=0.144, val_accuracy=0.955]
Epoch 185:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.0318, v_num=1, train_loss=0.026, train_accuracy=1.000, val_loss=0.144, val_accuracy=0.955]
Epoch 185:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.0318, v_num=1, train_loss=0.026, train_accuracy=1.000, val_loss=0.144, val_accuracy=0.955]
Epoch 185:  92%|█████████▏| 234/255 [01:24<00:07,  2.76it/s, loss=0.0318, v_num=1, train_loss=0.026, train_accuracy=1.000, val_loss=0.144, val_accuracy=0.955]
Epoch 185:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.0318, v_num=1, train_loss=0.026, train_accuracy

Epoch 185, global step 42035: val_loss reached 0.14876 (best 0.14425), saving model to "/Workspace/code/checkpoints/best-checkpoint-corr-random-20-v6.ckpt" as top 2


Epoch 186:  89%|████████▊ | 226/255 [01:21<00:10,  2.76it/s, loss=0.0493, v_num=1, train_loss=0.112, train_accuracy=0.944, val_loss=0.149, val_accuracy=0.952]  
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 186:  89%|████████▉ | 228/255 [01:24<00:10,  2.70it/s, loss=0.0493, v_num=1, train_loss=0.112, train_accuracy=0.944, val_loss=0.149, val_accuracy=0.952]
Epoch 186:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.0493, v_num=1, train_loss=0.112, train_accuracy=0.944, val_loss=0.149, val_accuracy=0.952]
Epoch 186:  91%|█████████ | 232/255 [01:24<00:08,  2.73it/s, loss=0.0493, v_num=1, train_loss=0.112, train_accuracy=0.944, val_loss=0.149, val_accuracy=0.952]
Epoch 186:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.0493, v_num=1, train_loss=0.112, train_accuracy=0.944, val_loss=0.149, val_accuracy=0.952]
Epoch 186:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.0493, v_num=1, train_loss=0.112, train_accuracy

Epoch 186, global step 42261: val_loss reached 0.14741 (best 0.14425), saving model to "/Workspace/code/checkpoints/best-checkpoint-corr-random-20-v6.ckpt" as top 2


Epoch 187:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.0448, v_num=1, train_loss=0.0132, train_accuracy=1.000, val_loss=0.147, val_accuracy=0.955] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 187:  89%|████████▉ | 228/255 [01:24<00:09,  2.70it/s, loss=0.0448, v_num=1, train_loss=0.0132, train_accuracy=1.000, val_loss=0.147, val_accuracy=0.955]
Epoch 187:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.0448, v_num=1, train_loss=0.0132, train_accuracy=1.000, val_loss=0.147, val_accuracy=0.955]
Epoch 187:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.0448, v_num=1, train_loss=0.0132, train_accuracy=1.000, val_loss=0.147, val_accuracy=0.955]
Epoch 187:  92%|█████████▏| 234/255 [01:24<00:07,  2.76it/s, loss=0.0448, v_num=1, train_loss=0.0132, train_accuracy=1.000, val_loss=0.147, val_accuracy=0.955]
Epoch 187:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.0448, v_num=1, train_loss=0.0132, train_acc

Epoch 187, global step 42487: val_loss was not in top 2


Epoch 188:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.0981, v_num=1, train_loss=0.0334, train_accuracy=1.000, val_loss=0.171, val_accuracy=0.956] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 188:  89%|████████▉ | 228/255 [01:24<00:10,  2.70it/s, loss=0.0981, v_num=1, train_loss=0.0334, train_accuracy=1.000, val_loss=0.171, val_accuracy=0.956]
Epoch 188:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.0981, v_num=1, train_loss=0.0334, train_accuracy=1.000, val_loss=0.171, val_accuracy=0.956]
Epoch 188:  91%|█████████ | 232/255 [01:24<00:08,  2.73it/s, loss=0.0981, v_num=1, train_loss=0.0334, train_accuracy=1.000, val_loss=0.171, val_accuracy=0.956]
Epoch 188:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.0981, v_num=1, train_loss=0.0334, train_accuracy=1.000, val_loss=0.171, val_accuracy=0.956]
Epoch 188:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.0981, v_num=1, train_loss=0.0334, train_acc

Epoch 188, global step 42713: val_loss was not in top 2


Epoch 189:  89%|████████▊ | 226/255 [01:21<00:10,  2.76it/s, loss=0.0488, v_num=1, train_loss=0.00499, train_accuracy=1.000, val_loss=0.243, val_accuracy=0.929]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 189:  89%|████████▉ | 228/255 [01:24<00:09,  2.70it/s, loss=0.0488, v_num=1, train_loss=0.00499, train_accuracy=1.000, val_loss=0.243, val_accuracy=0.929]
Epoch 189:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.0488, v_num=1, train_loss=0.00499, train_accuracy=1.000, val_loss=0.243, val_accuracy=0.929]
Epoch 189:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.0488, v_num=1, train_loss=0.00499, train_accuracy=1.000, val_loss=0.243, val_accuracy=0.929]
Epoch 189:  92%|█████████▏| 234/255 [01:24<00:07,  2.76it/s, loss=0.0488, v_num=1, train_loss=0.00499, train_accuracy=1.000, val_loss=0.243, val_accuracy=0.929]
Epoch 189:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.0488, v_num=1, train_loss=0.00499, trai

Epoch 189, global step 42939: val_loss was not in top 2


Epoch 190:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.0696, v_num=1, train_loss=0.018, train_accuracy=1.000, val_loss=0.174, val_accuracy=0.954]  
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 190:  89%|████████▉ | 228/255 [01:24<00:09,  2.70it/s, loss=0.0696, v_num=1, train_loss=0.018, train_accuracy=1.000, val_loss=0.174, val_accuracy=0.954]
Epoch 190:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.0696, v_num=1, train_loss=0.018, train_accuracy=1.000, val_loss=0.174, val_accuracy=0.954]
Epoch 190:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.0696, v_num=1, train_loss=0.018, train_accuracy=1.000, val_loss=0.174, val_accuracy=0.954]
Epoch 190:  92%|█████████▏| 234/255 [01:24<00:07,  2.75it/s, loss=0.0696, v_num=1, train_loss=0.018, train_accuracy=1.000, val_loss=0.174, val_accuracy=0.954]
Epoch 190:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.0696, v_num=1, train_loss=0.018, train_accuracy

Epoch 190, global step 43165: val_loss was not in top 2


Epoch 191:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.058, v_num=1, train_loss=0.0177, train_accuracy=1.000, val_loss=0.227, val_accuracy=0.940]  
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 191:  89%|████████▉ | 228/255 [01:24<00:09,  2.70it/s, loss=0.058, v_num=1, train_loss=0.0177, train_accuracy=1.000, val_loss=0.227, val_accuracy=0.940]
Epoch 191:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.058, v_num=1, train_loss=0.0177, train_accuracy=1.000, val_loss=0.227, val_accuracy=0.940]
Epoch 191:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.058, v_num=1, train_loss=0.0177, train_accuracy=1.000, val_loss=0.227, val_accuracy=0.940]
Epoch 191:  92%|█████████▏| 234/255 [01:24<00:07,  2.76it/s, loss=0.058, v_num=1, train_loss=0.0177, train_accuracy=1.000, val_loss=0.227, val_accuracy=0.940]
Epoch 191:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.058, v_num=1, train_loss=0.0177, train_accuracy

Epoch 191, global step 43391: val_loss was not in top 2


Epoch 192:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.0448, v_num=1, train_loss=0.0692, train_accuracy=0.944, val_loss=0.203, val_accuracy=0.941] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 192:  89%|████████▉ | 228/255 [01:24<00:10,  2.69it/s, loss=0.0448, v_num=1, train_loss=0.0692, train_accuracy=0.944, val_loss=0.203, val_accuracy=0.941]
Epoch 192:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.0448, v_num=1, train_loss=0.0692, train_accuracy=0.944, val_loss=0.203, val_accuracy=0.941]
Epoch 192:  91%|█████████ | 232/255 [01:24<00:08,  2.73it/s, loss=0.0448, v_num=1, train_loss=0.0692, train_accuracy=0.944, val_loss=0.203, val_accuracy=0.941]
Epoch 192:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.0448, v_num=1, train_loss=0.0692, train_accuracy=0.944, val_loss=0.203, val_accuracy=0.941]
Epoch 192:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.0448, v_num=1, train_loss=0.0692, train_acc

Epoch 192, global step 43617: val_loss was not in top 2


Epoch 193:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.0365, v_num=1, train_loss=0.0934, train_accuracy=0.944, val_loss=0.184, val_accuracy=0.945] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 193:  89%|████████▉ | 228/255 [01:24<00:10,  2.70it/s, loss=0.0365, v_num=1, train_loss=0.0934, train_accuracy=0.944, val_loss=0.184, val_accuracy=0.945]
Epoch 193:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.0365, v_num=1, train_loss=0.0934, train_accuracy=0.944, val_loss=0.184, val_accuracy=0.945]
Epoch 193:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.0365, v_num=1, train_loss=0.0934, train_accuracy=0.944, val_loss=0.184, val_accuracy=0.945]
Epoch 193:  92%|█████████▏| 234/255 [01:24<00:07,  2.75it/s, loss=0.0365, v_num=1, train_loss=0.0934, train_accuracy=0.944, val_loss=0.184, val_accuracy=0.945]
Epoch 193:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.0365, v_num=1, train_loss=0.0934, train_acc

Epoch 193, global step 43843: val_loss was not in top 2


Epoch 194:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.042, v_num=1, train_loss=0.00631, train_accuracy=1.000, val_loss=0.172, val_accuracy=0.952]  
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 194:  89%|████████▉ | 228/255 [01:24<00:09,  2.70it/s, loss=0.042, v_num=1, train_loss=0.00631, train_accuracy=1.000, val_loss=0.172, val_accuracy=0.952]
Epoch 194:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.042, v_num=1, train_loss=0.00631, train_accuracy=1.000, val_loss=0.172, val_accuracy=0.952]
Epoch 194:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.042, v_num=1, train_loss=0.00631, train_accuracy=1.000, val_loss=0.172, val_accuracy=0.952]
Epoch 194:  92%|█████████▏| 234/255 [01:24<00:07,  2.76it/s, loss=0.042, v_num=1, train_loss=0.00631, train_accuracy=1.000, val_loss=0.172, val_accuracy=0.952]
Epoch 194:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.042, v_num=1, train_loss=0.00631, train_ac

Epoch 194, global step 44069: val_loss was not in top 2


Epoch 195:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.0455, v_num=1, train_loss=0.00173, train_accuracy=1.000, val_loss=0.184, val_accuracy=0.941]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 195:  89%|████████▉ | 228/255 [01:24<00:09,  2.70it/s, loss=0.0455, v_num=1, train_loss=0.00173, train_accuracy=1.000, val_loss=0.184, val_accuracy=0.941]
Epoch 195:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.0455, v_num=1, train_loss=0.00173, train_accuracy=1.000, val_loss=0.184, val_accuracy=0.941]
Epoch 195:  91%|█████████ | 232/255 [01:24<00:08,  2.74it/s, loss=0.0455, v_num=1, train_loss=0.00173, train_accuracy=1.000, val_loss=0.184, val_accuracy=0.941]
Epoch 195:  92%|█████████▏| 234/255 [01:24<00:07,  2.75it/s, loss=0.0455, v_num=1, train_loss=0.00173, train_accuracy=1.000, val_loss=0.184, val_accuracy=0.941]
Epoch 195:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.0455, v_num=1, train_loss=0.00173, trai

Epoch 195, global step 44295: val_loss was not in top 2


Epoch 196:  89%|████████▊ | 226/255 [01:22<00:10,  2.74it/s, loss=0.556, v_num=1, train_loss=1.140, train_accuracy=0.778, val_loss=0.198, val_accuracy=0.943]   
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 196:  89%|████████▉ | 228/255 [01:24<00:10,  2.69it/s, loss=0.556, v_num=1, train_loss=1.140, train_accuracy=0.778, val_loss=0.198, val_accuracy=0.943]
Epoch 196:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.556, v_num=1, train_loss=1.140, train_accuracy=0.778, val_loss=0.198, val_accuracy=0.943]
Epoch 196:  91%|█████████ | 232/255 [01:25<00:08,  2.73it/s, loss=0.556, v_num=1, train_loss=1.140, train_accuracy=0.778, val_loss=0.198, val_accuracy=0.943]
Epoch 196:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.556, v_num=1, train_loss=1.140, train_accuracy=0.778, val_loss=0.198, val_accuracy=0.943]
Epoch 196:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.556, v_num=1, train_loss=1.140, train_accuracy=0.77

Epoch 196, global step 44521: val_loss was not in top 2


Epoch 197:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.186, v_num=1, train_loss=0.110, train_accuracy=0.944, val_loss=0.881, val_accuracy=0.809] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 197:  89%|████████▉ | 228/255 [01:24<00:10,  2.69it/s, loss=0.186, v_num=1, train_loss=0.110, train_accuracy=0.944, val_loss=0.881, val_accuracy=0.809]
Epoch 197:  90%|█████████ | 230/255 [01:24<00:09,  2.71it/s, loss=0.186, v_num=1, train_loss=0.110, train_accuracy=0.944, val_loss=0.881, val_accuracy=0.809]
Epoch 197:  91%|█████████ | 232/255 [01:24<00:08,  2.73it/s, loss=0.186, v_num=1, train_loss=0.110, train_accuracy=0.944, val_loss=0.881, val_accuracy=0.809]
Epoch 197:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.186, v_num=1, train_loss=0.110, train_accuracy=0.944, val_loss=0.881, val_accuracy=0.809]
Epoch 197:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.186, v_num=1, train_loss=0.110, train_accuracy=0.944,

Epoch 197, global step 44747: val_loss was not in top 2


Epoch 198:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.13, v_num=1, train_loss=0.104, train_accuracy=1.000, val_loss=0.266, val_accuracy=0.868]  
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 198:  89%|████████▉ | 228/255 [01:24<00:10,  2.70it/s, loss=0.13, v_num=1, train_loss=0.104, train_accuracy=1.000, val_loss=0.266, val_accuracy=0.868]
Epoch 198:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.13, v_num=1, train_loss=0.104, train_accuracy=1.000, val_loss=0.266, val_accuracy=0.868]
Epoch 198:  91%|█████████ | 232/255 [01:24<00:08,  2.73it/s, loss=0.13, v_num=1, train_loss=0.104, train_accuracy=1.000, val_loss=0.266, val_accuracy=0.868]
Epoch 198:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.13, v_num=1, train_loss=0.104, train_accuracy=1.000, val_loss=0.266, val_accuracy=0.868]
Epoch 198:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.13, v_num=1, train_loss=0.104, train_accuracy=1.000, val_

Epoch 198, global step 44973: val_loss was not in top 2


Epoch 199:  89%|████████▊ | 226/255 [01:22<00:10,  2.75it/s, loss=0.108, v_num=1, train_loss=0.0187, train_accuracy=1.000, val_loss=0.250, val_accuracy=0.902] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/29 [00:00<?, ?it/s][A
Epoch 199:  89%|████████▉ | 228/255 [01:24<00:10,  2.70it/s, loss=0.108, v_num=1, train_loss=0.0187, train_accuracy=1.000, val_loss=0.250, val_accuracy=0.902]
Epoch 199:  90%|█████████ | 230/255 [01:24<00:09,  2.72it/s, loss=0.108, v_num=1, train_loss=0.0187, train_accuracy=1.000, val_loss=0.250, val_accuracy=0.902]
Epoch 199:  91%|█████████ | 232/255 [01:24<00:08,  2.73it/s, loss=0.108, v_num=1, train_loss=0.0187, train_accuracy=1.000, val_loss=0.250, val_accuracy=0.902]
Epoch 199:  92%|█████████▏| 234/255 [01:25<00:07,  2.75it/s, loss=0.108, v_num=1, train_loss=0.0187, train_accuracy=1.000, val_loss=0.250, val_accuracy=0.902]
Epoch 199:  93%|█████████▎| 236/255 [01:25<00:06,  2.77it/s, loss=0.108, v_num=1, train_loss=0.0187, train_accuracy=

Epoch 199, global step 45199: val_loss was not in top 2


Epoch 199: 100%|██████████| 255/255 [01:27<00:00,  2.92it/s, loss=0.108, v_num=1, train_loss=0.0187, train_accuracy=1.000, val_loss=0.252, val_accuracy=0.922]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]


Testing: 100%|██████████| 29/29 [00:04<00:00,  7.34it/s]--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_accuracy': 0.9212207198143005, 'test_loss': 0.22765043377876282}
--------------------------------------------------------------------------------
Testing: 100%|██████████| 29/29 [00:04<00:00,  5.86it/s]


## Load Checkpoint 

In [8]:
# best-checkpoint-corr-3-v1.ckpt
# trainer.fit(model, data_module, ckpt_path="/Workspace/code/checkpoints/best-checkpoint-corr-3-v1.ckpt")