In [None]:
import marimo as mo

# CIFAR Demonstration

This notebook demonstrates how to use the `hierarchicalsoftmax` module to train a neural network on the [CIFAR](https://www.cs.toronto.edu/~kriz/cifar.html) dataset.

First, choose the hyperparameters.

In [None]:
# Hyperparameter widgets. Each default can be overridden from the command line
# (read through `mo.cli_args()`); the value after `or` is used when the argument is absent.
# str() keeps the radio default valid even if the CLI value arrives as a number,
# because the radio options are strings.
cifar_radio = mo.ui.radio(options=["10","100"], value=str(mo.cli_args().get("cifar") or "100"), label="CIFAR Dataset")
batch_size_input = mo.ui.number(value=mo.cli_args().get("batch") or 32, label="Batch Size")
# The epoch count has its own "epochs" argument so it is not tied to the batch size.
epochs_input = mo.ui.number(value=mo.cli_args().get("epochs") or 10, label="Epochs")
mo.vstack([cifar_radio, epochs_input, batch_size_input])

In [None]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Only the two CIFAR variants offered by the radio widget are supported.
assert cifar_radio.value in ("10", "100")

# Snapshot the widget values as plain variables for the cells below.
epochs = epochs_input.value
batch_size = batch_size_input.value

# Pick the torchvision dataset class matching the selected variant.
if cifar_radio.value == "10":
    cifar_dataset = datasets.CIFAR10
else:
    cifar_dataset = datasets.CIFAR100

# Use the same data augmentation strategies as in https://arxiv.org/pdf/1605.07146v4
transform = transforms.Compose(
    [
        transforms.RandomCrop(32, padding=4, padding_mode="reflect"),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ]
)

# Augmentation is applied to the training split only; the test split is just converted to tensors.
train_data = cifar_dataset(transform=transform, train=True, root=".", download=True)
test_data = cifar_dataset(transform=transforms.ToTensor(), train=False, root=".", download=True)

# Shuffle the training batches; keep the evaluation order fixed.
train_loader = DataLoader(dataset=train_data, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(dataset=test_data, shuffle=False, batch_size=batch_size)

### Plot the first 10 images

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

num_images = 10

# Fetch every sample exactly once. `train_data` carries the random augmentation
# transform, so each access re-runs it; reusing one access per image keeps the
# title and the picture tied to the same sample and avoids redundant work.
_samples = [train_data[i] for i in range(num_images)]

# Create a row of subplots, one per image, titled with the class name
cifar_fig = make_subplots(
    rows=1,
    cols=num_images,
    subplot_titles=[train_data.classes[label] for _, label in _samples],
    horizontal_spacing=0,
)

for i, (img, _) in enumerate(_samples):
    # (C, H, W) tensor -> (H, W, C) numpy array, rescaled to 8-bit pixel values
    pixels = (img.permute(1, 2, 0).numpy() * 255).astype('uint8')
    cifar_fig.add_trace(go.Image(z=pixels), row=1, col=i + 1)

# Update layout: fixed-size square thumbnails with room for the titles on top
thumbnail_size = 105  # pixels per image
cifar_fig.update_layout(
    height=thumbnail_size,
    width=thumbnail_size * num_images,
    showlegend=False,
    margin=dict(l=0, r=0, t=30, b=0),
)

# Hide axes on every subplot (no row/col selector means "all of them")
cifar_fig.update_xaxes(visible=False)
cifar_fig.update_yaxes(visible=False)

cifar_fig

## Non-hierarchical model

First, we create a basic non-hierarchical model as a baseline.

In [None]:
import torch
from torch import nn
from torchmetrics import Accuracy
import lightning as L

import torch
import torch.nn as nn
import torch.nn.functional as F

class BasicBlock(nn.Module):
    """Residual block applying (BatchNorm -> ReLU -> 3x3 conv) twice plus a shortcut.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        stride: Stride of the first convolution (and of the projection shortcut).
    """

    def __init__(self, in_planes, out_planes, stride):
        super().__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(
            in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False
        )
        self.bn2 = nn.BatchNorm2d(out_planes)
        self.conv2 = nn.Conv2d(
            out_planes, out_planes, kernel_size=3, stride=1, padding=1, bias=False
        )

        # A 1x1 projection is needed whenever the main path changes the tensor
        # shape; otherwise the shortcut is an empty Sequential (identity).
        shape_changes = stride != 1 or in_planes != out_planes
        if shape_changes:
            self.shortcut = nn.Conv2d(
                in_planes, out_planes, kernel_size=1, stride=stride, bias=False
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return the main-path output summed with the shortcut of `x`."""
        hidden = F.relu(self.bn1(x))
        hidden = self.conv1(hidden)
        hidden = F.relu(self.bn2(hidden))
        out = self.conv2(hidden)
        out += self.shortcut(x)
        return out


class WideResNetBody(nn.Module):
    """Convolutional feature extractor built from three groups of `BasicBlock`s.

    Args:
        depth: Network depth; must have the form 6n+4, giving n blocks per group.
        width_factor: Multiplier applied to the base widths 16/32/64 of the groups.
    """

    def __init__(self, depth=16, width_factor=8):
        super().__init__()
        blocks_per_group, leftover = divmod(depth - 4, 6)
        assert leftover == 0, "Depth should be 6n+4"

        # Channel count fed into the next block; updated by `_make_layer`.
        self.in_planes = 16

        # Initial conv
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)

        # 3 groups; the second and third halve the spatial resolution
        self.layer1 = self._make_layer(16 * width_factor, blocks_per_group, stride=1)
        self.layer2 = self._make_layer(32 * width_factor, blocks_per_group, stride=2)
        self.layer3 = self._make_layer(64 * width_factor, blocks_per_group, stride=2)

        self.bn = nn.BatchNorm2d(64 * width_factor)

    def _make_layer(self, out_planes, blocks, stride):
        """Stack blocks into one group; only the first block uses `stride`."""
        group = []
        for block_stride in [stride, *([1] * (blocks - 1))]:
            group.append(BasicBlock(self.in_planes, out_planes, block_stride))
            self.in_planes = out_planes
        return nn.Sequential(*group)

    def forward(self, x):
        """Return one flattened feature vector per input image."""
        features = self.conv1(x)
        for group in (self.layer1, self.layer2, self.layer3):
            features = group(features)
        features = F.relu(self.bn(features))
        pooled = F.avg_pool2d(features, 8)
        return pooled.view(pooled.size(0), -1)


class BasicImageClassifier(L.LightningModule):
    """Baseline flat classifier: a `WideResNetBody` followed by one linear layer.

    The number of output classes is taken from `train_data.classes`, so the
    dataset cell must have been run before this class is instantiated.
    """

    def __init__(self):
        super().__init__()
        num_classes = len(train_data.classes)
        self.model = nn.Sequential(
            WideResNetBody(),
            # LazyLinear infers its input width from the first batch it sees.
            nn.LazyLinear(out_features=num_classes)
        )
        self.loss_fn = nn.CrossEntropyLoss()
        # Hold the metrics in a ModuleList rather than a plain list so they are
        # registered as submodules and moved to the model's device together
        # with the rest of the module.
        self.metrics = nn.ModuleList([
            Accuracy(task="multiclass", num_classes=num_classes)
        ])

    def forward(self, x):
        """Return the class logits for a batch of images."""
        return self.model(x)

    def training_step(self, batch, batch_idx):
        """Compute and log the cross-entropy loss for one training batch."""
        x, y = batch
        logits = self(x)
        loss = self.loss_fn(logits, y)
        self.log('train_loss', loss, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log the validation loss and every metric for one validation batch."""
        x, y = batch
        logits = self(x)
        loss = self.loss_fn(logits, y)
        self.log('val_loss', loss, prog_bar=True)
        for metric in self.metrics:
            result = metric(logits, y)
            # A metric may return a dict of named values or a single value.
            if isinstance(result, dict):
                for name, value in result.items():
                    self.log(f"val_{name}", value, on_step=False, on_epoch=True, prog_bar=True)
            else:
                self.log(f"val_{metric.__class__.__name__}", result, on_step=False, on_epoch=True, prog_bar=True)

        return loss

    def configure_optimizers(self):
        """Use Adam with a fixed learning rate of 1e-3."""
        return torch.optim.Adam(self.parameters(), lr=1e-3)

# Instantiate the baseline classifier; the bare name on the last line displays it as the cell output.
basic_model = BasicImageClassifier()
basic_model

### Train the basic model

In [None]:
from lightning.pytorch.loggers import CSVLogger

# Write the logged metrics with a CSV logger named "basic_model" under "lightning_logs".
basic_logger = CSVLogger(name="basic_model", save_dir="lightning_logs")

# Train for the configured number of epochs without saving checkpoints.
basic_trainer = L.Trainer(
    logger=basic_logger,
    max_epochs=epochs,
    accelerator="auto",
    enable_checkpointing=False,
)

# The test split is used as the validation set during training.
basic_trainer.fit(
    basic_model,
    val_dataloaders=test_loader,
    train_dataloaders=train_loader,
)

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


W0514 11:31:24.115000 46152 torch/distributed/elastic/multiprocessing/redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.
/Users/rturnbull/MDAP/hierarchicalsoftmax/.venv/lib/python3.13/site-packages/lightning/pytorch/utilities/model_summary/model_summary.py:477: The total number of parameters detected may be inaccurate because the model contains an instance of `UninitializedParameter`. To get an accurate number, set `self.example_input_array` in your LightningModule.

  | Name    | Type             | Params | Mode 
-----------------------------------------------------
0 | model   | Sequential       | 11.0 M | train
1 | loss_fn | CrossEntropyLoss | 0      | train
-----------------------------------------------------
11.0 M    Trainable params
0         Non-trainable params
11.0 M    Total params
43.825    Total estimated model params size (MB)
45        Modules in train mode
0         Modules in eval mode


Sanity Checking: |                                                                                                                                      | 0/? [00:00<?, ?it/s]

/Users/rturnbull/MDAP/hierarchicalsoftmax/.venv/lib/python3.13/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=13` in the `DataLoader` to improve performance.


Sanity Checking:   0%|                                                                                                                                  | 0/2 [00:00<?, ?it/s]Sanity Checking DataLoader 0:   0%|                                                                                                                     | 0/2 [00:00<?, ?it/s]

Sanity Checking DataLoader 0:  50%|██████████████████████████████████████████████████████▌                                                      | 1/2 [00:00<00:00,  8.37it/s]Sanity Checking DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 15.99it/s]

                                                                                                                                                                              

/Users/rturnbull/MDAP/hierarchicalsoftmax/.venv/lib/python3.13/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=13` in the `DataLoader` to improve performance.


Training: |                                                                                                                                             | 0/? [00:00<?, ?it/s]Training:   0%|                                                                                                                                      | 0/1563 [00:00<?, ?it/s]Epoch 0:   0%|                                                                                                                                       | 0/1563 [00:00<?, ?it/s]

Epoch 0:   0%|                                                                                                                               | 1/1563 [00:00<07:30,  3.47it/s]Epoch 0:   0%|                                                                                                   | 1/1563 [00:00<07:30,  3.46it/s, v_num=15, train_loss=4.640]

Epoch 0:   0%|▏                                                                                                  | 2/1563 [00:00<04:00,  6.50it/s, v_num=15, train_loss=4.640]

Epoch 0:   0%|▏                                                                                                  | 2/1563 [00:00<04:36,  5.65it/s, v_num=15, train_loss=4.640]

Epoch 0:   0%|▏                                                                                                  | 3/1563 [00:00<03:13,  8.08it/s, v_num=15, train_loss=4.640]

Epoch 0:   0%|▏                                                                                                  | 3/1563 [00:00<03:37,  7.16it/s, v_num=15, train_loss=4.740]

Epoch 0:   0%|▎                                                                                                  | 4/1563 [00:00<02:50,  9.15it/s, v_num=15, train_loss=4.740]

Epoch 0:   0%|▎                                                                                                  | 4/1563 [00:00<03:09,  8.24it/s, v_num=15, train_loss=4.910]

Epoch 0:   0%|▎                                                                                                  | 5/1563 [00:00<02:37,  9.90it/s, v_num=15, train_loss=4.910]

Epoch 0:   0%|▎                                                                                                  | 5/1563 [00:00<02:51,  9.06it/s, v_num=15, train_loss=4.970]

Epoch 0:   0%|▍                                                                                                  | 6/1563 [00:00<02:28, 10.47it/s, v_num=15, train_loss=4.970]

Epoch 0:   0%|▍                                                                                                  | 6/1563 [00:00<02:40,  9.70it/s, v_num=15, train_loss=4.490]

Epoch 0:   0%|▍                                                                                                  | 7/1563 [00:00<02:22, 10.93it/s, v_num=15, train_loss=4.490]

Epoch 0:   0%|▍                                                                                                  | 7/1563 [00:00<02:32, 10.21it/s, v_num=15, train_loss=4.520]

Epoch 0:   1%|▌                                                                                                  | 8/1563 [00:00<02:17, 11.30it/s, v_num=15, train_loss=4.520]

Epoch 0:   1%|▌                                                                                                  | 8/1563 [00:00<02:26, 10.63it/s, v_num=15, train_loss=4.700]

Epoch 0:   1%|▌                                                                                                  | 9/1563 [00:00<02:13, 11.60it/s, v_num=15, train_loss=4.700]

Epoch 0:   1%|▌                                                                                                  | 9/1563 [00:00<02:21, 10.98it/s, v_num=15, train_loss=4.510]

Epoch 0:   1%|▋                                                                                                 | 10/1563 [00:00<02:10, 11.86it/s, v_num=15, train_loss=4.510]

Epoch 0:   1%|▋                                                                                                 | 10/1563 [00:00<02:17, 11.26it/s, v_num=15, train_loss=4.740]

Epoch 0:   1%|▋                                                                                                 | 11/1563 [00:00<02:08, 12.07it/s, v_num=15, train_loss=4.740]

Epoch 0:   1%|▋                                                                                                 | 11/1563 [00:00<02:14, 11.52it/s, v_num=15, train_loss=4.610]

Epoch 0:   1%|▊                                                                                                 | 12/1563 [00:00<02:06, 12.25it/s, v_num=15, train_loss=4.610]

Epoch 0:   1%|▊                                                                                                 | 12/1563 [00:01<02:12, 11.71it/s, v_num=15, train_loss=4.920]

Epoch 0:   1%|▊                                                                                                 | 13/1563 [00:01<02:04, 12.43it/s, v_num=15, train_loss=4.920]

Epoch 0:   1%|▊                                                                                                 | 13/1563 [00:01<02:10, 11.91it/s, v_num=15, train_loss=4.880]

Epoch 0:   1%|▉                                                                                                 | 14/1563 [00:01<02:03, 12.57it/s, v_num=15, train_loss=4.880]

Epoch 0:   1%|▉                                                                                                 | 14/1563 [00:01<02:08, 12.09it/s, v_num=15, train_loss=4.680]

Epoch 0:   1%|▉                                                                                                 | 15/1563 [00:01<02:01, 12.70it/s, v_num=15, train_loss=4.680]

Epoch 0:   1%|▉                                                                                                 | 15/1563 [00:01<02:06, 12.25it/s, v_num=15, train_loss=4.790]

Epoch 0:   1%|█                                                                                                 | 16/1563 [00:01<02:00, 12.80it/s, v_num=15, train_loss=4.790]

Epoch 0:   1%|█                                                                                                 | 16/1563 [00:01<02:05, 12.36it/s, v_num=15, train_loss=4.520]

Epoch 0:   1%|█                                                                                                 | 17/1563 [00:01<01:59, 12.93it/s, v_num=15, train_loss=4.520]

Epoch 0:   1%|█                                                                                                 | 17/1563 [00:01<02:03, 12.49it/s, v_num=15, train_loss=4.700]

Epoch 0:   1%|█▏                                                                                                | 18/1563 [00:01<01:58, 13.05it/s, v_num=15, train_loss=4.700]

Epoch 0:   1%|█▏                                                                                                | 18/1563 [00:01<02:02, 12.62it/s, v_num=15, train_loss=4.620]

Epoch 0:   1%|█▏                                                                                                | 19/1563 [00:01<01:57, 13.15it/s, v_num=15, train_loss=4.620]

Epoch 0:   1%|█▏                                                                                                | 19/1563 [00:01<02:01, 12.73it/s, v_num=15, train_loss=4.550]

Epoch 0:   1%|█▎                                                                                                | 20/1563 [00:01<01:56, 13.22it/s, v_num=15, train_loss=4.550]

Epoch 0:   1%|█▎                                                                                                | 20/1563 [00:01<02:00, 12.84it/s, v_num=15, train_loss=4.630]

Epoch 0:   1%|█▎                                                                                                | 21/1563 [00:01<01:55, 13.29it/s, v_num=15, train_loss=4.630]

Epoch 0:   1%|█▎                                                                                                | 21/1563 [00:01<01:59, 12.93it/s, v_num=15, train_loss=4.720]

Epoch 0:   1%|█▍                                                                                                | 22/1563 [00:01<01:55, 13.37it/s, v_num=15, train_loss=4.720]

Epoch 0:   1%|█▍                                                                                                | 22/1563 [00:01<01:58, 13.01it/s, v_num=15, train_loss=4.590]

Epoch 0:   1%|█▍                                                                                                | 23/1563 [00:01<01:54, 13.43it/s, v_num=15, train_loss=4.590]

Epoch 0:   1%|█▍                                                                                                | 23/1563 [00:01<01:57, 13.09it/s, v_num=15, train_loss=4.460]

Epoch 0:   2%|█▌                                                                                                | 24/1563 [00:01<01:54, 13.48it/s, v_num=15, train_loss=4.460]

Epoch 0:   2%|█▌                                                                                                | 24/1563 [00:01<01:57, 13.15it/s, v_num=15, train_loss=4.800]

Epoch 0:   2%|█▌                                                                                                | 25/1563 [00:01<01:53, 13.52it/s, v_num=15, train_loss=4.800]

Epoch 0:   2%|█▌                                                                                                | 25/1563 [00:01<01:56, 13.21it/s, v_num=15, train_loss=4.770]

Epoch 0:   2%|█▋                                                                                                | 26/1563 [00:01<01:53, 13.58it/s, v_num=15, train_loss=4.770]

Epoch 0:   2%|█▋                                                                                                | 26/1563 [00:01<01:55, 13.26it/s, v_num=15, train_loss=4.630]

Epoch 0:   2%|█▋                                                                                                | 27/1563 [00:01<01:52, 13.59it/s, v_num=15, train_loss=4.630]

Epoch 0:   2%|█▋                                                                                                | 27/1563 [00:02<01:55, 13.30it/s, v_num=15, train_loss=4.320]

Epoch 0:   2%|█▊                                                                                                | 28/1563 [00:02<01:52, 13.66it/s, v_num=15, train_loss=4.320]

Epoch 0:   2%|█▊                                                                                                | 28/1563 [00:02<01:54, 13.35it/s, v_num=15, train_loss=4.450]

Epoch 0:   2%|█▊                                                                                                | 29/1563 [00:02<01:51, 13.70it/s, v_num=15, train_loss=4.450]

Epoch 0:   2%|█▊                                                                                                | 29/1563 [00:02<01:54, 13.40it/s, v_num=15, train_loss=4.650]

Epoch 0:   2%|█▉                                                                                                | 30/1563 [00:02<01:51, 13.75it/s, v_num=15, train_loss=4.650]

Epoch 0:   2%|█▉                                                                                                | 30/1563 [00:02<01:53, 13.45it/s, v_num=15, train_loss=4.380]

Epoch 0:   2%|█▉                                                                                                | 31/1563 [00:02<01:51, 13.79it/s, v_num=15, train_loss=4.380]

Epoch 0:   2%|█▉                                                                                                | 31/1563 [00:02<01:53, 13.50it/s, v_num=15, train_loss=4.740]

Epoch 0:   2%|██                                                                                                | 32/1563 [00:02<01:50, 13.81it/s, v_num=15, train_loss=4.740]

Epoch 0:   2%|██                                                                                                | 32/1563 [00:02<01:53, 13.55it/s, v_num=15, train_loss=4.630]

Epoch 0:   2%|██                                                                                                | 33/1563 [00:02<01:50, 13.86it/s, v_num=15, train_loss=4.630]

Epoch 0:   2%|██                                                                                                | 33/1563 [00:02<01:52, 13.59it/s, v_num=15, train_loss=4.590]

Epoch 0:   2%|██▏                                                                                               | 34/1563 [00:02<01:50, 13.89it/s, v_num=15, train_loss=4.590]

Epoch 0:   2%|██▏                                                                                               | 34/1563 [00:02<01:52, 13.64it/s, v_num=15, train_loss=4.340]

Epoch 0:   2%|██▏                                                                                               | 35/1563 [00:02<01:49, 13.93it/s, v_num=15, train_loss=4.340]

Epoch 0:   2%|██▏                                                                                               | 35/1563 [00:02<01:51, 13.67it/s, v_num=15, train_loss=4.510]

Epoch 0:   2%|██▎                                                                                               | 36/1563 [00:02<01:49, 13.94it/s, v_num=15, train_loss=4.510]

Epoch 0:   2%|██▎                                                                                               | 36/1563 [00:02<01:51, 13.71it/s, v_num=15, train_loss=4.500]

Epoch 0:   2%|██▎                                                                                               | 37/1563 [00:02<01:49, 13.97it/s, v_num=15, train_loss=4.500]

Epoch 0:   2%|██▎                                                                                               | 37/1563 [00:02<01:51, 13.74it/s, v_num=15, train_loss=4.400]

Epoch 0:   2%|██▍                                                                                               | 38/1563 [00:02<01:48, 14.00it/s, v_num=15, train_loss=4.400]

Epoch 0:   2%|██▍                                                                                               | 38/1563 [00:02<01:50, 13.77it/s, v_num=15, train_loss=4.730]

Epoch 0:   2%|██▍                                                                                               | 39/1563 [00:02<01:48, 14.02it/s, v_num=15, train_loss=4.730]

Epoch 0:   2%|██▍                                                                                               | 39/1563 [00:02<01:50, 13.80it/s, v_num=15, train_loss=4.680]

Epoch 0:   3%|██▌                                                                                               | 40/1563 [00:02<01:48, 14.04it/s, v_num=15, train_loss=4.680]

Epoch 0:   3%|██▌                                                                                               | 40/1563 [00:02<01:50, 13.82it/s, v_num=15, train_loss=4.450]

Epoch 0:   3%|██▌                                                                                               | 41/1563 [00:02<01:48, 14.06it/s, v_num=15, train_loss=4.450]

Epoch 0:   3%|██▌                                                                                               | 41/1563 [00:02<01:49, 13.85it/s, v_num=15, train_loss=4.480]

Epoch 0:   3%|██▋                                                                                               | 42/1563 [00:02<01:47, 14.10it/s, v_num=15, train_loss=4.480]

Epoch 0:   3%|██▋                                                                                               | 42/1563 [00:03<01:49, 13.88it/s, v_num=15, train_loss=4.630]

Epoch 0:   3%|██▋                                                                                               | 43/1563 [00:03<01:47, 14.13it/s, v_num=15, train_loss=4.630]

Epoch 0:   3%|██▋                                                                                               | 43/1563 [00:03<01:49, 13.91it/s, v_num=15, train_loss=4.520]

Epoch 0:   3%|██▊                                                                                               | 44/1563 [00:03<01:47, 14.15it/s, v_num=15, train_loss=4.520]

Epoch 0:   3%|██▊                                                                                               | 44/1563 [00:03<01:49, 13.93it/s, v_num=15, train_loss=4.640]

Epoch 0:   3%|██▊                                                                                               | 45/1563 [00:03<01:47, 14.17it/s, v_num=15, train_loss=4.640]

Epoch 0:   3%|██▊                                                                                               | 45/1563 [00:03<01:48, 13.96it/s, v_num=15, train_loss=4.670]

Epoch 0:   3%|██▉                                                                                               | 46/1563 [00:03<01:46, 14.19it/s, v_num=15, train_loss=4.670]

Epoch 0:   3%|██▉                                                                                               | 46/1563 [00:03<01:48, 13.99it/s, v_num=15, train_loss=4.590]

Epoch 0:   3%|██▉                                                                                               | 47/1563 [00:03<01:46, 14.20it/s, v_num=15, train_loss=4.590]

Epoch 0:   3%|██▉                                                                                               | 47/1563 [00:03<01:48, 14.01it/s, v_num=15, train_loss=4.330]

Epoch 0:   3%|███                                                                                               | 48/1563 [00:03<01:46, 14.23it/s, v_num=15, train_loss=4.330]

Epoch 0:   3%|███                                                                                               | 48/1563 [00:03<01:47, 14.04it/s, v_num=15, train_loss=4.420]

Epoch 0:   3%|███                                                                                               | 49/1563 [00:03<01:46, 14.25it/s, v_num=15, train_loss=4.420]

Epoch 0:   3%|███                                                                                               | 49/1563 [00:03<01:47, 14.06it/s, v_num=15, train_loss=4.420]

Epoch 0:   3%|███▏                                                                                              | 50/1563 [00:03<01:46, 14.27it/s, v_num=15, train_loss=4.420]

Epoch 0:   3%|███▏                                                                                              | 50/1563 [00:03<01:47, 14.08it/s, v_num=15, train_loss=4.570]

Epoch 0:   3%|███▏                                                                                              | 51/1563 [00:03<01:45, 14.27it/s, v_num=15, train_loss=4.570]

Epoch 0:   3%|███▏                                                                                              | 51/1563 [00:03<01:47, 14.09it/s, v_num=15, train_loss=4.520]

Epoch 0:   3%|███▎                                                                                              | 52/1563 [00:03<01:45, 14.28it/s, v_num=15, train_loss=4.520]

Epoch 0:   3%|███▎                                                                                              | 52/1563 [00:03<01:47, 14.10it/s, v_num=15, train_loss=4.660]

Epoch 0:   3%|███▎                                                                                              | 53/1563 [00:03<01:45, 14.28it/s, v_num=15, train_loss=4.660]

Epoch 0:   3%|███▎                                                                                              | 53/1563 [00:03<01:46, 14.12it/s, v_num=15, train_loss=4.640]

Epoch 0:   3%|███▍                                                                                              | 54/1563 [00:03<01:45, 14.30it/s, v_num=15, train_loss=4.640]

Epoch 0:   3%|███▍                                                                                              | 54/1563 [00:03<01:46, 14.13it/s, v_num=15, train_loss=4.370]

Epoch 0:   4%|███▍                                                                                              | 55/1563 [00:03<01:45, 14.31it/s, v_num=15, train_loss=4.370]

Epoch 0:   4%|███▍                                                                                              | 55/1563 [00:03<01:46, 14.14it/s, v_num=15, train_loss=4.540]

Epoch 0:   4%|███▌                                                                                              | 56/1563 [00:03<01:45, 14.33it/s, v_num=15, train_loss=4.540]

Epoch 0:   4%|███▌                                                                                              | 56/1563 [00:03<01:46, 14.16it/s, v_num=15, train_loss=4.670]

Epoch 0:   4%|███▌                                                                                              | 57/1563 [00:03<01:44, 14.35it/s, v_num=15, train_loss=4.670]

Epoch 0:   4%|███▌                                                                                              | 57/1563 [00:04<01:46, 14.18it/s, v_num=15, train_loss=4.270]

Epoch 0:   4%|███▋                                                                                              | 58/1563 [00:04<01:44, 14.37it/s, v_num=15, train_loss=4.270]

Epoch 0:   4%|███▋                                                                                              | 58/1563 [00:04<01:46, 14.19it/s, v_num=15, train_loss=4.670]

Epoch 0:   4%|███▋                                                                                              | 59/1563 [00:04<01:44, 14.37it/s, v_num=15, train_loss=4.670]

Epoch 0:   4%|███▋                                                                                              | 59/1563 [00:04<01:45, 14.21it/s, v_num=15, train_loss=4.350]

Epoch 0:   4%|███▊                                                                                              | 60/1563 [00:04<01:44, 14.39it/s, v_num=15, train_loss=4.350]

Epoch 0:   4%|███▊                                                                                              | 60/1563 [00:04<01:45, 14.22it/s, v_num=15, train_loss=4.360]

Epoch 0:   4%|███▊                                                                                              | 61/1563 [00:04<01:44, 14.40it/s, v_num=15, train_loss=4.360]

Epoch 0:   4%|███▊                                                                                              | 61/1563 [00:04<01:45, 14.24it/s, v_num=15, train_loss=4.590]

Epoch 0:   4%|███▉                                                                                              | 62/1563 [00:04<01:44, 14.41it/s, v_num=15, train_loss=4.590]

Epoch 0:   4%|███▉                                                                                              | 62/1563 [00:04<01:45, 14.25it/s, v_num=15, train_loss=4.420]

Epoch 0:   4%|███▉                                                                                              | 63/1563 [00:04<01:43, 14.42it/s, v_num=15, train_loss=4.420]

Epoch 0:   4%|███▉                                                                                              | 63/1563 [00:04<01:45, 14.27it/s, v_num=15, train_loss=4.380]

Epoch 0:   4%|████                                                                                              | 64/1563 [00:04<01:43, 14.43it/s, v_num=15, train_loss=4.380]

Epoch 0:   4%|████                                                                                              | 64/1563 [00:04<01:44, 14.28it/s, v_num=15, train_loss=4.350]

Epoch 0:   4%|████                                                                                              | 65/1563 [00:04<01:43, 14.44it/s, v_num=15, train_loss=4.350]

Epoch 0:   4%|████                                                                                              | 65/1563 [00:04<01:44, 14.29it/s, v_num=15, train_loss=4.320]

Epoch 0:   4%|████▏                                                                                             | 66/1563 [00:04<01:43, 14.44it/s, v_num=15, train_loss=4.320]

Epoch 0:   4%|████▏                                                                                             | 66/1563 [00:04<01:44, 14.30it/s, v_num=15, train_loss=4.300]

Epoch 0:   4%|████▏                                                                                             | 67/1563 [00:04<01:43, 14.45it/s, v_num=15, train_loss=4.300]

Epoch 0:   4%|████▏                                                                                             | 67/1563 [00:04<01:44, 14.31it/s, v_num=15, train_loss=4.440]

Epoch 0:   4%|████▎                                                                                             | 68/1563 [00:04<01:43, 14.45it/s, v_num=15, train_loss=4.440]

Epoch 0:   4%|████▎                                                                                             | 68/1563 [00:04<01:44, 14.32it/s, v_num=15, train_loss=4.260]

Epoch 0:   4%|████▎                                                                                             | 69/1563 [00:04<01:43, 14.46it/s, v_num=15, train_loss=4.260]

Epoch 0:   4%|████▎                                                                                             | 69/1563 [00:04<01:44, 14.32it/s, v_num=15, train_loss=4.170]

Epoch 0:   4%|████▍                                                                                             | 70/1563 [00:04<01:43, 14.46it/s, v_num=15, train_loss=4.170]

Epoch 0:   4%|████▍                                                                                             | 70/1563 [00:04<01:44, 14.33it/s, v_num=15, train_loss=4.370]

Epoch 0:   5%|████▍                                                                                             | 71/1563 [00:04<01:43, 14.48it/s, v_num=15, train_loss=4.370]

Epoch 0:   5%|████▍                                                                                             | 71/1563 [00:04<01:44, 14.34it/s, v_num=15, train_loss=4.500]

Epoch 0:   5%|████▌                                                                                             | 72/1563 [00:04<01:42, 14.49it/s, v_num=15, train_loss=4.500]

Epoch 0:   5%|████▌                                                                                             | 72/1563 [00:05<01:43, 14.35it/s, v_num=15, train_loss=4.500]

Epoch 0:   5%|████▌                                                                                             | 73/1563 [00:05<01:42, 14.50it/s, v_num=15, train_loss=4.500]

Epoch 0:   5%|████▌                                                                                             | 73/1563 [00:05<01:43, 14.36it/s, v_num=15, train_loss=4.330]

Epoch 0:   5%|████▋                                                                                             | 74/1563 [00:05<01:42, 14.51it/s, v_num=15, train_loss=4.330]

Epoch 0:   5%|████▋                                                                                             | 74/1563 [00:05<01:43, 14.37it/s, v_num=15, train_loss=4.290]

Epoch 0:   5%|████▋                                                                                             | 75/1563 [00:05<01:42, 14.52it/s, v_num=15, train_loss=4.290]

Epoch 0:   5%|████▋                                                                                             | 75/1563 [00:05<01:43, 14.39it/s, v_num=15, train_loss=4.530]

Epoch 0:   5%|████▊                                                                                             | 76/1563 [00:05<01:42, 14.53it/s, v_num=15, train_loss=4.530]

Epoch 0:   5%|████▊                                                                                             | 76/1563 [00:05<01:43, 14.40it/s, v_num=15, train_loss=4.180]

Epoch 0:   5%|████▊                                                                                             | 77/1563 [00:05<01:42, 14.53it/s, v_num=15, train_loss=4.180]

Epoch 0:   5%|████▊                                                                                             | 77/1563 [00:05<01:43, 14.41it/s, v_num=15, train_loss=4.290]

Epoch 0:   5%|████▉                                                                                             | 78/1563 [00:05<01:42, 14.54it/s, v_num=15, train_loss=4.290]

Epoch 0:   5%|████▉                                                                                             | 78/1563 [00:05<01:43, 14.41it/s, v_num=15, train_loss=4.750]

Epoch 0:   5%|████▉                                                                                             | 79/1563 [00:05<01:41, 14.55it/s, v_num=15, train_loss=4.750]

Epoch 0:   5%|████▉                                                                                             | 79/1563 [00:05<01:42, 14.42it/s, v_num=15, train_loss=4.360]

Epoch 0:   5%|█████                                                                                             | 80/1563 [00:05<01:41, 14.55it/s, v_num=15, train_loss=4.360]

Epoch 0:   5%|█████                                                                                             | 80/1563 [00:05<01:42, 14.43it/s, v_num=15, train_loss=4.470]

Epoch 0:   5%|█████                                                                                             | 81/1563 [00:05<01:41, 14.56it/s, v_num=15, train_loss=4.470]

Epoch 0:   5%|█████                                                                                             | 81/1563 [00:05<01:42, 14.44it/s, v_num=15, train_loss=4.360]

Epoch 0:   5%|█████▏                                                                                            | 82/1563 [00:05<01:41, 14.56it/s, v_num=15, train_loss=4.360]

Epoch 0:   5%|█████▏                                                                                            | 82/1563 [00:05<01:42, 14.45it/s, v_num=15, train_loss=4.370]

Epoch 0:   5%|█████▏                                                                                            | 83/1563 [00:05<01:41, 14.56it/s, v_num=15, train_loss=4.370]

Epoch 0:   5%|█████▏                                                                                            | 83/1563 [00:05<01:42, 14.45it/s, v_num=15, train_loss=4.280]

Epoch 0:   5%|█████▎                                                                                            | 84/1563 [00:05<01:41, 14.57it/s, v_num=15, train_loss=4.280]

Epoch 0:   5%|█████▎                                                                                            | 84/1563 [00:05<01:42, 14.46it/s, v_num=15, train_loss=4.560]

Epoch 0:   5%|█████▎                                                                                            | 85/1563 [00:05<01:41, 14.57it/s, v_num=15, train_loss=4.560]

Epoch 0:   5%|█████▎                                                                                            | 85/1563 [00:05<01:42, 14.46it/s, v_num=15, train_loss=4.430]

Epoch 0:   6%|█████▍                                                                                            | 86/1563 [00:05<01:41, 14.58it/s, v_num=15, train_loss=4.430]

Epoch 0:   6%|█████▍                                                                                            | 86/1563 [00:05<01:42, 14.47it/s, v_num=15, train_loss=4.410]

Epoch 0:   6%|█████▍                                                                                            | 87/1563 [00:05<01:41, 14.58it/s, v_num=15, train_loss=4.410]

Epoch 0:   6%|█████▍                                                                                            | 87/1563 [00:06<01:42, 14.47it/s, v_num=15, train_loss=4.320]

Epoch 0:   6%|█████▌                                                                                            | 88/1563 [00:06<01:41, 14.58it/s, v_num=15, train_loss=4.320]

Epoch 0:   6%|█████▌                                                                                            | 88/1563 [00:06<01:41, 14.47it/s, v_num=15, train_loss=4.530]

Epoch 0:   6%|█████▌                                                                                            | 89/1563 [00:06<01:41, 14.58it/s, v_num=15, train_loss=4.530]

Epoch 0:   6%|█████▌                                                                                            | 89/1563 [00:06<01:41, 14.48it/s, v_num=15, train_loss=4.410]

Epoch 0:   6%|█████▋                                                                                            | 90/1563 [00:06<01:40, 14.59it/s, v_num=15, train_loss=4.410]

Epoch 0:   6%|█████▋                                                                                            | 90/1563 [00:06<01:41, 14.48it/s, v_num=15, train_loss=4.160]

Epoch 0:   6%|█████▋                                                                                            | 91/1563 [00:06<01:40, 14.59it/s, v_num=15, train_loss=4.160]

Epoch 0:   6%|█████▋                                                                                            | 91/1563 [00:06<01:41, 14.49it/s, v_num=15, train_loss=4.390]

Epoch 0:   6%|█████▊                                                                                            | 92/1563 [00:06<01:40, 14.59it/s, v_num=15, train_loss=4.390]

Epoch 0:   6%|█████▊                                                                                            | 92/1563 [00:06<01:41, 14.49it/s, v_num=15, train_loss=4.280]

Epoch 0:   6%|█████▊                                                                                            | 93/1563 [00:06<01:40, 14.60it/s, v_num=15, train_loss=4.280]

Epoch 0:   6%|█████▊                                                                                            | 93/1563 [00:06<01:41, 14.50it/s, v_num=15, train_loss=4.200]

Epoch 0:   6%|█████▉                                                                                            | 94/1563 [00:06<01:40, 14.61it/s, v_num=15, train_loss=4.200]

Epoch 0:   6%|█████▉                                                                                            | 94/1563 [00:06<01:41, 14.50it/s, v_num=15, train_loss=4.360]

Epoch 0:   6%|█████▉                                                                                            | 95/1563 [00:06<01:40, 14.61it/s, v_num=15, train_loss=4.360]

Epoch 0:   6%|█████▉                                                                                            | 95/1563 [00:06<01:41, 14.51it/s, v_num=15, train_loss=4.280]

Epoch 0:   6%|██████                                                                                            | 96/1563 [00:06<01:40, 14.61it/s, v_num=15, train_loss=4.280]

Epoch 0:   6%|██████                                                                                            | 96/1563 [00:06<01:41, 14.51it/s, v_num=15, train_loss=4.580]

Epoch 0:   6%|██████                                                                                            | 97/1563 [00:06<01:40, 14.61it/s, v_num=15, train_loss=4.580]

Epoch 0:   6%|██████                                                                                            | 97/1563 [00:06<01:40, 14.52it/s, v_num=15, train_loss=4.260]

Epoch 0:   6%|██████▏                                                                                           | 98/1563 [00:06<01:40, 14.62it/s, v_num=15, train_loss=4.260]

Epoch 0:   6%|██████▏                                                                                           | 98/1563 [00:06<01:40, 14.52it/s, v_num=15, train_loss=4.620]

Epoch 0:   6%|██████▏                                                                                           | 99/1563 [00:06<01:40, 14.62it/s, v_num=15, train_loss=4.620]

Epoch 0:   6%|██████▏                                                                                           | 99/1563 [00:06<01:40, 14.53it/s, v_num=15, train_loss=4.510]

Epoch 0:   6%|██████▏                                                                                          | 100/1563 [00:06<01:40, 14.62it/s, v_num=15, train_loss=4.510]

Epoch 0:   6%|██████▏                                                                                          | 100/1563 [00:06<01:40, 14.53it/s, v_num=15, train_loss=4.530]

Epoch 0:   6%|██████▎                                                                                          | 101/1563 [00:06<01:39, 14.62it/s, v_num=15, train_loss=4.530]

Epoch 0:   6%|██████▎                                                                                          | 101/1563 [00:06<01:40, 14.53it/s, v_num=15, train_loss=4.160]

Epoch 0:   7%|██████▎                                                                                          | 102/1563 [00:06<01:39, 14.63it/s, v_num=15, train_loss=4.160]

Epoch 0:   7%|██████▎                                                                                          | 102/1563 [00:07<01:40, 14.54it/s, v_num=15, train_loss=4.430]

Epoch 0:   7%|██████▍                                                                                          | 103/1563 [00:07<01:39, 14.64it/s, v_num=15, train_loss=4.430]

Epoch 0:   7%|██████▍                                                                                          | 103/1563 [00:07<01:40, 14.54it/s, v_num=15, train_loss=4.470]

Epoch 0:   7%|██████▍                                                                                          | 104/1563 [00:07<01:39, 14.64it/s, v_num=15, train_loss=4.470]

Epoch 0:   7%|██████▍                                                                                          | 104/1563 [00:07<01:40, 14.55it/s, v_num=15, train_loss=4.330]

Epoch 0:   7%|██████▌                                                                                          | 105/1563 [00:07<01:39, 14.65it/s, v_num=15, train_loss=4.330]

Epoch 0:   7%|██████▌                                                                                          | 105/1563 [00:07<01:40, 14.55it/s, v_num=15, train_loss=4.270]

Epoch 0:   7%|██████▌                                                                                          | 106/1563 [00:07<01:39, 14.65it/s, v_num=15, train_loss=4.270]

Epoch 0:   7%|██████▌                                                                                          | 106/1563 [00:07<01:40, 14.56it/s, v_num=15, train_loss=4.160]

Epoch 0:   7%|██████▋                                                                                          | 107/1563 [00:07<01:39, 14.65it/s, v_num=15, train_loss=4.160]

Epoch 0:   7%|██████▋                                                                                          | 107/1563 [00:07<01:39, 14.56it/s, v_num=15, train_loss=4.380]

Epoch 0:   7%|██████▋                                                                                          | 108/1563 [00:07<01:39, 14.66it/s, v_num=15, train_loss=4.380]

Epoch 0:   7%|██████▋                                                                                          | 108/1563 [00:07<01:39, 14.57it/s, v_num=15, train_loss=4.210]

Epoch 0:   7%|██████▊                                                                                          | 109/1563 [00:07<01:39, 14.67it/s, v_num=15, train_loss=4.210]

Epoch 0:   7%|██████▊                                                                                          | 109/1563 [00:07<01:39, 14.57it/s, v_num=15, train_loss=4.320]

Epoch 0:   7%|██████▊                                                                                          | 110/1563 [00:07<01:39, 14.67it/s, v_num=15, train_loss=4.320]

Epoch 0:   7%|██████▊                                                                                          | 110/1563 [00:07<01:39, 14.58it/s, v_num=15, train_loss=4.040]

Epoch 0:   7%|██████▉                                                                                          | 111/1563 [00:07<01:39, 14.67it/s, v_num=15, train_loss=4.040]

Epoch 0:   7%|██████▉                                                                                          | 111/1563 [00:07<01:39, 14.58it/s, v_num=15, train_loss=4.500]

Epoch 0:   7%|██████▉                                                                                          | 112/1563 [00:07<01:38, 14.67it/s, v_num=15, train_loss=4.500]

Epoch 0:   7%|██████▉                                                                                          | 112/1563 [00:07<01:39, 14.58it/s, v_num=15, train_loss=4.130]

Epoch 0:   7%|███████                                                                                          | 113/1563 [00:07<01:38, 14.67it/s, v_num=15, train_loss=4.130]

Epoch 0:   7%|███████                                                                                          | 113/1563 [00:07<01:39, 14.59it/s, v_num=15, train_loss=4.520]

Epoch 0:   7%|███████                                                                                          | 114/1563 [00:07<01:38, 14.67it/s, v_num=15, train_loss=4.520]

Epoch 0:   7%|███████                                                                                          | 114/1563 [00:07<01:39, 14.59it/s, v_num=15, train_loss=4.110]

Epoch 0:   7%|███████▏                                                                                         | 115/1563 [00:07<01:38, 14.68it/s, v_num=15, train_loss=4.110]

Epoch 0:   7%|███████▏                                                                                         | 115/1563 [00:07<01:39, 14.59it/s, v_num=15, train_loss=4.120]

Epoch 0:   7%|███████▏                                                                                         | 116/1563 [00:07<01:38, 14.68it/s, v_num=15, train_loss=4.120]

Epoch 0:   7%|███████▏                                                                                         | 116/1563 [00:07<01:39, 14.60it/s, v_num=15, train_loss=4.350]

Epoch 0:   7%|███████▎                                                                                         | 117/1563 [00:07<01:38, 14.68it/s, v_num=15, train_loss=4.350]

Epoch 0:   7%|███████▎                                                                                         | 117/1563 [00:08<01:39, 14.60it/s, v_num=15, train_loss=4.610]

Epoch 0:   8%|███████▎                                                                                         | 118/1563 [00:08<01:38, 14.68it/s, v_num=15, train_loss=4.610]

Epoch 0:   8%|███████▎                                                                                         | 118/1563 [00:08<01:38, 14.60it/s, v_num=15, train_loss=4.490]

Epoch 0:   8%|███████▍                                                                                         | 119/1563 [00:08<01:38, 14.69it/s, v_num=15, train_loss=4.490]

Epoch 0:   8%|███████▍                                                                                         | 119/1563 [00:08<01:38, 14.60it/s, v_num=15, train_loss=4.370]

Epoch 0:   8%|███████▍                                                                                         | 120/1563 [00:08<01:38, 14.69it/s, v_num=15, train_loss=4.370]

Epoch 0:   8%|███████▍                                                                                         | 120/1563 [00:08<01:38, 14.61it/s, v_num=15, train_loss=4.390]

Epoch 0:   8%|███████▌                                                                                         | 121/1563 [00:08<01:38, 14.69it/s, v_num=15, train_loss=4.390]

Epoch 0:   8%|███████▌                                                                                         | 121/1563 [00:08<01:38, 14.61it/s, v_num=15, train_loss=4.400]

Epoch 0:   8%|███████▌                                                                                         | 122/1563 [00:08<01:38, 14.70it/s, v_num=15, train_loss=4.400]

Epoch 0:   8%|███████▌                                                                                         | 122/1563 [00:08<01:38, 14.62it/s, v_num=15, train_loss=4.630]

Epoch 0:   8%|███████▋                                                                                         | 123/1563 [00:08<01:37, 14.70it/s, v_num=15, train_loss=4.630]

Epoch 0:   8%|███████▋                                                                                         | 123/1563 [00:08<01:38, 14.62it/s, v_num=15, train_loss=4.440]

Epoch 0:   8%|███████▋                                                                                         | 124/1563 [00:08<01:37, 14.71it/s, v_num=15, train_loss=4.440]

Epoch 0:   8%|███████▋                                                                                         | 124/1563 [00:08<01:38, 14.63it/s, v_num=15, train_loss=4.300]

Epoch 0:   8%|███████▊                                                                                         | 125/1563 [00:08<01:37, 14.71it/s, v_num=15, train_loss=4.300]

Epoch 0:   8%|███████▊                                                                                         | 125/1563 [00:08<01:38, 14.63it/s, v_num=15, train_loss=4.430]

Epoch 0:   8%|███████▊                                                                                         | 126/1563 [00:08<01:37, 14.71it/s, v_num=15, train_loss=4.430]

Epoch 0:   8%|███████▊                                                                                         | 126/1563 [00:08<01:38, 14.63it/s, v_num=15, train_loss=4.290]

Epoch 0:   8%|███████▉                                                                                         | 127/1563 [00:08<01:37, 14.71it/s, v_num=15, train_loss=4.290]

Epoch 0:   8%|███████▉                                                                                         | 127/1563 [00:08<01:38, 14.63it/s, v_num=15, train_loss=4.640]

Epoch 0:   8%|███████▉                                                                                         | 128/1563 [00:08<01:37, 14.71it/s, v_num=15, train_loss=4.640]

Epoch 0:   8%|███████▉                                                                                         | 128/1563 [00:08<01:38, 14.64it/s, v_num=15, train_loss=4.180]

Epoch 0:   8%|████████                                                                                         | 129/1563 [00:08<01:37, 14.71it/s, v_num=15, train_loss=4.180]

Epoch 0:   8%|████████                                                                                         | 129/1563 [00:08<01:37, 14.64it/s, v_num=15, train_loss=4.180]

Epoch 0:   8%|████████                                                                                         | 130/1563 [00:08<01:37, 14.72it/s, v_num=15, train_loss=4.180]

Epoch 0:   8%|████████                                                                                         | 130/1563 [00:08<01:37, 14.64it/s, v_num=15, train_loss=4.380]

Epoch 0:   8%|████████▏                                                                                        | 131/1563 [00:08<01:37, 14.73it/s, v_num=15, train_loss=4.380]

Epoch 0:   8%|████████▏                                                                                        | 131/1563 [00:08<01:37, 14.65it/s, v_num=15, train_loss=4.460]

Epoch 0:   8%|████████▏                                                                                        | 132/1563 [00:08<01:37, 14.73it/s, v_num=15, train_loss=4.460]

Epoch 0:   8%|████████▏                                                                                        | 132/1563 [00:09<01:37, 14.65it/s, v_num=15, train_loss=4.480]

Epoch 0:   9%|████████▎                                                                                        | 133/1563 [00:09<01:37, 14.73it/s, v_num=15, train_loss=4.480]

Epoch 0:   9%|████████▎                                                                                        | 133/1563 [00:09<01:37, 14.66it/s, v_num=15, train_loss=4.510]

Epoch 0:   9%|████████▎                                                                                        | 134/1563 [00:09<01:36, 14.74it/s, v_num=15, train_loss=4.510]

Epoch 0:   9%|████████▎                                                                                        | 134/1563 [00:09<01:37, 14.66it/s, v_num=15, train_loss=4.510]

Epoch 0:   9%|████████▍                                                                                        | 135/1563 [00:09<01:36, 14.74it/s, v_num=15, train_loss=4.510]

Epoch 0:   9%|████████▍                                                                                        | 135/1563 [00:09<01:37, 14.66it/s, v_num=15, train_loss=4.290]

Epoch 0:   9%|████████▍                                                                                        | 136/1563 [00:09<01:36, 14.74it/s, v_num=15, train_loss=4.290]

Epoch 0:   9%|████████▍                                                                                        | 136/1563 [00:09<01:37, 14.67it/s, v_num=15, train_loss=4.430]

Epoch 0:   9%|████████▌                                                                                        | 137/1563 [00:09<01:36, 14.75it/s, v_num=15, train_loss=4.430]

Epoch 0:   9%|████████▌                                                                                        | 137/1563 [00:09<01:37, 14.67it/s, v_num=15, train_loss=4.310]

Epoch 0:   9%|████████▌                                                                                        | 138/1563 [00:09<01:36, 14.75it/s, v_num=15, train_loss=4.310]

Epoch 0:   9%|████████▌                                                                                        | 138/1563 [00:09<01:37, 14.68it/s, v_num=15, train_loss=4.310]

Epoch 0:   9%|████████▋                                                                                        | 139/1563 [00:09<01:36, 14.75it/s, v_num=15, train_loss=4.310]

Epoch 0:   9%|████████▋                                                                                        | 139/1563 [00:09<01:37, 14.68it/s, v_num=15, train_loss=4.490]

Epoch 0:   9%|████████▋                                                                                        | 140/1563 [00:09<01:36, 14.76it/s, v_num=15, train_loss=4.490]

Epoch 0:   9%|████████▋                                                                                        | 140/1563 [00:09<01:36, 14.68it/s, v_num=15, train_loss=4.200]

Epoch 0:   9%|████████▊                                                                                        | 141/1563 [00:09<01:36, 14.75it/s, v_num=15, train_loss=4.200]

Epoch 0:   9%|████████▊                                                                                        | 141/1563 [00:09<01:36, 14.69it/s, v_num=15, train_loss=4.390]

Epoch 0:   9%|████████▊                                                                                        | 142/1563 [00:09<01:36, 14.76it/s, v_num=15, train_loss=4.390]

Epoch 0:   9%|████████▊                                                                                        | 142/1563 [00:09<01:36, 14.69it/s, v_num=15, train_loss=4.210]

Epoch 0:   9%|████████▊                                                                                        | 143/1563 [00:09<01:36, 14.76it/s, v_num=15, train_loss=4.210]

Epoch 0:   9%|████████▊                                                                                        | 143/1563 [00:09<01:36, 14.69it/s, v_num=15, train_loss=4.330]

Epoch 0:   9%|████████▉                                                                                        | 144/1563 [00:09<01:36, 14.76it/s, v_num=15, train_loss=4.330]

Epoch 0:   9%|████████▉                                                                                        | 144/1563 [00:09<01:36, 14.69it/s, v_num=15, train_loss=4.040]

Epoch 0:   9%|████████▉                                                                                        | 145/1563 [00:09<01:36, 14.76it/s, v_num=15, train_loss=4.040]

Epoch 0:   9%|████████▉                                                                                        | 145/1563 [00:09<01:36, 14.69it/s, v_num=15, train_loss=4.500]

Epoch 0:   9%|█████████                                                                                        | 146/1563 [00:09<01:35, 14.77it/s, v_num=15, train_loss=4.500]

Epoch 0:   9%|█████████                                                                                        | 146/1563 [00:09<01:36, 14.70it/s, v_num=15, train_loss=4.290]

Epoch 0:   9%|█████████                                                                                        | 147/1563 [00:09<01:35, 14.77it/s, v_num=15, train_loss=4.290]

Epoch 0:   9%|█████████                                                                                        | 147/1563 [00:09<01:36, 14.70it/s, v_num=15, train_loss=4.570]

Epoch 0:   9%|█████████▏                                                                                       | 148/1563 [00:10<01:35, 14.77it/s, v_num=15, train_loss=4.570]

Epoch 0:   9%|█████████▏                                                                                       | 148/1563 [00:10<01:36, 14.70it/s, v_num=15, train_loss=4.360]

Epoch 0:  10%|█████████▏                                                                                       | 149/1563 [00:10<01:35, 14.77it/s, v_num=15, train_loss=4.360]

Epoch 0:  10%|█████████▏                                                                                       | 149/1563 [00:10<01:36, 14.71it/s, v_num=15, train_loss=4.140]

Epoch 0:  10%|█████████▎                                                                                       | 150/1563 [00:10<01:35, 14.78it/s, v_num=15, train_loss=4.140]

Epoch 0:  10%|█████████▎                                                                                       | 150/1563 [00:10<01:36, 14.71it/s, v_num=15, train_loss=4.360]

Epoch 0:  10%|█████████▎                                                                                       | 151/1563 [00:10<01:35, 14.78it/s, v_num=15, train_loss=4.360]

Epoch 0:  10%|█████████▎                                                                                       | 151/1563 [00:10<01:35, 14.71it/s, v_num=15, train_loss=4.270]

Epoch 0:  10%|█████████▍                                                                                       | 152/1563 [00:10<01:35, 14.78it/s, v_num=15, train_loss=4.270]

Epoch 0:  10%|█████████▍                                                                                       | 152/1563 [00:10<01:35, 14.72it/s, v_num=15, train_loss=4.190]

Epoch 0:  10%|█████████▍                                                                                       | 153/1563 [00:10<01:35, 14.79it/s, v_num=15, train_loss=4.190]

Epoch 0:  10%|█████████▍                                                                                       | 153/1563 [00:10<01:35, 14.72it/s, v_num=15, train_loss=4.390]

Epoch 0:  10%|█████████▌                                                                                       | 154/1563 [00:10<01:35, 14.79it/s, v_num=15, train_loss=4.390]

Epoch 0:  10%|█████████▌                                                                                       | 154/1563 [00:10<01:35, 14.72it/s, v_num=15, train_loss=4.180]

Epoch 0:  10%|█████████▌                                                                                       | 155/1563 [00:10<01:35, 14.79it/s, v_num=15, train_loss=4.180]

Epoch 0:  10%|█████████▌                                                                                       | 155/1563 [00:10<01:35, 14.73it/s, v_num=15, train_loss=4.220]

Epoch 0:  10%|█████████▋                                                                                       | 156/1563 [00:10<01:35, 14.79it/s, v_num=15, train_loss=4.220]

Epoch 0:  10%|█████████▋                                                                                       | 156/1563 [00:10<01:35, 14.73it/s, v_num=15, train_loss=4.680]

Epoch 0:  10%|█████████▋                                                                                       | 157/1563 [00:10<01:35, 14.79it/s, v_num=15, train_loss=4.680]

Epoch 0:  10%|█████████▋                                                                                       | 157/1563 [00:10<01:35, 14.73it/s, v_num=15, train_loss=4.380]

Epoch 0:  10%|█████████▊                                                                                       | 158/1563 [00:10<01:34, 14.79it/s, v_num=15, train_loss=4.380]

Epoch 0:  10%|█████████▊                                                                                       | 158/1563 [00:10<01:35, 14.73it/s, v_num=15, train_loss=4.250]

Epoch 0:  10%|█████████▊                                                                                       | 159/1563 [00:10<01:34, 14.79it/s, v_num=15, train_loss=4.250]

Epoch 0:  10%|█████████▊                                                                                       | 159/1563 [00:10<01:35, 14.73it/s, v_num=15, train_loss=4.010]

Epoch 0:  10%|█████████▉                                                                                       | 160/1563 [00:10<01:34, 14.80it/s, v_num=15, train_loss=4.010]

Epoch 0:  10%|█████████▉                                                                                       | 160/1563 [00:10<01:35, 14.74it/s, v_num=15, train_loss=4.100]

Epoch 0:  10%|█████████▉                                                                                       | 161/1563 [00:10<01:34, 14.80it/s, v_num=15, train_loss=4.100]

Epoch 0:  10%|█████████▉                                                                                       | 161/1563 [00:10<01:35, 14.74it/s, v_num=15, train_loss=4.230]

Epoch 0:  10%|██████████                                                                                       | 162/1563 [00:10<01:34, 14.80it/s, v_num=15, train_loss=4.230]

Epoch 0:  10%|██████████                                                                                       | 162/1563 [00:10<01:35, 14.74it/s, v_num=15, train_loss=4.400]

Epoch 0:  10%|██████████                                                                                       | 163/1563 [00:11<01:34, 14.80it/s, v_num=15, train_loss=4.400]

Epoch 0:  10%|██████████                                                                                       | 163/1563 [00:11<01:34, 14.74it/s, v_num=15, train_loss=4.180]

Epoch 0:  10%|██████████▏                                                                                      | 164/1563 [00:11<01:34, 14.80it/s, v_num=15, train_loss=4.180]

Epoch 0:  10%|██████████▏                                                                                      | 164/1563 [00:11<01:34, 14.74it/s, v_num=15, train_loss=4.290]

Epoch 0:  11%|██████████▏                                                                                      | 165/1563 [00:11<01:34, 14.80it/s, v_num=15, train_loss=4.290]

Epoch 0:  11%|██████████▏                                                                                      | 165/1563 [00:11<01:34, 14.74it/s, v_num=15, train_loss=4.380]

Epoch 0:  11%|██████████▎                                                                                      | 166/1563 [00:11<01:34, 14.80it/s, v_num=15, train_loss=4.380]

Epoch 0:  11%|██████████▎                                                                                      | 166/1563 [00:11<01:34, 14.74it/s, v_num=15, train_loss=4.380]

Epoch 0:  11%|██████████▎                                                                                      | 167/1563 [00:11<01:34, 14.80it/s, v_num=15, train_loss=4.380]

Epoch 0:  11%|██████████▎                                                                                      | 167/1563 [00:11<01:34, 14.74it/s, v_num=15, train_loss=4.410]

Epoch 0:  11%|██████████▍                                                                                      | 168/1563 [00:11<01:34, 14.80it/s, v_num=15, train_loss=4.410]

Epoch 0:  11%|██████████▍                                                                                      | 168/1563 [00:11<01:34, 14.74it/s, v_num=15, train_loss=4.520]

Epoch 0:  11%|██████████▍                                                                                      | 169/1563 [00:11<01:34, 14.81it/s, v_num=15, train_loss=4.520]

Epoch 0:  11%|██████████▍                                                                                      | 169/1563 [00:11<01:34, 14.75it/s, v_num=15, train_loss=4.220]

Epoch 0:  11%|██████████▌                                                                                      | 170/1563 [00:11<01:34, 14.81it/s, v_num=15, train_loss=4.220]

Epoch 0:  11%|██████████▌                                                                                      | 170/1563 [00:11<01:34, 14.75it/s, v_num=15, train_loss=4.320]

Epoch 0:  11%|██████████▌                                                                                      | 171/1563 [00:11<01:33, 14.81it/s, v_num=15, train_loss=4.320]

Epoch 0:  11%|██████████▌                                                                                      | 171/1563 [00:11<01:34, 14.75it/s, v_num=15, train_loss=4.210]

Epoch 0:  11%|██████████▋                                                                                      | 172/1563 [00:11<01:33, 14.81it/s, v_num=15, train_loss=4.210]

Epoch 0:  11%|██████████▋                                                                                      | 172/1563 [00:11<01:34, 14.75it/s, v_num=15, train_loss=4.240]

Epoch 0:  11%|██████████▋                                                                                      | 173/1563 [00:11<01:33, 14.81it/s, v_num=15, train_loss=4.240]

Epoch 0:  11%|██████████▋                                                                                      | 173/1563 [00:11<01:34, 14.75it/s, v_num=15, train_loss=4.250]

Epoch 0:  11%|██████████▊                                                                                      | 174/1563 [00:11<01:33, 14.81it/s, v_num=15, train_loss=4.250]

Epoch 0:  11%|██████████▊                                                                                      | 174/1563 [00:11<01:34, 14.75it/s, v_num=15, train_loss=4.250]

Epoch 0:  11%|██████████▊                                                                                      | 175/1563 [00:11<01:33, 14.81it/s, v_num=15, train_loss=4.250]

Epoch 0:  11%|██████████▊                                                                                      | 175/1563 [00:11<01:34, 14.75it/s, v_num=15, train_loss=4.310]

Epoch 0:  11%|██████████▉                                                                                      | 176/1563 [00:11<01:33, 14.81it/s, v_num=15, train_loss=4.310]

Epoch 0:  11%|██████████▉                                                                                      | 176/1563 [00:11<01:33, 14.76it/s, v_num=15, train_loss=4.060]

Epoch 0:  11%|██████████▉                                                                                      | 177/1563 [00:11<01:33, 14.82it/s, v_num=15, train_loss=4.060]

Epoch 0:  11%|██████████▉                                                                                      | 177/1563 [00:11<01:33, 14.76it/s, v_num=15, train_loss=4.240]

Epoch 0:  11%|███████████                                                                                      | 178/1563 [00:12<01:33, 14.82it/s, v_num=15, train_loss=4.240]

Epoch 0:  11%|███████████                                                                                      | 178/1563 [00:12<01:33, 14.76it/s, v_num=15, train_loss=4.380]

Epoch 0:  11%|███████████                                                                                      | 179/1563 [00:12<01:33, 14.82it/s, v_num=15, train_loss=4.380]

Epoch 0:  11%|███████████                                                                                      | 179/1563 [00:12<01:33, 14.76it/s, v_num=15, train_loss=4.260]

Epoch 0:  12%|███████████▏                                                                                     | 180/1563 [00:12<01:33, 14.83it/s, v_num=15, train_loss=4.260]

Epoch 0:  12%|███████████▏                                                                                     | 180/1563 [00:12<01:33, 14.77it/s, v_num=15, train_loss=4.090]

Epoch 0:  12%|███████████▏                                                                                     | 181/1563 [00:12<01:33, 14.83it/s, v_num=15, train_loss=4.090]

Epoch 0:  12%|███████████▏                                                                                     | 181/1563 [00:12<01:33, 14.77it/s, v_num=15, train_loss=4.380]

Epoch 0:  12%|███████████▎                                                                                     | 182/1563 [00:12<01:33, 14.83it/s, v_num=15, train_loss=4.380]

Epoch 0:  12%|███████████▎                                                                                     | 182/1563 [00:12<01:33, 14.77it/s, v_num=15, train_loss=4.240]

Epoch 0:  12%|███████████▎                                                                                     | 183/1563 [00:12<01:33, 14.83it/s, v_num=15, train_loss=4.240]

Epoch 0:  12%|███████████▎                                                                                     | 183/1563 [00:12<01:33, 14.77it/s, v_num=15, train_loss=4.200]

Epoch 0:  12%|███████████▍                                                                                     | 184/1563 [00:12<01:32, 14.83it/s, v_num=15, train_loss=4.200]

Epoch 0:  12%|███████████▍                                                                                     | 184/1563 [00:12<01:33, 14.78it/s, v_num=15, train_loss=3.880]

Epoch 0:  12%|███████████▍                                                                                     | 185/1563 [00:12<01:32, 14.84it/s, v_num=15, train_loss=3.880]

Epoch 0:  12%|███████████▍                                                                                     | 185/1563 [00:12<01:33, 14.78it/s, v_num=15, train_loss=4.350]

Epoch 0:  12%|███████████▌                                                                                     | 186/1563 [00:12<01:32, 14.83it/s, v_num=15, train_loss=4.350]

Epoch 0:  12%|███████████▌                                                                                     | 186/1563 [00:12<01:33, 14.78it/s, v_num=15, train_loss=4.430]

Epoch 0:  12%|███████████▌                                                                                     | 187/1563 [00:12<01:32, 14.83it/s, v_num=15, train_loss=4.430]

Epoch 0:  12%|███████████▌                                                                                     | 187/1563 [00:12<01:33, 14.78it/s, v_num=15, train_loss=4.410]

Epoch 0:  12%|███████████▋                                                                                     | 188/1563 [00:12<01:32, 14.83it/s, v_num=15, train_loss=4.410]

Epoch 0:  12%|███████████▋                                                                                     | 188/1563 [00:12<01:33, 14.78it/s, v_num=15, train_loss=4.030]

Epoch 0:  12%|███████████▋                                                                                     | 189/1563 [00:12<01:32, 14.83it/s, v_num=15, train_loss=4.030]

Epoch 0:  12%|███████████▋                                                                                     | 189/1563 [00:12<01:32, 14.78it/s, v_num=15, train_loss=4.290]

Epoch 0:  12%|███████████▊                                                                                     | 190/1563 [00:12<01:32, 14.83it/s, v_num=15, train_loss=4.290]

Epoch 0:  12%|███████████▊                                                                                     | 190/1563 [00:12<01:32, 14.78it/s, v_num=15, train_loss=4.120]

Epoch 0:  12%|███████████▊                                                                                     | 191/1563 [00:12<01:32, 14.83it/s, v_num=15, train_loss=4.120]

Epoch 0:  12%|███████████▊                                                                                     | 191/1563 [00:12<01:32, 14.78it/s, v_num=15, train_loss=4.550]

Epoch 0:  12%|███████████▉                                                                                     | 192/1563 [00:12<01:32, 14.83it/s, v_num=15, train_loss=4.550]

Epoch 0:  12%|███████████▉                                                                                     | 192/1563 [00:12<01:32, 14.78it/s, v_num=15, train_loss=4.220]

Epoch 0:  12%|███████████▉                                                                                     | 193/1563 [00:13<01:32, 14.84it/s, v_num=15, train_loss=4.220]

Epoch 0:  12%|███████████▉                                                                                     | 193/1563 [00:13<01:32, 14.78it/s, v_num=15, train_loss=4.020]

Epoch 0:  12%|████████████                                                                                     | 194/1563 [00:13<01:32, 14.84it/s, v_num=15, train_loss=4.020]

Epoch 0:  12%|████████████                                                                                     | 194/1563 [00:13<01:32, 14.79it/s, v_num=15, train_loss=4.430]

Epoch 0:  12%|████████████                                                                                     | 195/1563 [00:13<01:32, 14.84it/s, v_num=15, train_loss=4.430]

Epoch 0:  12%|████████████                                                                                     | 195/1563 [00:13<01:32, 14.79it/s, v_num=15, train_loss=4.470]

Epoch 0:  13%|████████████▏                                                                                    | 196/1563 [00:13<01:32, 14.84it/s, v_num=15, train_loss=4.470]

Epoch 0:  13%|████████████▏                                                                                    | 196/1563 [00:13<01:32, 14.79it/s, v_num=15, train_loss=4.460]

Epoch 0:  13%|████████████▏                                                                                    | 197/1563 [00:13<01:32, 14.84it/s, v_num=15, train_loss=4.460]

Epoch 0:  13%|████████████▏                                                                                    | 197/1563 [00:13<01:32, 14.79it/s, v_num=15, train_loss=4.310]

Epoch 0:  13%|████████████▎                                                                                    | 198/1563 [00:13<01:31, 14.84it/s, v_num=15, train_loss=4.310]

Epoch 0:  13%|████████████▎                                                                                    | 198/1563 [00:13<01:32, 14.79it/s, v_num=15, train_loss=4.170]

Epoch 0:  13%|████████████▎                                                                                    | 199/1563 [00:13<01:31, 14.84it/s, v_num=15, train_loss=4.170]

Epoch 0:  13%|████████████▎                                                                                    | 199/1563 [00:13<01:32, 14.79it/s, v_num=15, train_loss=4.070]

Epoch 0:  13%|████████████▍                                                                                    | 200/1563 [00:13<01:31, 14.85it/s, v_num=15, train_loss=4.070]

Epoch 0:  13%|████████████▍                                                                                    | 200/1563 [00:13<01:32, 14.79it/s, v_num=15, train_loss=4.240]

Epoch 0:  13%|████████████▍                                                                                    | 201/1563 [00:13<01:31, 14.84it/s, v_num=15, train_loss=4.240]

Epoch 0:  13%|████████████▍                                                                                    | 201/1563 [00:13<01:32, 14.79it/s, v_num=15, train_loss=4.450]

Epoch 0:  13%|████████████▌                                                                                    | 202/1563 [00:13<01:31, 14.84it/s, v_num=15, train_loss=4.450]

Epoch 0:  13%|████████████▌                                                                                    | 202/1563 [00:13<01:32, 14.79it/s, v_num=15, train_loss=4.370]

Epoch 0:  13%|████████████▌                                                                                    | 203/1563 [00:13<01:31, 14.84it/s, v_num=15, train_loss=4.370]

Epoch 0:  13%|████████████▌                                                                                    | 203/1563 [00:13<01:31, 14.79it/s, v_num=15, train_loss=4.190]

Epoch 0:  13%|████████████▋                                                                                    | 204/1563 [00:13<01:31, 14.84it/s, v_num=15, train_loss=4.190]

Epoch 0:  13%|████████████▋                                                                                    | 204/1563 [00:13<01:31, 14.79it/s, v_num=15, train_loss=4.370]

Epoch 0:  13%|████████████▋                                                                                    | 205/1563 [00:13<01:31, 14.84it/s, v_num=15, train_loss=4.370]

Epoch 0:  13%|████████████▋                                                                                    | 205/1563 [00:13<01:31, 14.79it/s, v_num=15, train_loss=4.570]

Epoch 0:  13%|████████████▊                                                                                    | 206/1563 [00:13<01:31, 14.84it/s, v_num=15, train_loss=4.570]

Epoch 0:  13%|████████████▊                                                                                    | 206/1563 [00:13<01:31, 14.80it/s, v_num=15, train_loss=3.870]

Epoch 0:  13%|████████████▊                                                                                    | 207/1563 [00:13<01:31, 14.84it/s, v_num=15, train_loss=3.870]

Epoch 0:  13%|████████████▊                                                                                    | 207/1563 [00:13<01:31, 14.80it/s, v_num=15, train_loss=4.210]

Epoch 0:  13%|████████████▉                                                                                    | 208/1563 [00:14<01:31, 14.84it/s, v_num=15, train_loss=4.210]

Epoch 0:  13%|████████████▉                                                                                    | 208/1563 [00:14<01:31, 14.80it/s, v_num=15, train_loss=4.030]

Epoch 0:  13%|████████████▉                                                                                    | 209/1563 [00:14<01:31, 14.84it/s, v_num=15, train_loss=4.030]

Epoch 0:  13%|████████████▉                                                                                    | 209/1563 [00:14<01:31, 14.80it/s, v_num=15, train_loss=4.360]

Epoch 0:  13%|█████████████                                                                                    | 210/1563 [00:14<01:31, 14.83it/s, v_num=15, train_loss=4.360]

Epoch 0:  13%|█████████████                                                                                    | 210/1563 [00:14<01:31, 14.80it/s, v_num=15, train_loss=4.240]

Epoch 0:  13%|█████████████                                                                                    | 211/1563 [00:14<01:31, 14.84it/s, v_num=15, train_loss=4.240]

Epoch 0:  13%|█████████████                                                                                    | 211/1563 [00:14<01:31, 14.80it/s, v_num=15, train_loss=4.150]

Epoch 0:  14%|█████████████▏                                                                                   | 212/1563 [00:14<01:31, 14.84it/s, v_num=15, train_loss=4.150]

Epoch 0:  14%|█████████████▏                                                                                   | 212/1563 [00:14<01:31, 14.80it/s, v_num=15, train_loss=4.200]

Epoch 0:  14%|█████████████▏                                                                                   | 213/1563 [00:14<01:30, 14.85it/s, v_num=15, train_loss=4.200]

Epoch 0:  14%|█████████████▏                                                                                   | 213/1563 [00:14<01:31, 14.80it/s, v_num=15, train_loss=4.150]

Epoch 0:  14%|█████████████▎                                                                                   | 214/1563 [00:14<01:30, 14.85it/s, v_num=15, train_loss=4.150]

Epoch 0:  14%|█████████████▎                                                                                   | 214/1563 [00:14<01:31, 14.80it/s, v_num=15, train_loss=4.360]

Epoch 0:  14%|█████████████▎                                                                                   | 215/1563 [00:14<01:30, 14.85it/s, v_num=15, train_loss=4.360]

Epoch 0:  14%|█████████████▎                                                                                   | 215/1563 [00:14<01:31, 14.80it/s, v_num=15, train_loss=3.900]

Epoch 0:  14%|█████████████▍                                                                                   | 216/1563 [00:14<01:30, 14.85it/s, v_num=15, train_loss=3.900]

Epoch 0:  14%|█████████████▍                                                                                   | 216/1563 [00:14<01:30, 14.80it/s, v_num=15, train_loss=4.060]

Epoch 0:  14%|█████████████▍                                                                                   | 217/1563 [00:14<01:30, 14.85it/s, v_num=15, train_loss=4.060]

Epoch 0:  14%|█████████████▍                                                                                   | 217/1563 [00:14<01:30, 14.80it/s, v_num=15, train_loss=4.260]

Epoch 0:  14%|█████████████▌                                                                                   | 218/1563 [00:14<01:30, 14.85it/s, v_num=15, train_loss=4.260]

Epoch 0:  14%|█████████████▌                                                                                   | 218/1563 [00:14<01:30, 14.80it/s, v_num=15, train_loss=4.010]

Epoch 0:  14%|█████████████▌                                                                                   | 219/1563 [00:14<01:30, 14.85it/s, v_num=15, train_loss=4.010]

Epoch 0:  14%|█████████████▌                                                                                   | 219/1563 [00:14<01:30, 14.81it/s, v_num=15, train_loss=4.180]

Epoch 0:  14%|█████████████▋                                                                                   | 220/1563 [00:14<01:30, 14.85it/s, v_num=15, train_loss=4.180]

Epoch 0:  14%|█████████████▋                                                                                   | 220/1563 [00:14<01:30, 14.81it/s, v_num=15, train_loss=4.240]

Epoch 0:  14%|█████████████▋                                                                                   | 221/1563 [00:14<01:30, 14.85it/s, v_num=15, train_loss=4.240]

Epoch 0:  14%|█████████████▋                                                                                   | 221/1563 [00:14<01:30, 14.81it/s, v_num=15, train_loss=4.300]

Epoch 0:  14%|█████████████▊                                                                                   | 222/1563 [00:14<01:30, 14.85it/s, v_num=15, train_loss=4.300]

Epoch 0:  14%|█████████████▊                                                                                   | 222/1563 [00:14<01:30, 14.81it/s, v_num=15, train_loss=4.510]

Epoch 0:  14%|█████████████▊                                                                                   | 223/1563 [00:15<01:30, 14.85it/s, v_num=15, train_loss=4.510]

Epoch 0:  14%|█████████████▊                                                                                   | 223/1563 [00:15<01:30, 14.81it/s, v_num=15, train_loss=4.260]

Epoch 0:  14%|█████████████▉                                                                                   | 224/1563 [00:15<01:30, 14.85it/s, v_num=15, train_loss=4.260]

Epoch 0:  14%|█████████████▉                                                                                   | 224/1563 [00:15<01:30, 14.81it/s, v_num=15, train_loss=4.160]

Epoch 0:  14%|█████████████▉                                                                                   | 225/1563 [00:15<01:30, 14.85it/s, v_num=15, train_loss=4.160]

Epoch 0:  14%|█████████████▉                                                                                   | 225/1563 [00:15<01:30, 14.81it/s, v_num=15, train_loss=4.440]

Epoch 0:  14%|██████████████                                                                                   | 226/1563 [00:15<01:30, 14.85it/s, v_num=15, train_loss=4.440]

Epoch 0:  14%|██████████████                                                                                   | 226/1563 [00:15<01:30, 14.81it/s, v_num=15, train_loss=4.200]

Epoch 0:  15%|██████████████                                                                                   | 227/1563 [00:15<01:29, 14.85it/s, v_num=15, train_loss=4.200]

Epoch 0:  15%|██████████████                                                                                   | 227/1563 [00:15<01:30, 14.81it/s, v_num=15, train_loss=3.980]

Epoch 0:  15%|██████████████▏                                                                                  | 228/1563 [00:15<01:29, 14.86it/s, v_num=15, train_loss=3.980]

Epoch 0:  15%|██████████████▏                                                                                  | 228/1563 [00:15<01:30, 14.81it/s, v_num=15, train_loss=4.360]

Epoch 0:  15%|██████████████▏                                                                                  | 229/1563 [00:15<01:29, 14.86it/s, v_num=15, train_loss=4.360]

Epoch 0:  15%|██████████████▏                                                                                  | 229/1563 [00:15<01:30, 14.81it/s, v_num=15, train_loss=4.210]

Epoch 0:  15%|██████████████▎                                                                                  | 230/1563 [00:15<01:29, 14.86it/s, v_num=15, train_loss=4.210]

Epoch 0:  15%|██████████████▎                                                                                  | 230/1563 [00:15<01:29, 14.81it/s, v_num=15, train_loss=4.060]

Epoch 0:  15%|██████████████▎                                                                                  | 231/1563 [00:15<01:29, 14.86it/s, v_num=15, train_loss=4.060]

Epoch 0:  15%|██████████████▎                                                                                  | 231/1563 [00:15<01:29, 14.81it/s, v_num=15, train_loss=4.320]

Epoch 0:  15%|██████████████▍                                                                                  | 232/1563 [00:15<01:29, 14.86it/s, v_num=15, train_loss=4.320]

Epoch 0:  15%|██████████████▍                                                                                  | 232/1563 [00:15<01:29, 14.82it/s, v_num=15, train_loss=4.150]

Epoch 0:  15%|██████████████▍                                                                                  | 233/1563 [00:15<01:29, 14.86it/s, v_num=15, train_loss=4.150]

Epoch 0:  15%|██████████████▍                                                                                  | 233/1563 [00:15<01:29, 14.82it/s, v_num=15, train_loss=4.160]

Epoch 0:  15%|██████████████▌                                                                                  | 234/1563 [00:15<01:29, 14.86it/s, v_num=15, train_loss=4.160]

Epoch 0:  15%|██████████████▌                                                                                  | 234/1563 [00:15<01:29, 14.82it/s, v_num=15, train_loss=4.280]

Epoch 0:  15%|██████████████▌                                                                                  | 235/1563 [00:15<01:29, 14.86it/s, v_num=15, train_loss=4.280]

Epoch 0:  15%|██████████████▌                                                                                  | 235/1563 [00:15<01:29, 14.82it/s, v_num=15, train_loss=4.050]

Epoch 0:  15%|██████████████▋                                                                                  | 236/1563 [00:15<01:29, 14.86it/s, v_num=15, train_loss=4.050]

Epoch 0:  15%|██████████████▋                                                                                  | 236/1563 [00:15<01:29, 14.82it/s, v_num=15, train_loss=4.110]

Epoch 0:  15%|██████████████▋                                                                                  | 237/1563 [00:15<01:29, 14.86it/s, v_num=15, train_loss=4.110]

Epoch 0:  15%|██████████████▋                                                                                  | 237/1563 [00:15<01:29, 14.82it/s, v_num=15, train_loss=3.990]

Epoch 0:  15%|██████████████▊                                                                                  | 238/1563 [00:16<01:29, 14.86it/s, v_num=15, train_loss=3.990]

Epoch 0:  15%|██████████████▊                                                                                  | 238/1563 [00:16<01:29, 14.82it/s, v_num=15, train_loss=4.170]

Epoch 0:  15%|██████████████▊                                                                                  | 239/1563 [00:16<01:29, 14.87it/s, v_num=15, train_loss=4.170]

Epoch 0:  15%|██████████████▊                                                                                  | 239/1563 [00:16<01:29, 14.82it/s, v_num=15, train_loss=4.410]

Epoch 0:  15%|██████████████▉                                                                                  | 240/1563 [00:16<01:28, 14.87it/s, v_num=15, train_loss=4.410]

Epoch 0:  15%|██████████████▉                                                                                  | 240/1563 [00:16<01:29, 14.82it/s, v_num=15, train_loss=4.190]

Epoch 0:  15%|██████████████▉                                                                                  | 241/1563 [00:16<01:28, 14.87it/s, v_num=15, train_loss=4.190]

Epoch 0:  15%|██████████████▉                                                                                  | 241/1563 [00:16<01:29, 14.83it/s, v_num=15, train_loss=4.290]

Epoch 0:  15%|███████████████                                                                                  | 242/1563 [00:16<01:28, 14.87it/s, v_num=15, train_loss=4.290]

Epoch 0:  15%|███████████████                                                                                  | 242/1563 [00:16<01:29, 14.83it/s, v_num=15, train_loss=3.960]

Epoch 0:  16%|███████████████                                                                                  | 243/1563 [00:16<01:28, 14.87it/s, v_num=15, train_loss=3.960]

Epoch 0:  16%|███████████████                                                                                  | 243/1563 [00:16<01:29, 14.83it/s, v_num=15, train_loss=4.290]

Epoch 0:  16%|███████████████▏                                                                                 | 244/1563 [00:16<01:28, 14.87it/s, v_num=15, train_loss=4.290]

Epoch 0:  16%|███████████████▏                                                                                 | 244/1563 [00:16<01:28, 14.83it/s, v_num=15, train_loss=4.210]

Epoch 0:  16%|███████████████▏                                                                                 | 245/1563 [00:16<01:28, 14.87it/s, v_num=15, train_loss=4.210]

Epoch 0:  16%|███████████████▏                                                                                 | 245/1563 [00:16<01:28, 14.83it/s, v_num=15, train_loss=4.250]

Epoch 0:  16%|███████████████▎                                                                                 | 246/1563 [00:16<01:28, 14.87it/s, v_num=15, train_loss=4.250]

Epoch 0:  16%|███████████████▎                                                                                 | 246/1563 [00:16<01:28, 14.83it/s, v_num=15, train_loss=4.230]

Epoch 0:  16%|███████████████▎                                                                                 | 247/1563 [00:16<01:28, 14.87it/s, v_num=15, train_loss=4.230]

Epoch 0:  16%|███████████████▎                                                                                 | 247/1563 [00:16<01:28, 14.83it/s, v_num=15, train_loss=4.270]

Epoch 0:  16%|███████████████▍                                                                                 | 248/1563 [00:16<01:28, 14.87it/s, v_num=15, train_loss=4.270]

Epoch 0:  16%|███████████████▍                                                                                 | 248/1563 [00:16<01:28, 14.83it/s, v_num=15, train_loss=4.090]

Epoch 0:  16%|███████████████▍                                                                                 | 249/1563 [00:16<01:28, 14.87it/s, v_num=15, train_loss=4.090]

Epoch 0:  16%|███████████████▍                                                                                 | 249/1563 [00:16<01:28, 14.83it/s, v_num=15, train_loss=4.030]

Epoch 0:  16%|███████████████▌                                                                                 | 250/1563 [00:16<01:28, 14.87it/s, v_num=15, train_loss=4.030]

Epoch 0:  16%|███████████████▌                                                                                 | 250/1563 [00:16<01:28, 14.83it/s, v_num=15, train_loss=4.260]

Epoch 0:  16%|███████████████▌                                                                                 | 251/1563 [00:16<01:28, 14.87it/s, v_num=15, train_loss=4.260]

Epoch 0:  16%|███████████████▌                                                                                 | 251/1563 [00:16<01:28, 14.83it/s, v_num=15, train_loss=4.110]

Epoch 0:  16%|███████████████▋                                                                                 | 252/1563 [00:16<01:28, 14.88it/s, v_num=15, train_loss=4.110]

Epoch 0:  16%|███████████████▋                                                                                 | 252/1563 [00:16<01:28, 14.83it/s, v_num=15, train_loss=4.010]

Epoch 0:  16%|███████████████▋                                                                                 | 253/1563 [00:17<01:28, 14.87it/s, v_num=15, train_loss=4.010]

Epoch 0:  16%|███████████████▋                                                                                 | 253/1563 [00:17<01:28, 14.83it/s, v_num=15, train_loss=4.000]

Epoch 0:  16%|███████████████▊                                                                                 | 254/1563 [00:17<01:27, 14.88it/s, v_num=15, train_loss=4.000]

Epoch 0:  16%|███████████████▊                                                                                 | 254/1563 [00:17<01:28, 14.84it/s, v_num=15, train_loss=3.890]

Epoch 0:  16%|███████████████▊                                                                                 | 255/1563 [00:17<01:27, 14.88it/s, v_num=15, train_loss=3.890]

Epoch 0:  16%|███████████████▊                                                                                 | 255/1563 [00:17<01:28, 14.84it/s, v_num=15, train_loss=4.600]

Epoch 0:  16%|███████████████▉                                                                                 | 256/1563 [00:17<01:27, 14.88it/s, v_num=15, train_loss=4.600]

Epoch 0:  16%|███████████████▉                                                                                 | 256/1563 [00:17<01:28, 14.84it/s, v_num=15, train_loss=4.360]

Epoch 0:  16%|███████████████▉                                                                                 | 257/1563 [00:17<01:27, 14.88it/s, v_num=15, train_loss=4.360]

Epoch 0:  16%|███████████████▉                                                                                 | 257/1563 [00:17<01:28, 14.84it/s, v_num=15, train_loss=4.430]

Epoch 0:  17%|████████████████                                                                                 | 258/1563 [00:17<01:27, 14.85it/s, v_num=15, train_loss=4.430]

Epoch 0:  17%|████████████████                                                                                 | 258/1563 [00:17<01:28, 14.83it/s, v_num=15, train_loss=4.120]

Epoch 0:  17%|████████████████                                                                                 | 259/1563 [00:17<01:27, 14.85it/s, v_num=15, train_loss=4.120]

Epoch 0:  17%|████████████████                                                                                 | 259/1563 [00:17<01:27, 14.82it/s, v_num=15, train_loss=3.940]

Epoch 0:  17%|████████████████▏                                                                                | 260/1563 [00:17<01:27, 14.86it/s, v_num=15, train_loss=3.940]

Epoch 0:  17%|████████████████▏                                                                                | 260/1563 [00:17<01:27, 14.82it/s, v_num=15, train_loss=4.160]

Epoch 0:  17%|████████████████▏                                                                                | 261/1563 [00:17<01:27, 14.86it/s, v_num=15, train_loss=4.160]

Epoch 0:  17%|████████████████▏                                                                                | 261/1563 [00:17<01:27, 14.82it/s, v_num=15, train_loss=4.270]

Epoch 0:  17%|████████████████▎                                                                                | 262/1563 [00:17<01:27, 14.86it/s, v_num=15, train_loss=4.270]

Epoch 0:  17%|████████████████▎                                                                                | 262/1563 [00:17<01:27, 14.82it/s, v_num=15, train_loss=4.210]

Epoch 0:  17%|████████████████▎                                                                                | 263/1563 [00:17<01:27, 14.86it/s, v_num=15, train_loss=4.210]

Epoch 0:  17%|████████████████▎                                                                                | 263/1563 [00:17<01:27, 14.82it/s, v_num=15, train_loss=4.300]

Epoch 0:  17%|████████████████▍                                                                                | 264/1563 [00:17<01:27, 14.86it/s, v_num=15, train_loss=4.300]

Epoch 0:  17%|████████████████▍                                                                                | 264/1563 [00:17<01:27, 14.82it/s, v_num=15, train_loss=4.090]

Epoch 0:  17%|████████████████▍                                                                                | 265/1563 [00:17<01:27, 14.86it/s, v_num=15, train_loss=4.090]

Epoch 0:  17%|████████████████▍                                                                                | 265/1563 [00:17<01:27, 14.82it/s, v_num=15, train_loss=4.400]

Epoch 0:  17%|████████████████▌                                                                                | 266/1563 [00:17<01:27, 14.86it/s, v_num=15, train_loss=4.400]

Epoch 0:  17%|████████████████▌                                                                                | 266/1563 [00:17<01:27, 14.82it/s, v_num=15, train_loss=4.090]

Epoch 0:  17%|████████████████▌                                                                                | 267/1563 [00:17<01:27, 14.86it/s, v_num=15, train_loss=4.090]

Epoch 0:  17%|████████████████▌                                                                                | 267/1563 [00:18<01:27, 14.82it/s, v_num=15, train_loss=4.210]

Epoch 0:  17%|████████████████▋                                                                                | 268/1563 [00:18<01:27, 14.87it/s, v_num=15, train_loss=4.210]

Epoch 0:  17%|████████████████▋                                                                                | 268/1563 [00:18<01:27, 14.83it/s, v_num=15, train_loss=4.210]

Epoch 0:  17%|████████████████▋                                                                                | 269/1563 [00:18<01:27, 14.87it/s, v_num=15, train_loss=4.210]

Epoch 0:  17%|████████████████▋                                                                                | 269/1563 [00:18<01:27, 14.83it/s, v_num=15, train_loss=4.300]

Epoch 0:  17%|████████████████▊                                                                                | 270/1563 [00:18<01:26, 14.87it/s, v_num=15, train_loss=4.300]

Epoch 0:  17%|████████████████▊                                                                                | 270/1563 [00:18<01:27, 14.83it/s, v_num=15, train_loss=4.180]

Epoch 0:  17%|████████████████▊                                                                                | 271/1563 [00:18<01:26, 14.87it/s, v_num=15, train_loss=4.180]

Epoch 0:  17%|████████████████▊                                                                                | 271/1563 [00:18<01:27, 14.83it/s, v_num=15, train_loss=4.220]

Epoch 0:  17%|████████████████▉                                                                                | 272/1563 [00:18<01:26, 14.87it/s, v_num=15, train_loss=4.220]

Epoch 0:  17%|████████████████▉                                                                                | 272/1563 [00:18<01:27, 14.83it/s, v_num=15, train_loss=4.220]

Epoch 0:  17%|████████████████▉                                                                                | 273/1563 [00:18<01:26, 14.87it/s, v_num=15, train_loss=4.220]

Epoch 0:  17%|████████████████▉                                                                                | 273/1563 [00:18<01:26, 14.83it/s, v_num=15, train_loss=4.230]

Epoch 0:  18%|█████████████████                                                                                | 274/1563 [00:18<01:26, 14.87it/s, v_num=15, train_loss=4.230]

Epoch 0:  18%|█████████████████                                                                                | 274/1563 [00:18<01:26, 14.83it/s, v_num=15, train_loss=4.140]

Epoch 0:  18%|█████████████████                                                                                | 275/1563 [00:18<01:26, 14.87it/s, v_num=15, train_loss=4.140]

Epoch 0:  18%|█████████████████                                                                                | 275/1563 [00:18<01:26, 14.83it/s, v_num=15, train_loss=3.910]

Epoch 0:  18%|█████████████████▏                                                                               | 276/1563 [00:18<01:26, 14.87it/s, v_num=15, train_loss=3.910]

Epoch 0:  18%|█████████████████▏                                                                               | 276/1563 [00:18<01:26, 14.83it/s, v_num=15, train_loss=4.300]

Epoch 0:  18%|█████████████████▏                                                                               | 277/1563 [00:18<01:26, 14.87it/s, v_num=15, train_loss=4.300]

Epoch 0:  18%|█████████████████▏                                                                               | 277/1563 [00:18<01:26, 14.83it/s, v_num=15, train_loss=4.170]

Epoch 0:  18%|█████████████████▎                                                                               | 278/1563 [00:18<01:26, 14.87it/s, v_num=15, train_loss=4.170]

Epoch 0:  18%|█████████████████▎                                                                               | 278/1563 [00:18<01:26, 14.84it/s, v_num=15, train_loss=4.470]

Epoch 0:  18%|█████████████████▎                                                                               | 279/1563 [00:18<01:26, 14.87it/s, v_num=15, train_loss=4.470]

Epoch 0:  18%|█████████████████▎                                                                               | 279/1563 [00:18<01:26, 14.84it/s, v_num=15, train_loss=4.520]

Epoch 0:  18%|█████████████████▍                                                                               | 280/1563 [00:18<01:26, 14.87it/s, v_num=15, train_loss=4.520]

Epoch 0:  18%|█████████████████▍                                                                               | 280/1563 [00:18<01:26, 14.84it/s, v_num=15, train_loss=4.000]

Epoch 0:  18%|█████████████████▍                                                                               | 281/1563 [00:18<01:26, 14.87it/s, v_num=15, train_loss=4.000]

Epoch 0:  18%|█████████████████▍                                                                               | 281/1563 [00:18<01:26, 14.84it/s, v_num=15, train_loss=4.260]

Epoch 0:  18%|█████████████████▌                                                                               | 282/1563 [00:18<01:26, 14.87it/s, v_num=15, train_loss=4.260]

Epoch 0:  18%|█████████████████▌                                                                               | 282/1563 [00:19<01:26, 14.84it/s, v_num=15, train_loss=4.070]

Epoch 0:  18%|█████████████████▌                                                                               | 283/1563 [00:19<01:26, 14.88it/s, v_num=15, train_loss=4.070]

Epoch 0:  18%|█████████████████▌                                                                               | 283/1563 [00:19<01:26, 14.84it/s, v_num=15, train_loss=4.470]

Epoch 0:  18%|█████████████████▋                                                                               | 284/1563 [00:19<01:25, 14.88it/s, v_num=15, train_loss=4.470]

Epoch 0:  18%|█████████████████▋                                                                               | 284/1563 [00:19<01:26, 14.84it/s, v_num=15, train_loss=3.960]

Epoch 0:  18%|█████████████████▋                                                                               | 285/1563 [00:19<01:25, 14.88it/s, v_num=15, train_loss=3.960]

Epoch 0:  18%|█████████████████▋                                                                               | 285/1563 [00:19<01:26, 14.84it/s, v_num=15, train_loss=4.230]

Epoch 0:  18%|█████████████████▋                                                                               | 286/1563 [00:19<01:25, 14.88it/s, v_num=15, train_loss=4.230]

Epoch 0:  18%|█████████████████▋                                                                               | 286/1563 [00:19<01:26, 14.84it/s, v_num=15, train_loss=4.120]

Epoch 0:  18%|█████████████████▊                                                                               | 287/1563 [00:19<01:25, 14.88it/s, v_num=15, train_loss=4.120]

Epoch 0:  18%|█████████████████▊                                                                               | 287/1563 [00:19<01:25, 14.84it/s, v_num=15, train_loss=3.930]

Epoch 0:  18%|█████████████████▊                                                                               | 288/1563 [00:19<01:25, 14.88it/s, v_num=15, train_loss=3.930]

Epoch 0:  18%|█████████████████▊                                                                               | 288/1563 [00:19<01:25, 14.84it/s, v_num=15, train_loss=4.300]

Epoch 0:  18%|█████████████████▉                                                                               | 289/1563 [00:19<01:25, 14.88it/s, v_num=15, train_loss=4.300]

Epoch 0:  18%|█████████████████▉                                                                               | 289/1563 [00:19<01:25, 14.84it/s, v_num=15, train_loss=4.390]

Epoch 0:  19%|█████████████████▉                                                                               | 290/1563 [00:19<01:25, 14.88it/s, v_num=15, train_loss=4.390]

Epoch 0:  19%|█████████████████▉                                                                               | 290/1563 [00:19<01:25, 14.84it/s, v_num=15, train_loss=4.170]

Epoch 0:  19%|██████████████████                                                                               | 291/1563 [00:19<01:25, 14.88it/s, v_num=15, train_loss=4.170]

Epoch 0:  19%|██████████████████                                                                               | 291/1563 [00:19<01:25, 14.84it/s, v_num=15, train_loss=4.060]

Epoch 0:  19%|██████████████████                                                                               | 292/1563 [00:19<01:25, 14.88it/s, v_num=15, train_loss=4.060]

Epoch 0:  19%|██████████████████                                                                               | 292/1563 [00:19<01:25, 14.84it/s, v_num=15, train_loss=4.100]

Epoch 0:  19%|██████████████████▏                                                                              | 293/1563 [00:19<01:25, 14.88it/s, v_num=15, train_loss=4.100]

Epoch 0:  19%|██████████████████▏                                                                              | 293/1563 [00:19<01:25, 14.84it/s, v_num=15, train_loss=3.990]

Epoch 0:  19%|██████████████████▏                                                                              | 294/1563 [00:19<01:25, 14.88it/s, v_num=15, train_loss=3.990]

Epoch 0:  19%|██████████████████▏                                                                              | 294/1563 [00:19<01:25, 14.84it/s, v_num=15, train_loss=4.240]

Epoch 0:  19%|██████████████████▎                                                                              | 295/1563 [00:19<01:25, 14.88it/s, v_num=15, train_loss=4.240]

Epoch 0:  19%|██████████████████▎                                                                              | 295/1563 [00:19<01:25, 14.84it/s, v_num=15, train_loss=4.100]

Epoch 0:  19%|██████████████████▎                                                                              | 296/1563 [00:19<01:25, 14.88it/s, v_num=15, train_loss=4.100]

Epoch 0:  19%|██████████████████▎                                                                              | 296/1563 [00:19<01:25, 14.84it/s, v_num=15, train_loss=4.270]

Epoch 0:  19%|██████████████████▍                                                                              | 297/1563 [00:19<01:25, 14.88it/s, v_num=15, train_loss=4.270]

Epoch 0:  19%|██████████████████▍                                                                              | 297/1563 [00:20<01:25, 14.84it/s, v_num=15, train_loss=4.260]

Epoch 0:  19%|██████████████████▍                                                                              | 298/1563 [00:20<01:25, 14.88it/s, v_num=15, train_loss=4.260]

Epoch 0:  19%|██████████████████▍                                                                              | 298/1563 [00:20<01:25, 14.84it/s, v_num=15, train_loss=4.270]

Epoch 0:  19%|██████████████████▌                                                                              | 299/1563 [00:20<01:24, 14.88it/s, v_num=15, train_loss=4.270]

Epoch 0:  19%|██████████████████▌                                                                              | 299/1563 [00:20<01:25, 14.85it/s, v_num=15, train_loss=4.360]

Epoch 0:  19%|██████████████████▌                                                                              | 300/1563 [00:20<01:24, 14.88it/s, v_num=15, train_loss=4.360]

Epoch 0:  19%|██████████████████▌                                                                              | 300/1563 [00:20<01:25, 14.85it/s, v_num=15, train_loss=4.220]

Epoch 0:  19%|██████████████████▋                                                                              | 301/1563 [00:20<01:24, 14.88it/s, v_num=15, train_loss=4.220]

Epoch 0:  19%|██████████████████▋                                                                              | 301/1563 [00:20<01:24, 14.85it/s, v_num=15, train_loss=4.210]

Epoch 0:  19%|██████████████████▋                                                                              | 302/1563 [00:20<01:24, 14.88it/s, v_num=15, train_loss=4.210]

Epoch 0:  19%|██████████████████▋                                                                              | 302/1563 [00:20<01:24, 14.85it/s, v_num=15, train_loss=4.060]

Epoch 0:  19%|██████████████████▊                                                                              | 303/1563 [00:20<01:24, 14.88it/s, v_num=15, train_loss=4.060]

Epoch 0:  19%|██████████████████▊                                                                              | 303/1563 [00:20<01:24, 14.85it/s, v_num=15, train_loss=4.210]

Epoch 0:  19%|██████████████████▊                                                                              | 304/1563 [00:20<01:24, 14.88it/s, v_num=15, train_loss=4.210]

Epoch 0:  19%|██████████████████▊                                                                              | 304/1563 [00:20<01:24, 14.85it/s, v_num=15, train_loss=4.350]

Epoch 0:  20%|██████████████████▉                                                                              | 305/1563 [00:20<01:24, 14.89it/s, v_num=15, train_loss=4.350]

Epoch 0:  20%|██████████████████▉                                                                              | 305/1563 [00:20<01:24, 14.85it/s, v_num=15, train_loss=4.050]

Epoch 0:  20%|██████████████████▉                                                                              | 306/1563 [00:20<01:24, 14.88it/s, v_num=15, train_loss=4.050]

Epoch 0:  20%|██████████████████▉                                                                              | 306/1563 [00:20<01:24, 14.85it/s, v_num=15, train_loss=4.580]

Epoch 0:  20%|███████████████████                                                                              | 307/1563 [00:20<01:24, 14.88it/s, v_num=15, train_loss=4.580]

Epoch 0:  20%|███████████████████                                                                              | 307/1563 [00:20<01:24, 14.85it/s, v_num=15, train_loss=4.190]

Epoch 0:  20%|███████████████████                                                                              | 308/1563 [00:20<01:24, 14.88it/s, v_num=15, train_loss=4.190]

Epoch 0:  20%|███████████████████                                                                              | 308/1563 [00:20<01:24, 14.85it/s, v_num=15, train_loss=3.990]

Epoch 0:  20%|███████████████████▏                                                                             | 309/1563 [00:20<01:24, 14.88it/s, v_num=15, train_loss=3.990]

Epoch 0:  20%|███████████████████▏                                                                             | 309/1563 [00:20<01:24, 14.85it/s, v_num=15, train_loss=4.130]

Epoch 0:  20%|███████████████████▏                                                                             | 310/1563 [00:20<01:24, 14.88it/s, v_num=15, train_loss=4.130]

Epoch 0:  20%|███████████████████▏                                                                             | 310/1563 [00:20<01:24, 14.85it/s, v_num=15, train_loss=3.990]

Epoch 0:  20%|███████████████████▎                                                                             | 311/1563 [00:20<01:24, 14.88it/s, v_num=15, train_loss=3.990]

Epoch 0:  20%|███████████████████▎                                                                             | 311/1563 [00:20<01:24, 14.85it/s, v_num=15, train_loss=3.840]

Epoch 0:  20%|███████████████████▎                                                                             | 312/1563 [00:20<01:24, 14.89it/s, v_num=15, train_loss=3.840]

Epoch 0:  20%|███████████████████▎                                                                             | 312/1563 [00:21<01:24, 14.85it/s, v_num=15, train_loss=4.220]

Epoch 0:  20%|███████████████████▍                                                                             | 313/1563 [00:21<01:23, 14.89it/s, v_num=15, train_loss=4.220]

Epoch 0:  20%|███████████████████▍                                                                             | 313/1563 [00:21<01:24, 14.85it/s, v_num=15, train_loss=4.170]

Epoch 0:  20%|███████████████████▍                                                                             | 314/1563 [00:21<01:23, 14.89it/s, v_num=15, train_loss=4.170]

Epoch 0:  20%|███████████████████▍                                                                             | 314/1563 [00:21<01:24, 14.85it/s, v_num=15, train_loss=4.030]

Epoch 0:  20%|███████████████████▌                                                                             | 315/1563 [00:21<01:23, 14.88it/s, v_num=15, train_loss=4.030]

Epoch 0:  20%|███████████████████▌                                                                             | 315/1563 [00:21<01:24, 14.85it/s, v_num=15, train_loss=4.080]

Epoch 0:  20%|███████████████████▌                                                                             | 316/1563 [00:21<01:23, 14.88it/s, v_num=15, train_loss=4.080]

Epoch 0:  20%|███████████████████▌                                                                             | 316/1563 [00:21<01:23, 14.85it/s, v_num=15, train_loss=4.430]

Epoch 0:  20%|███████████████████▋                                                                             | 317/1563 [00:21<01:23, 14.88it/s, v_num=15, train_loss=4.430]

Epoch 0:  20%|███████████████████▋                                                                             | 317/1563 [00:21<01:23, 14.85it/s, v_num=15, train_loss=4.230]

Epoch 0:  20%|███████████████████▋                                                                             | 318/1563 [00:21<01:23, 14.89it/s, v_num=15, train_loss=4.230]

Epoch 0:  20%|███████████████████▋                                                                             | 318/1563 [00:21<01:23, 14.85it/s, v_num=15, train_loss=4.350]

Epoch 0:  20%|███████████████████▊                                                                             | 319/1563 [00:21<01:23, 14.89it/s, v_num=15, train_loss=4.350]

Epoch 0:  20%|███████████████████▊                                                                             | 319/1563 [00:21<01:23, 14.86it/s, v_num=15, train_loss=4.030]

Epoch 0:  20%|███████████████████▊                                                                             | 320/1563 [00:21<01:23, 14.89it/s, v_num=15, train_loss=4.030]

Epoch 0:  20%|███████████████████▊                                                                             | 320/1563 [00:21<01:23, 14.86it/s, v_num=15, train_loss=4.130]

Epoch 0:  21%|███████████████████▉                                                                             | 321/1563 [00:21<01:23, 14.89it/s, v_num=15, train_loss=4.130]

Epoch 0:  21%|███████████████████▉                                                                             | 321/1563 [00:21<01:23, 14.86it/s, v_num=15, train_loss=4.100]

Epoch 0:  21%|███████████████████▉                                                                             | 322/1563 [00:21<01:23, 14.89it/s, v_num=15, train_loss=4.100]

Epoch 0:  21%|███████████████████▉                                                                             | 322/1563 [00:21<01:23, 14.86it/s, v_num=15, train_loss=4.690]

Epoch 0:  21%|████████████████████                                                                             | 323/1563 [00:21<01:23, 14.89it/s, v_num=15, train_loss=4.690]

Epoch 0:  21%|████████████████████                                                                             | 323/1563 [00:21<01:23, 14.86it/s, v_num=15, train_loss=4.080]

Epoch 0:  21%|████████████████████                                                                             | 324/1563 [00:21<01:23, 14.89it/s, v_num=15, train_loss=4.080]

Epoch 0:  21%|████████████████████                                                                             | 324/1563 [00:21<01:23, 14.86it/s, v_num=15, train_loss=4.070]

Epoch 0:  21%|████████████████████▏                                                                            | 325/1563 [00:21<01:23, 14.89it/s, v_num=15, train_loss=4.070]

Epoch 0:  21%|████████████████████▏                                                                            | 325/1563 [00:21<01:23, 14.86it/s, v_num=15, train_loss=4.270]

Epoch 0:  21%|████████████████████▏                                                                            | 326/1563 [00:21<01:23, 14.89it/s, v_num=15, train_loss=4.270]

Epoch 0:  21%|████████████████████▏                                                                            | 326/1563 [00:21<01:23, 14.86it/s, v_num=15, train_loss=4.110]

Epoch 0:  21%|████████████████████▎                                                                            | 327/1563 [00:21<01:23, 14.89it/s, v_num=15, train_loss=4.110]

Epoch 0:  21%|████████████████████▎                                                                            | 327/1563 [00:22<01:23, 14.86it/s, v_num=15, train_loss=4.120]

Epoch 0:  21%|████████████████████▎                                                                            | 328/1563 [00:22<01:22, 14.89it/s, v_num=15, train_loss=4.120]


Detected KeyboardInterrupt, attempting graceful shutdown ...


<span class="codehilite"><div class="highlight"><pre><span></span><span class="gt">Traceback (most recent call last):</span>
  File <span class="nb">&quot;/Users/rturnbull/MDAP/hierarchicalsoftmax/.venv/lib/python3.13/site-packages/lightning/pytorch/trainer/call.py&quot;</span>, line <span class="m">48</span>, in <span class="n">_call_and_handle_interrupt</span>
<span class="w">    </span><span class="k">return</span> <span class="n">trainer_fn</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
  File <span class="nb">&quot;/Users/rturnbull/MDAP/hierarchicalsoftmax/.venv/lib/python3.13/site-packages/lightning/pytorch/trainer/trainer.py&quot;</span>, line <span class="m">599</span>, in <span class="n">_fit_impl</span>
<span class="w">    </span><span class="bp">self</span><span class="o">.</span><span class="n">_run</span><span class="p">(</span><span cla

In [None]:
import pandas as pd
from pathlib import Path

# Plot the validation accuracy per epoch from the metrics CSV in the logger's log directory.
# The leading underscore keeps this helper name out of the way of other cells.
_val_metric = "val_MulticlassAccuracy"

basic_metrics_df = pd.read_csv(Path(basic_logger.log_dir) / "metrics.csv")
if _val_metric not in basic_metrics_df.columns:
    # The column is missing when no validation value has been written to the CSV
    # (e.g. training was interrupted before the first validation pass).
    # Add it as all-NaN so the dropna below yields an empty frame instead of raising KeyError.
    basic_metrics_df = basic_metrics_df.assign(**{_val_metric: float("nan")})
# Keep only the rows that carry a validation accuracy value.
basic_metrics_df = basic_metrics_df.dropna(subset=[_val_metric])

basic_fig = go.Figure()
basic_fig.add_trace(go.Scatter(x=basic_metrics_df["epoch"], y=basic_metrics_df[_val_metric], mode='lines', name='class'))
basic_fig.update_layout(
    xaxis_title="Epochs",
    yaxis_title="Accuracy",
)
if basic_metrics_df.empty:
    # Make it obvious why the plot is blank rather than showing empty axes without explanation.
    basic_fig.update_layout(title="No validation accuracy has been logged yet")
basic_fig.show()


<span class="codehilite"><div class="highlight"><pre><span></span><span class="gt">Traceback (most recent call last):</span>
  File <span class="nb">&quot;/Users/rturnbull/MDAP/hierarchicalsoftmax/.venv/lib/python3.13/site-packages/marimo/_runtime/executor.py&quot;</span>, line <span class="m">122</span>, in <span class="n">execute_cell</span>
<span class="w">    </span><span class="n">exec</span><span class="p">(</span><span class="n">cell</span><span class="o">.</span><span class="n">body</span><span class="p">,</span> <span class="n">glbls</span><span class="p">)</span>
<span class="w">    </span><span class="pm">~~~~^^^^^^^^^^^^^^^^^^</span>
  File <span class="nb">&quot;/var/folders/5x/vn169pd92hb1twlj61zvwpw40000gq/T/marimo_46152/__marimo__cell_emfo_.py&quot;</span>, line <span class="m">5</span>, in <span class="n">&lt;module&gt;</span>
<span class="w">    </span><span class="n">basic_metrics_df</span> <span class="o">=</span> <span class="n">basic_metrics_df</span><span class="

## Hierarchical model

Let's now create a hierarchical model.
First, we need to create a tree structure that groups the CIFAR classes under their superclasses.

In [None]:
from hierarchicalsoftmax import (
    SoftmaxNode,
    HierarchicalSoftmaxLazyLinear,
    HierarchicalSoftmaxLoss,
)
from hierarchicalsoftmax.metrics import RankAccuracyTorchMetric

# Group the dataset's class names under superclasses. Each key is a superclass
# name and each value lists the class names that belong to it. The class names
# are looked up against `train_data.classes` below, so they must match exactly.
if len(train_data.classes) == 10:
    # CIFAR-10: the ten classes split into two groups
    superclasses = {
        "animals": ["bird", "cat", "deer", "dog", "frog", "horse"],
        "vehicles": ["airplane", "automobile", "ship", "truck"],
    }
else:
    # CIFAR-100: twenty superclasses with five classes each
    superclasses = {
        "aquatic mammals": ["beaver", "dolphin", "otter", "seal", "whale"],
        "fish": ["aquarium_fish", "flatfish", "ray", "shark", "trout"],
        "flowers": ["orchid", "poppy", "rose", "sunflower", "tulip"],
        "food containers": ["bottle", "bowl", "can", "cup", "plate"],
        "fruit and vegetables": ["apple", "mushroom", "orange", "pear", "sweet_pepper"],
        "household electrical devices": ["clock", "keyboard", "lamp", "telephone", "television"],
        "household furniture": ["bed", "chair", "couch", "table", "wardrobe"],
        "insects": ["bee", "beetle", "butterfly", "caterpillar", "cockroach"],
        "large carnivores": ["bear", "leopard", "lion", "tiger", "wolf"],
        "large man-made outdoor things": ["bridge", "castle", "house", "road", "skyscraper"],
        "large natural outdoor scenes": ["cloud", "forest", "mountain", "plain", "sea"],
        "large omnivores and herbivores": ["camel", "cattle", "chimpanzee", "elephant", "kangaroo"],
        "medium-sized mammals": ["fox", "porcupine", "possum", "raccoon", "skunk"],
        "non-insect invertebrates": ["crab", "lobster", "snail", "spider", "worm"],
        "people": ["baby", "boy", "girl", "man", "woman"],
        "reptiles": ["crocodile", "dinosaur", "lizard", "snake", "turtle"],
        "small mammals": ["hamster", "mouse", "rabbit", "shrew", "squirrel"],
        "trees": ["maple_tree", "oak_tree", "palm_tree", "pine_tree", "willow_tree"],
        "vehicles 1": ["bicycle", "bus", "motorcycle", "pickup_truck", "train"],
        "vehicles 2": ["lawn_mower", "rocket", "streetcar", "tank", "tractor"],
    }


# Assemble a two-level tree: root -> superclass -> class
root = SoftmaxNode("root")
for superclass, classes in superclasses.items():
    superclass_node = SoftmaxNode(superclass, parent=root)
    for class_name in classes:
        SoftmaxNode(class_name, parent=superclass_node)

# With the tree complete, fix the node indexes.
# The tree is read-only from this point on.
root.set_indexes()

# Leaf name -> node id, then dataset label index -> node id
name_to_node_id = {leaf.name: root.node_to_id[leaf] for leaf in root.leaves}
index_to_node_id = dict(
    (label_index, name_to_node_id[label_name])
    for label_index, label_name in enumerate(train_data.classes)
)

# Render the hierarchy
mo.Html(root.svg())

### Create DataLoaders with hierarchical labels

In [None]:
class HierarchicalDataset(torch.utils.data.Dataset):
    """Wrap a dataset so each label is replaced by its id in the hierarchy.

    Items of the wrapped dataset are unpacked as ``(image, label)`` pairs and
    the label is translated through ``index_to_node_id``.
    """

    def __init__(self, dataset, index_to_node_id):
        # Mapping from the wrapped dataset's label to the node id to return.
        self.index_to_node_id = index_to_node_id
        # The underlying dataset that supplies the (image, label) pairs.
        self.dataset = dataset

    def __len__(self):
        # Same number of items as the wrapped dataset.
        return len(self.dataset)

    def __getitem__(self, idx):
        sample, class_index = self.dataset[idx]
        node_id = self.index_to_node_id[class_index]
        return sample, node_id

# Wrap both splits so the loaders yield node ids instead of the raw labels.
# Only the training loader shuffles.
hierarchical_train_loader = DataLoader(
    HierarchicalDataset(train_data, index_to_node_id),
    batch_size=batch_size,
    shuffle=True,
)
hierarchical_test_loader = DataLoader(
    HierarchicalDataset(test_data, index_to_node_id),
    batch_size=batch_size,
    shuffle=False,
)

### Create the Hierarchical Image Classifier model

In [None]:
class HierarchicalImageClassifier(BasicImageClassifier):
    """Variant of ``BasicImageClassifier`` with a hierarchical output layer.

    Only ``__init__`` is overridden; all other behaviour is inherited from
    the base class.
    """

    def __init__(self, root: SoftmaxNode):
        super().__init__()

        # Same body as built by `model_body()`, followed by a hierarchical
        # softmax layer for the given tree.
        body = model_body()
        head = HierarchicalSoftmaxLazyLinear(root=root)
        self.model = nn.Sequential(body, head)

        self.loss_fn = HierarchicalSoftmaxLoss(root)

        # One metric object reporting two named accuracies.
        # NOTE(review): keys 1 and 2 look like tree depths (superclass, class) — confirm.
        accuracy_names = {1: "superclass_accuracy", 2: "class_accuracy"}
        self.metrics = [RankAccuracyTorchMetric(root, accuracy_names)]

        self.root = root


hierarchical_model = HierarchicalImageClassifier(root)
hierarchical_model

<span class="codehilite"><div class="highlight"><pre><span></span><span class="gt">Traceback (most recent call last):</span>
  File <span class="nb">&quot;/Users/rturnbull/MDAP/hierarchicalsoftmax/.venv/lib/python3.13/site-packages/marimo/_runtime/executor.py&quot;</span>, line <span class="m">122</span>, in <span class="n">execute_cell</span>
<span class="w">    </span><span class="n">exec</span><span class="p">(</span><span class="n">cell</span><span class="o">.</span><span class="n">body</span><span class="p">,</span> <span class="n">glbls</span><span class="p">)</span>
<span class="w">    </span><span class="pm">~~~~^^^^^^^^^^^^^^^^^^</span>
  File <span class="nb">&quot;/var/folders/5x/vn169pd92hb1twlj61zvwpw40000gq/T/marimo_46152/__marimo__cell_qnkX_.py&quot;</span>, line <span class="m">18</span>, in <span class="n">&lt;module&gt;</span>
<span class="w">    </span><span class="n">hierarchical_model</span> <span class="o">=</span> <span class="n">HierarchicalImageClassifier</span

In [None]:
# Log to CSV so that the metrics can be read back from the logger's directory.
hierarchical_logger = CSVLogger(name="hierarchical_model", save_dir="lightning_logs")
hierarchical_trainer = L.Trainer(
    logger=hierarchical_logger,
    max_epochs=epochs,
    accelerator="auto",
    enable_checkpointing=False,
)
# Train on the hierarchical training loader and validate on the test loader.
hierarchical_trainer.fit(
    hierarchical_model,
    train_dataloaders=hierarchical_train_loader,
    val_dataloaders=hierarchical_test_loader,
)

### Plot the validation results at both the superclass and the class levels

In [None]:
# Read the logged metrics and keep only the rows that have a validation
# class accuracy value.
hierarchical_df = pd.read_csv(Path(hierarchical_logger.log_dir) / "metrics.csv").dropna(
    subset=["val_class_accuracy"]
)

# One line per level of the hierarchy, both plotted against epoch.
hierarchical_fig = go.Figure()
hierarchical_fig.add_trace(
    go.Scatter(
        x=hierarchical_df["epoch"],
        y=hierarchical_df["val_superclass_accuracy"],
        mode='lines',
        name='superclass',
    )
)
hierarchical_fig.add_trace(
    go.Scatter(
        x=hierarchical_df["epoch"],
        y=hierarchical_df["val_class_accuracy"],
        mode='lines',
        name='class',
    )
)
hierarchical_fig.update_layout(xaxis_title="Epochs", yaxis_title="Accuracy")
hierarchical_fig