#**Temporal Separation with Entropy Regularization for Knowledge Distillation**

#**Setup**

---
**Install Libraries**

In [None]:
!pip install snntorch dagshub mlflow pynvml --quiet

---
**GitHub Code**

In [None]:
from google.colab import userdata
import os

# Sets environ variables for GitHub
os.environ['GITHUB_TOKEN'] = userdata.get('GITHUB_TOKEN')
os.environ['USER'] = userdata.get('USER')

# Clones the repo and changes dir
!git clone https://${GITHUB_TOKEN}@github.com/${USER}/tser-kd.git
%cd tser-kd/

fatal: destination path 'tser-kd' already exists and is not an empty directory.
/content/tser-kd


---
**Set Seed for Experiment**

In [None]:
from tser_kd.utils import setup_seed

# Fix the random seed of the experiment so that runs are repeatable.
# NOTE(review): presumably seeds the Python, NumPy and PyTorch RNGs — confirm in tser_kd.utils.
setup_seed(42)

Random seed: 42


---
**Device Selection**

In [None]:
import torch

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

---
**MLFlow Setup**

In [None]:
import mlflow
from mlflow import MlflowClient
import dagshub

# Name of the MLflow experiment and address of the DagsHub-hosted tracking server
experiment_name = "TSER-KD"
TRACKING_URI = "https://dagshub.com/matteogianferrari/tser-kd.mlflow"

# Export the MLflow credentials from the Colab secrets (env variable <- secret name).
# This is done before the DagsHub initialisation, as in the original cell order.
for env_name, secret_name in (
    ('MLFLOW_TRACKING_USERNAME', 'USER'),
    ('MLFLOW_TRACKING_PASSWORD', 'MLFLOW_TRACKING_PASSWORD'),
):
    os.environ[env_name] = userdata.get(secret_name)

# Initialise DagsHub for the project repo with its MLflow integration enabled
dagshub.init(repo_owner='matteogianferrari', repo_name='tser-kd', mlflow=True)

# Point MLflow at the tracking server
mlflow.set_tracking_uri(TRACKING_URI)

#**Hyperparameters**

In [None]:
# Hyperparameter dictionary
h_dict = {
    # Loss
    "TAU": 5.0,
    "ALPHA": 0.7,
    "GAMMA": 1e-2,
    # Leaky Neuron
    "BETA": 0.5,
    "V_th": 1.0,
    # Training
    "MAX_EPOCHS": 300,
    "BATCH_SIZE": 32,
    # LR
    "LR_SCHEDULER": "CosineAnnealingLR",
    "BASE_LR": 1e-4,
    # Early Stopping
    "ES_PATIENCE": 300,
    "ES_DELTA": 1e-4,
    # Optimizer
    "OPTIMIZER": "AdamW",
    "WEIGHT_DECAY": 5e-4,
    # GPU
    "HARDWARE": "A100",
    # Encoder
    "ENCODER": "Static",
    "T": 2,
    # Dataset
    "AUTO_AUG": True,
    "CUTOUT": True,
}

#**CIFAR10 Dataset**

---
**Data Loaders Creation**

In [None]:
from tser_kd.dataset import load_cifar10_data
from torch.utils.data import DataLoader


# Load the CIFAR10 splits with the augmentations selected in the hyperparameters
train_dataset, val_dataset, num_classes = load_cifar10_data(
    auto_aug=h_dict['AUTO_AUG'],
    cutout=h_dict['CUTOUT'],
)

# Settings shared by the train and validation loaders
loader_kwargs = {"batch_size": h_dict['BATCH_SIZE'], "num_workers": 2}

# Only the training split is shuffled
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)

#**Teacher and Student Models**

---
**Models Creation**

In [None]:
from tser_kd.model.student import make_student_model
from tser_kd.model.teacher import make_teacher_model

# Arguments shared by the teacher and the student factories
shared_model_args = dict(in_channels=3, num_classes=num_classes, device=device)

# ANN teacher: ResNet-34 built from a saved state dict (deserialised on the CPU first)
t_state_dict = torch.load("data/teacher_models/resnet34_ft_9708.pth", map_location="cpu")
t_model = make_teacher_model(arch='resnet-34', state_dict=t_state_dict, **shared_model_args)

# SNN student: SResNet-19 created without a state dict
s_model = make_student_model(arch='sresnet-19', beta=h_dict['BETA'], **shared_model_args)

In [None]:
@torch.no_grad()
def load_sresnet18_backbone_into_sresnet19(res18, model):
    """
    Copies residual-block weights from an SResNet-18 into an SResNet-19 backbone.

    Both models are expected to expose ``stages[i][j]`` residual blocks whose
    ``t_conv_bn1``, ``t_conv_bn2`` and (optional) ``shortcuts`` members each hold
    a ``.layer`` (convolution) and a ``.batch_norm``. Nothing outside
    ``model.stages`` is touched, so the stem and the FC layers are left as-is.

    All shapes are validated before the first tensor is written, so a mismatch
    never leaves ``model`` partially overwritten.

    Args:
        res18: Source model; its stages 1, 2 and 3 are read.
        model: Destination model; its stages 0, 1 and 2 are overwritten in place.

    Returns:
        A small report dict describing which source blocks were mapped to which
        destination blocks. The ``layerN`` labels are 1-based names for
        ``res18.stages[N-1]``.

    Raises:
        AssertionError: If a convolution or batch-norm weight shape differs
            between a source/destination pair.
    """

    # ((dst stage, dst block), (src stage, src block), copy the shortcut?)
    block_map = (
        # block1 (128 ch): use layer2[1] (128->128 stride 1) for all three
        ((0, 0), (1, 1), False),
        ((0, 1), (1, 1), False),
        ((0, 2), (1, 1), False),
        # block2 (256 ch): first with downsample, then two stride-1 blocks
        ((1, 0), (2, 0), True),    # 128->256, stride 2
        ((1, 1), (2, 1), False),   # 256->256
        ((1, 2), (2, 1), False),   # duplicate
        # block3 (512 ch): first with downsample, then stride-1
        ((2, 0), (3, 0), True),    # 256->512, stride 2
        ((2, 1), (3, 1), False),   # 512->512
    )

    def unit_pairs(dst_block, src_block, copy_shortcut):
        # Lists the (dst, src) conv+bn units of one block, in copy order.
        pairs = [
            (dst_block.t_conv_bn1, src_block.t_conv_bn1),
            (dst_block.t_conv_bn2, src_block.t_conv_bn2),
        ]
        # The 1x1 projection is copied only on request and only when both sides have one
        if copy_shortcut and (dst_block.shortcuts is not None) and (src_block.shortcuts is not None):
            pairs.append((dst_block.shortcuts, src_block.shortcuts))
        return pairs

    def check_shapes(dst_unit, src_unit):
        # Explicit raises (not `assert`) so the checks survive `python -O`.
        dst_conv, src_conv = dst_unit.layer, src_unit.layer
        if dst_conv.weight.shape != src_conv.weight.shape:
            raise AssertionError(f"conv shape mismatch: {dst_conv.weight.shape} vs {src_conv.weight.shape}")
        dst_bn, src_bn = dst_unit.batch_norm, src_unit.batch_norm
        if dst_bn.weight.shape != src_bn.weight.shape:
            raise AssertionError(f"bn shape mismatch: {dst_bn.weight.shape} vs {src_bn.weight.shape}")

    def copy_conv(dst, src):
        dst.weight.copy_(src.weight)
        if getattr(dst, "bias", None) is not None and getattr(src, "bias", None) is not None:
            dst.bias.copy_(src.bias)

    def copy_bn(dst, src):
        dst.weight.copy_(src.weight)
        dst.bias.copy_(src.bias)
        # Running statistics may be absent (None) on either side; copy what both have
        for buffer_name in ("running_mean", "running_var", "num_batches_tracked"):
            dst_buffer = getattr(dst, buffer_name, None)
            src_buffer = getattr(src, buffer_name, None)
            if dst_buffer is not None and src_buffer is not None:
                dst_buffer.copy_(src_buffer)

    # Resolve every (dst, src) unit first ...
    all_pairs = []
    for (dst_stage, dst_idx), (src_stage, src_idx), with_shortcut in block_map:
        all_pairs.extend(
            unit_pairs(model.stages[dst_stage][dst_idx], res18.stages[src_stage][src_idx], with_shortcut)
        )

    # ... validate all of them before mutating anything ...
    for dst_unit, src_unit in all_pairs:
        check_shapes(dst_unit, src_unit)

    # ... and only then copy.
    for dst_unit, src_unit in all_pairs:
        copy_conv(dst_unit.layer, src_unit.layer)
        copy_bn(dst_unit.batch_norm, src_unit.batch_norm)

    report = {
        "copied_from_resnet18": {
            "block1": ["layer2[1] → block1[0,1,2]"],
            "block2": ["layer3[0] → block2[0] (with shortcut)",
                       "layer3[1] → block2[1,2]"],
            "block3": ["layer4[0] → block3[0] (with shortcut)",
                       "layer4[1] → block3[1]"],
        },
        "left_random_init": ["stem conv/bn", "fc1", "fc2"],
    }
    return report


# Deserialise the SResNet-18 checkpoint on the CPU and rebuild the model from it
s_state_dict = torch.load("data/student_models/sresnet18_pt_9078.pth", map_location="cpu")
sres18 = make_student_model(
    arch='sresnet-18', in_channels=3, num_classes=num_classes,
    beta=h_dict["BETA"], device=device, state_dict=s_state_dict,
)

# Copy its residual blocks into the SResNet-19 student. As the last expression
# of the cell, the returned report dict is displayed.
load_sresnet18_backbone_into_sresnet19(sres18, s_model)

{'copied_from_resnet18': {'block1': ['layer2[1] → block1[0,1,2]'],
  'block2': ['layer3[0] → block2[0] (with shortcut)',
   'layer3[1] → block2[1,2]'],
  'block3': ['layer4[0] → block3[0] (with shortcut)',
   'layer4[1] → block3[1]']},
 'left_random_init': ['stem conv/bn', 'fc1', 'fc2']}

In [None]:
# Drop the temporary SResNet-18 model and its state dict, then release the
# cached GPU memory.
# IMPORTANT TO KEEP THE EPOCH TIME LOW
del sres18
del s_state_dict
torch.cuda.empty_cache()

#**Training**

In [None]:
import torch.optim as optim
import torch.nn as nn
from tser_kd.training import EarlyStopping
from tser_kd.dataset import RateEncoder, StaticEncoder
from tser_kd.model import TSERKDLoss
from tser_kd.training.lr_scheduler import WarmupCosineLR
from tser_kd.utils import AccuracyMonitor


# Optimizer
# Each selector below ends in an explicit error: without it an unsupported value
# would leave the name undefined (or silently reuse an object from an earlier run).
if h_dict["OPTIMIZER"] == 'AdamW':
    optimizer = optim.AdamW(s_model.parameters(), lr=h_dict['BASE_LR'], weight_decay=h_dict['WEIGHT_DECAY'])
elif h_dict["OPTIMIZER"] == 'Adam':
    optimizer = optim.Adam(s_model.parameters(), lr=h_dict['BASE_LR'], weight_decay=h_dict['WEIGHT_DECAY'])
elif h_dict["OPTIMIZER"] == 'SGD':
    optimizer = optim.SGD(s_model.parameters(), lr=h_dict['BASE_LR'], momentum=h_dict["MOMENTUM"], weight_decay=h_dict['WEIGHT_DECAY'])
else:
    raise ValueError(f"Unsupported OPTIMIZER: {h_dict['OPTIMIZER']!r}")

# LR scheduler
if h_dict["LR_SCHEDULER"] == 'ReduceLROnPlateau':
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=h_dict["LR_PATIENCE"], factor=h_dict["LR_FACTOR"])
elif h_dict["LR_SCHEDULER"] == 'CosineAnnealingLR':
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=h_dict["MAX_EPOCHS"])
elif h_dict["LR_SCHEDULER"] == 'StepLR':
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=h_dict["LR_STEP"], gamma=h_dict["LR_FACTOR"])
elif h_dict["LR_SCHEDULER"] == 'WarmupCosineLR':
    scheduler = WarmupCosineLR(
        optimizer=optimizer,
        warmup_epochs=h_dict["WARMUP_EPOCHS"],
        total_epochs=h_dict["MAX_EPOCHS"],
        base_lr=h_dict["BASE_LR"],
        max_lr=h_dict["MAX_LR"]
    )
else:
    raise ValueError(f"Unsupported LR_SCHEDULER: {h_dict['LR_SCHEDULER']!r}")

# Losses: distillation loss for training, plain cross-entropy for evaluation
train_criterion = TSERKDLoss(alpha=h_dict["ALPHA"], gamma=h_dict["GAMMA"], tau=h_dict["TAU"])
eval_criterion = nn.CrossEntropyLoss()

# Accuracy monitor
acc_monitor = AccuracyMonitor(path="best_acc.pth")

# Early stopping
es_callback = EarlyStopping(patience=h_dict["ES_PATIENCE"], delta=h_dict["ES_DELTA"], path="best_loss.pth")

# Gradient scaler
# NOTE(review): the scaler device is fixed to 'cuda' even though `device` may be the CPU.
scaler = torch.amp.GradScaler(device='cuda')

# Encoder
if h_dict["ENCODER"] == "Rate":
    encoder = RateEncoder(num_steps=h_dict["T"], gain=h_dict["GAIN"])
elif h_dict["ENCODER"] == "Static":
    encoder = StaticEncoder(num_steps=h_dict["T"])
else:
    raise ValueError(f"Unsupported ENCODER: {h_dict['ENCODER']!r}")

In [None]:
import pynvml
from tser_kd.eval import run_eval
from tser_kd.training import run_kd_train


# Sets the MLFlow experiment
mlflow.set_experiment(experiment_name)

epoch_i = 0
START_EPOCH = 0
curr_lr = optimizer.param_groups[0]["lr"]

# Train the model and log with MLFlow
with mlflow.start_run(run_id=None, log_system_metrics=True):
    # Log hyperparameters up front so they are recorded even if the run is
    # interrupted before the training loop finishes
    mlflow.log_params(h_dict)

    for epoch_i in range(h_dict["MAX_EPOCHS"]):
        # One distillation epoch on the training split
        train_total_loss, train_ce_loss, train_kl_loss, train_e_reg, train_acc, epoch_time, train_batch_time = run_kd_train(
            epoch_i, train_loader, s_model, t_model, train_criterion, optimizer, device, scaler, encoder
        )

        # Evaluation of the student on the validation split
        val_loss, val_acc1, val_acc5, val_batch_time = run_eval(
            val_loader, s_model, eval_criterion, device, encoder
        )

        # Logging (curr_lr is the learning rate that was used during this epoch)
        print(
            f"Time: {epoch_time:.1f}s | Train Total Loss: {train_total_loss:.4f} | Train Acc: {train_acc:.2f}% | "
            f"Val Loss: {val_loss:.4f} | Val Acc1: {val_acc1:.2f}% | Val Acc5: {val_acc5:.2f}% | LR: {curr_lr:.6f}"
        )

        mlflow.log_metrics({
            "learning_rate": curr_lr, "train_tserkd_loss": train_total_loss, "train_tsce_loss": train_ce_loss, "train_tskl_loss": train_kl_loss,
            "train_e_reg": train_e_reg, "train_acc": train_acc, "val_ce_loss": val_loss, "val_acc1": val_acc1, "val_acc5": val_acc5, "epoch_time": epoch_time,
            "train_batch_time": train_batch_time, "val_batch_time": val_batch_time
        }, step=epoch_i)

        # Updates the LR (ReduceLROnPlateau is the only scheduler stepped on a metric)
        if h_dict["LR_SCHEDULER"] == 'ReduceLROnPlateau':
            scheduler.step(val_loss)
        else:
            scheduler.step()

        curr_lr = optimizer.param_groups[0]["lr"]

        # ES check
        if es_callback(val_loss, epoch_i, s_model):
            break

    # Log test performance
    # NOTE(review): "best_acc.pth" is the path given to `acc_monitor`, but
    # `acc_monitor` is never called in this loop — confirm the checkpoint is
    # written elsewhere, otherwise this load fails or picks up a stale file.
    # NOTE(review): the "test_*" metrics are computed on `val_loader`.
    # map_location keeps the load working when the checkpoint was saved on a
    # different device than the current one.
    s_model.load_state_dict(torch.load("best_acc.pth", map_location=device))
    test_ce_loss, test_acc1, test_acc5, _ = run_eval(val_loader, s_model, eval_criterion, device, encoder)
    mlflow.log_metrics({"test_ce_loss": test_ce_loss, "test_acc1": test_acc1, "test_acc5": test_acc5})

2025/07/30 23:41:59 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.
Epoch 1: 100%|██████████| Batch 1563/1563 , acc=19.01%, ce_loss=2.1198, e_reg=2.1959, kl_loss=1.1730, total_loss=21.1420


Time: 227.7s | Train Total Loss: 21.1420 | Train Acc: 19.01% | Val Loss: 2.1498 | Val Acc1: 30.25% | Val Acc5: 86.08% | LR: 0.000100


Epoch 2: 100%|██████████| Batch 1563/1563 , acc=26.34%, ce_loss=1.9539, e_reg=2.0644, kl_loss=1.0536, total_loss=19.0036


Time: 226.1s | Train Total Loss: 19.0036 | Train Acc: 26.34% | Val Loss: 2.3168 | Val Acc1: 35.73% | Val Acc5: 90.08% | LR: 0.000100


Epoch 3: 100%|██████████| Batch 1563/1563 , acc=30.39%, ce_loss=1.8777, e_reg=2.0141, kl_loss=0.9954, total_loss=17.9635


Time: 227.9s | Train Total Loss: 17.9635 | Train Acc: 30.39% | Val Loss: 2.0158 | Val Acc1: 40.31% | Val Acc5: 91.39% | LR: 0.000100


Epoch 4: 100%|██████████| Batch 1563/1563 , acc=32.17%, ce_loss=1.8325, e_reg=1.9805, kl_loss=0.9626, total_loss=17.3749


Time: 226.5s | Train Total Loss: 17.3749 | Train Acc: 32.17% | Val Loss: 2.1186 | Val Acc1: 41.68% | Val Acc5: 91.98% | LR: 0.000100


Epoch 5: 100%|██████████| Batch 1563/1563 , acc=34.02%, ce_loss=1.7877, e_reg=1.9429, kl_loss=0.9296, total_loss=16.7845


Time: 226.7s | Train Total Loss: 16.7845 | Train Acc: 34.02% | Val Loss: 2.0844 | Val Acc1: 44.98% | Val Acc5: 92.81% | LR: 0.000100


Epoch 6: 100%|██████████| Batch 1563/1563 , acc=36.21%, ce_loss=1.7456, e_reg=1.9159, kl_loss=0.8965, total_loss=16.1937


Time: 227.5s | Train Total Loss: 16.1937 | Train Acc: 36.21% | Val Loss: 2.1640 | Val Acc1: 47.69% | Val Acc5: 93.96% | LR: 0.000100


Epoch 7: 100%|██████████| Batch 1563/1563 , acc=37.90%, ce_loss=1.7096, e_reg=1.8861, kl_loss=0.8764, total_loss=15.8309


Time: 226.6s | Train Total Loss: 15.8309 | Train Acc: 37.90% | Val Loss: 2.0759 | Val Acc1: 50.62% | Val Acc5: 94.52% | LR: 0.000100


Epoch 8: 100%|██████████| Batch 1563/1563 , acc=40.86%, ce_loss=1.6495, e_reg=1.8466, kl_loss=0.8341, total_loss=15.0730


Time: 227.1s | Train Total Loss: 15.0730 | Train Acc: 40.86% | Val Loss: 2.0051 | Val Acc1: 53.29% | Val Acc5: 94.85% | LR: 0.000100


Epoch 9: 100%|██████████| Batch 1563/1563 , acc=41.85%, ce_loss=1.6245, e_reg=1.8238, kl_loss=0.8155, total_loss=14.7399


Time: 226.7s | Train Total Loss: 14.7399 | Train Acc: 41.85% | Val Loss: 2.1770 | Val Acc1: 54.17% | Val Acc5: 95.25% | LR: 0.000100


Epoch 10: 100%|██████████| Batch 1563/1563 , acc=44.12%, ce_loss=1.5796, e_reg=1.7954, kl_loss=0.7802, total_loss=14.1103


Time: 226.8s | Train Total Loss: 14.1103 | Train Acc: 44.12% | Val Loss: 2.1082 | Val Acc1: 55.10% | Val Acc5: 95.44% | LR: 0.000100


Epoch 11: 100%|██████████| Batch 1563/1563 , acc=44.38%, ce_loss=1.5699, e_reg=1.7758, kl_loss=0.7782, total_loss=14.0710


Time: 227.0s | Train Total Loss: 14.0710 | Train Acc: 44.38% | Val Loss: 1.9982 | Val Acc1: 58.42% | Val Acc5: 96.22% | LR: 0.000100


Epoch 12: 100%|██████████| Batch 1563/1563 , acc=46.52%, ce_loss=1.5215, e_reg=1.7481, kl_loss=0.7391, total_loss=13.3735


Time: 227.3s | Train Total Loss: 13.3735 | Train Acc: 46.52% | Val Loss: 2.0043 | Val Acc1: 59.28% | Val Acc5: 96.26% | LR: 0.000100


Epoch 13: 100%|██████████| Batch 1563/1563 , acc=47.02%, ce_loss=1.5041, e_reg=1.7272, kl_loss=0.7343, total_loss=13.2837


Time: 227.1s | Train Total Loss: 13.2837 | Train Acc: 47.02% | Val Loss: 2.1910 | Val Acc1: 59.53% | Val Acc5: 96.18% | LR: 0.000100


Epoch 14: 100%|██████████| Batch 1563/1563 , acc=48.96%, ce_loss=1.4637, e_reg=1.7005, kl_loss=0.7042, total_loss=12.7453


Time: 226.7s | Train Total Loss: 12.7453 | Train Acc: 48.96% | Val Loss: 1.9287 | Val Acc1: 62.47% | Val Acc5: 96.93% | LR: 0.000100


Epoch 15: 100%|██████████| Batch 1563/1563 , acc=49.90%, ce_loss=1.4360, e_reg=1.6801, kl_loss=0.6823, total_loss=12.3548


Time: 227.1s | Train Total Loss: 12.3548 | Train Acc: 49.90% | Val Loss: 2.0140 | Val Acc1: 61.64% | Val Acc5: 96.82% | LR: 0.000099


Epoch 16: 100%|██████████| Batch 1563/1563 , acc=50.44%, ce_loss=1.4263, e_reg=1.6705, kl_loss=0.6755, total_loss=12.2333


Time: 226.0s | Train Total Loss: 12.2333 | Train Acc: 50.44% | Val Loss: 1.9366 | Val Acc1: 63.34% | Val Acc5: 97.00% | LR: 0.000099


Epoch 17: 100%|██████████| Batch 1563/1563 , acc=51.30%, ce_loss=1.4030, e_reg=1.6510, kl_loss=0.6643, total_loss=12.0292


Time: 227.0s | Train Total Loss: 12.0292 | Train Acc: 51.30% | Val Loss: 1.7589 | Val Acc1: 65.02% | Val Acc5: 97.41% | LR: 0.000099


Epoch 18: 100%|██████████| Batch 1563/1563 , acc=52.73%, ce_loss=1.3747, e_reg=1.6349, kl_loss=0.6402, total_loss=11.5992


Time: 226.7s | Train Total Loss: 11.5992 | Train Acc: 52.73% | Val Loss: 1.8193 | Val Acc1: 65.79% | Val Acc5: 97.38% | LR: 0.000099


Epoch 19: 100%|██████████| Batch 1563/1563 , acc=52.82%, ce_loss=1.3671, e_reg=1.6256, kl_loss=0.6352, total_loss=11.5100


Time: 227.0s | Train Total Loss: 11.5100 | Train Acc: 52.82% | Val Loss: 1.8443 | Val Acc1: 65.67% | Val Acc5: 97.37% | LR: 0.000099


Epoch 20: 100%|██████████| Batch 1563/1563 , acc=53.62%, ce_loss=1.3518, e_reg=1.6131, kl_loss=0.6266, total_loss=11.3549


Time: 227.1s | Train Total Loss: 11.3549 | Train Acc: 53.62% | Val Loss: 1.8432 | Val Acc1: 66.87% | Val Acc5: 97.18% | LR: 0.000099


Epoch 21: 100%|██████████| Batch 1563/1563 , acc=54.22%, ce_loss=1.3369, e_reg=1.5999, kl_loss=0.6196, total_loss=11.2282


Time: 226.8s | Train Total Loss: 11.2282 | Train Acc: 54.22% | Val Loss: 1.7015 | Val Acc1: 67.90% | Val Acc5: 97.53% | LR: 0.000099


Epoch 22: 100%|██████████| Batch 1563/1563 , acc=54.66%, ce_loss=1.3221, e_reg=1.5912, kl_loss=0.6063, total_loss=10.9903


Time: 227.0s | Train Total Loss: 10.9903 | Train Acc: 54.66% | Val Loss: 1.6939 | Val Acc1: 69.65% | Val Acc5: 97.55% | LR: 0.000099


Epoch 23: 100%|██████████| Batch 1563/1563 , acc=56.59%, ce_loss=1.2787, e_reg=1.5623, kl_loss=0.5759, total_loss=10.4466


Time: 227.0s | Train Total Loss: 10.4466 | Train Acc: 56.59% | Val Loss: 1.6693 | Val Acc1: 70.31% | Val Acc5: 97.73% | LR: 0.000099


Epoch 24: 100%|██████████| Batch 1563/1563 , acc=57.66%, ce_loss=1.2539, e_reg=1.5452, kl_loss=0.5621, total_loss=10.1977


Time: 227.4s | Train Total Loss: 10.1977 | Train Acc: 57.66% | Val Loss: 1.5951 | Val Acc1: 71.16% | Val Acc5: 98.06% | LR: 0.000099


Epoch 25: 100%|██████████| Batch 1563/1563 , acc=57.29%, ce_loss=1.2647, e_reg=1.5480, kl_loss=0.5680, total_loss=10.3042


Time: 227.1s | Train Total Loss: 10.3042 | Train Acc: 57.29% | Val Loss: 1.6527 | Val Acc1: 70.34% | Val Acc5: 97.99% | LR: 0.000098


Epoch 26: 100%|██████████| Batch 1563/1563 , acc=58.29%, ce_loss=1.2362, e_reg=1.5307, kl_loss=0.5485, total_loss=9.9548


Time: 227.8s | Train Total Loss: 9.9548 | Train Acc: 58.29% | Val Loss: 1.5869 | Val Acc1: 71.86% | Val Acc5: 98.20% | LR: 0.000098


Epoch 27: 100%|██████████| Batch 1563/1563 , acc=58.26%, ce_loss=1.2448, e_reg=1.5336, kl_loss=0.5544, total_loss=10.0602


Time: 227.2s | Train Total Loss: 10.0602 | Train Acc: 58.26% | Val Loss: 1.5009 | Val Acc1: 72.28% | Val Acc5: 98.17% | LR: 0.000098


Epoch 28: 100%|██████████| Batch 1563/1563 , acc=58.17%, ce_loss=1.2415, e_reg=1.5296, kl_loss=0.5516, total_loss=10.0106


Time: 227.1s | Train Total Loss: 10.0106 | Train Acc: 58.17% | Val Loss: 1.6707 | Val Acc1: 71.51% | Val Acc5: 98.19% | LR: 0.000098


Epoch 29: 100%|██████████| Batch 1563/1563 , acc=59.35%, ce_loss=1.2119, e_reg=1.5128, kl_loss=0.5320, total_loss=9.6588


Time: 227.4s | Train Total Loss: 9.6588 | Train Acc: 59.35% | Val Loss: 1.5295 | Val Acc1: 73.23% | Val Acc5: 98.29% | LR: 0.000098


Epoch 30: 100%|██████████| Batch 1563/1563 , acc=60.67%, ce_loss=1.1824, e_reg=1.4936, kl_loss=0.5143, total_loss=9.3404


Time: 228.0s | Train Total Loss: 9.3404 | Train Acc: 60.67% | Val Loss: 1.4245 | Val Acc1: 75.44% | Val Acc5: 98.37% | LR: 0.000098


Epoch 31: 100%|██████████| Batch 1563/1563 , acc=60.83%, ce_loss=1.1751, e_reg=1.4879, kl_loss=0.5102, total_loss=9.2656


Time: 227.1s | Train Total Loss: 9.2656 | Train Acc: 60.83% | Val Loss: 1.4945 | Val Acc1: 74.04% | Val Acc5: 98.51% | LR: 0.000098


Epoch 32: 100%|██████████| Batch 1563/1563 , acc=61.16%, ce_loss=1.1694, e_reg=1.4780, kl_loss=0.5057, total_loss=9.1850


Time: 227.6s | Train Total Loss: 9.1850 | Train Acc: 61.16% | Val Loss: 1.4582 | Val Acc1: 74.90% | Val Acc5: 98.48% | LR: 0.000097


Epoch 33: 100%|██████████| Batch 1563/1563 , acc=61.71%, ce_loss=1.1626, e_reg=1.4772, kl_loss=0.4999, total_loss=9.0818


Time: 227.5s | Train Total Loss: 9.0818 | Train Acc: 61.71% | Val Loss: 1.4516 | Val Acc1: 75.67% | Val Acc5: 98.61% | LR: 0.000097


Epoch 34: 100%|██████████| Batch 1563/1563 , acc=61.97%, ce_loss=1.1522, e_reg=1.4682, kl_loss=0.4921, total_loss=8.9423


Time: 227.7s | Train Total Loss: 8.9423 | Train Acc: 61.97% | Val Loss: 1.5116 | Val Acc1: 74.81% | Val Acc5: 98.49% | LR: 0.000097


Epoch 35: 100%|██████████| Batch 1563/1563 , acc=62.40%, ce_loss=1.1390, e_reg=1.4562, kl_loss=0.4875, total_loss=8.8591


Time: 227.2s | Train Total Loss: 8.8591 | Train Acc: 62.40% | Val Loss: 1.4819 | Val Acc1: 76.04% | Val Acc5: 98.60% | LR: 0.000097


Epoch 36: 100%|██████████| Batch 1563/1563 , acc=62.98%, ce_loss=1.1312, e_reg=1.4523, kl_loss=0.4796, total_loss=8.7181


Time: 227.5s | Train Total Loss: 8.7181 | Train Acc: 62.98% | Val Loss: 1.3570 | Val Acc1: 77.15% | Val Acc5: 98.86% | LR: 0.000097


Epoch 37: 100%|██████████| Batch 1563/1563 , acc=63.33%, ce_loss=1.1202, e_reg=1.4477, kl_loss=0.4744, total_loss=8.6233


Time: 227.4s | Train Total Loss: 8.6233 | Train Acc: 63.33% | Val Loss: 1.3752 | Val Acc1: 76.71% | Val Acc5: 98.71% | LR: 0.000096


Epoch 38: 100%|██████████| Batch 1563/1563 , acc=63.66%, ce_loss=1.1104, e_reg=1.4366, kl_loss=0.4655, total_loss=8.4654


Time: 227.3s | Train Total Loss: 8.4654 | Train Acc: 63.66% | Val Loss: 1.3232 | Val Acc1: 77.74% | Val Acc5: 98.81% | LR: 0.000096


Epoch 39: 100%|██████████| Batch 1563/1563 , acc=63.13%, ce_loss=1.1233, e_reg=1.4467, kl_loss=0.4720, total_loss=8.5819


Time: 227.9s | Train Total Loss: 8.5819 | Train Acc: 63.13% | Val Loss: 1.3990 | Val Acc1: 77.72% | Val Acc5: 98.63% | LR: 0.000096


Epoch 40: 100%|██████████| Batch 1563/1563 , acc=64.53%, ce_loss=1.0893, e_reg=1.4236, kl_loss=0.4529, total_loss=8.2374


Time: 228.2s | Train Total Loss: 8.2374 | Train Acc: 64.53% | Val Loss: 1.2996 | Val Acc1: 79.04% | Val Acc5: 98.85% | LR: 0.000096


Epoch 41: 100%|██████████| Batch 1563/1563 , acc=65.14%, ce_loss=1.0783, e_reg=1.4198, kl_loss=0.4410, total_loss=8.0267


Time: 227.5s | Train Total Loss: 8.0267 | Train Acc: 65.14% | Val Loss: 1.3295 | Val Acc1: 78.64% | Val Acc5: 98.73% | LR: 0.000096


Epoch 42: 100%|██████████| Batch 1563/1563 , acc=65.15%, ce_loss=1.0762, e_reg=1.4110, kl_loss=0.4425, total_loss=8.0522


Time: 227.4s | Train Total Loss: 8.0522 | Train Acc: 65.15% | Val Loss: 1.3527 | Val Acc1: 78.73% | Val Acc5: 98.92% | LR: 0.000095


Epoch 43: 100%|██████████| Batch 1563/1563 , acc=66.53%, ce_loss=1.0466, e_reg=1.3961, kl_loss=0.4268, total_loss=7.7697


Time: 227.8s | Train Total Loss: 7.7697 | Train Acc: 66.53% | Val Loss: 1.3171 | Val Acc1: 79.30% | Val Acc5: 98.89% | LR: 0.000095


Epoch 44: 100%|██████████| Batch 1563/1563 , acc=66.31%, ce_loss=1.0490, e_reg=1.3932, kl_loss=0.4237, total_loss=7.7153


Time: 227.8s | Train Total Loss: 7.7153 | Train Acc: 66.31% | Val Loss: 1.3072 | Val Acc1: 79.44% | Val Acc5: 98.76% | LR: 0.000095


Epoch 45: 100%|██████████| Batch 1563/1563 , acc=66.06%, ce_loss=1.0510, e_reg=1.3943, kl_loss=0.4273, total_loss=7.7795


Time: 227.8s | Train Total Loss: 7.7795 | Train Acc: 66.06% | Val Loss: 1.3297 | Val Acc1: 79.01% | Val Acc5: 98.84% | LR: 0.000095


Epoch 46: 100%|██████████| Batch 1563/1563 , acc=66.39%, ce_loss=1.0462, e_reg=1.3920, kl_loss=0.4215, total_loss=7.6759


Time: 227.6s | Train Total Loss: 7.6759 | Train Acc: 66.39% | Val Loss: 1.3275 | Val Acc1: 79.40% | Val Acc5: 98.90% | LR: 0.000095


Epoch 47: 100%|██████████| Batch 1563/1563 , acc=66.75%, ce_loss=1.0375, e_reg=1.3867, kl_loss=0.4180, total_loss=7.6128


Time: 227.0s | Train Total Loss: 7.6128 | Train Acc: 66.75% | Val Loss: 1.4073 | Val Acc1: 78.78% | Val Acc5: 98.88% | LR: 0.000094


Epoch 48: 100%|██████████| Batch 1563/1563 , acc=67.02%, ce_loss=1.0331, e_reg=1.3867, kl_loss=0.4151, total_loss=7.5598


Time: 227.5s | Train Total Loss: 7.5598 | Train Acc: 67.02% | Val Loss: 1.3425 | Val Acc1: 78.98% | Val Acc5: 98.82% | LR: 0.000094


Epoch 49: 100%|██████████| Batch 1563/1563 , acc=67.11%, ce_loss=1.0282, e_reg=1.3817, kl_loss=0.4109, total_loss=7.4859


Time: 227.7s | Train Total Loss: 7.4859 | Train Acc: 67.11% | Val Loss: 1.2020 | Val Acc1: 80.86% | Val Acc5: 99.14% | LR: 0.000094


Epoch 50: 100%|██████████| Batch 1563/1563 , acc=66.83%, ce_loss=1.0340, e_reg=1.3831, kl_loss=0.4157, total_loss=7.5716


Time: 227.7s | Train Total Loss: 7.5716 | Train Acc: 66.83% | Val Loss: 1.3443 | Val Acc1: 79.33% | Val Acc5: 98.86% | LR: 0.000094


Epoch 51: 100%|██████████| Batch 1563/1563 , acc=67.53%, ce_loss=1.0196, e_reg=1.3780, kl_loss=0.4060, total_loss=7.3966


Time: 228.1s | Train Total Loss: 7.3966 | Train Acc: 67.53% | Val Loss: 1.2625 | Val Acc1: 80.12% | Val Acc5: 99.07% | LR: 0.000093


Epoch 52: 100%|██████████| Batch 1563/1563 , acc=67.96%, ce_loss=1.0064, e_reg=1.3654, kl_loss=0.3968, total_loss=7.2331


Time: 227.8s | Train Total Loss: 7.2331 | Train Acc: 67.96% | Val Loss: 1.2348 | Val Acc1: 81.27% | Val Acc5: 99.11% | LR: 0.000093


Epoch 53: 100%|██████████| Batch 1563/1563 , acc=69.16%, ce_loss=0.9806, e_reg=1.3478, kl_loss=0.3818, total_loss=6.9616


Time: 228.2s | Train Total Loss: 6.9616 | Train Acc: 69.16% | Val Loss: 1.2075 | Val Acc1: 81.23% | Val Acc5: 99.14% | LR: 0.000093


Epoch 54: 100%|██████████| Batch 1563/1563 , acc=69.13%, ce_loss=0.9804, e_reg=1.3478, kl_loss=0.3825, total_loss=6.9740


Time: 228.4s | Train Total Loss: 6.9740 | Train Acc: 69.13% | Val Loss: 1.2286 | Val Acc1: 80.97% | Val Acc5: 99.05% | LR: 0.000092


Epoch 55: 100%|██████████| Batch 1563/1563 , acc=68.97%, ce_loss=0.9843, e_reg=1.3552, kl_loss=0.3806, total_loss=6.9416


Time: 227.5s | Train Total Loss: 6.9416 | Train Acc: 68.97% | Val Loss: 1.1409 | Val Acc1: 82.08% | Val Acc5: 99.14% | LR: 0.000092


Epoch 56: 100%|██████████| Batch 1563/1563 , acc=68.89%, ce_loss=0.9876, e_reg=1.3542, kl_loss=0.3872, total_loss=7.0582


Time: 227.6s | Train Total Loss: 7.0582 | Train Acc: 68.89% | Val Loss: 1.2035 | Val Acc1: 81.35% | Val Acc5: 99.14% | LR: 0.000092


Epoch 57: 100%|██████████| Batch 1563/1563 , acc=69.63%, ce_loss=0.9682, e_reg=1.3431, kl_loss=0.3728, total_loss=6.8013


Time: 228.7s | Train Total Loss: 6.8013 | Train Acc: 69.63% | Val Loss: 1.1820 | Val Acc1: 81.70% | Val Acc5: 99.15% | LR: 0.000092


Epoch 58: 100%|██████████| Batch 1563/1563 , acc=69.29%, ce_loss=0.9779, e_reg=1.3488, kl_loss=0.3770, total_loss=6.8774


Time: 228.4s | Train Total Loss: 6.8774 | Train Acc: 69.29% | Val Loss: 1.1642 | Val Acc1: 81.86% | Val Acc5: 99.22% | LR: 0.000091


Epoch 59: 100%|██████████| Batch 1563/1563 , acc=69.86%, ce_loss=0.9657, e_reg=1.3396, kl_loss=0.3705, total_loss=6.7604


Time: 228.3s | Train Total Loss: 6.7604 | Train Acc: 69.86% | Val Loss: 1.2141 | Val Acc1: 82.19% | Val Acc5: 99.04% | LR: 0.000091


Epoch 60: 100%|██████████| Batch 1563/1563 , acc=70.05%, ce_loss=0.9563, e_reg=1.3315, kl_loss=0.3652, total_loss=6.6649


Time: 228.1s | Train Total Loss: 6.6649 | Train Acc: 70.05% | Val Loss: 1.1003 | Val Acc1: 82.94% | Val Acc5: 99.37% | LR: 0.000091


Epoch 61: 100%|██████████| Batch 1563/1563 , acc=69.87%, ce_loss=0.9580, e_reg=1.3328, kl_loss=0.3644, total_loss=6.6505


Time: 227.6s | Train Total Loss: 6.6505 | Train Acc: 69.87% | Val Loss: 1.1982 | Val Acc1: 82.33% | Val Acc5: 99.16% | LR: 0.000090


Epoch 62: 100%|██████████| Batch 1563/1563 , acc=71.02%, ce_loss=0.9321, e_reg=1.3172, kl_loss=0.3523, total_loss=6.4317


Time: 228.7s | Train Total Loss: 6.4317 | Train Acc: 71.02% | Val Loss: 1.2172 | Val Acc1: 81.97% | Val Acc5: 99.10% | LR: 0.000090


Epoch 63: 100%|██████████| Batch 1563/1563 , acc=70.08%, ce_loss=0.9561, e_reg=1.3310, kl_loss=0.3650, total_loss=6.6607


Time: 228.4s | Train Total Loss: 6.6607 | Train Acc: 70.08% | Val Loss: 1.1004 | Val Acc1: 82.79% | Val Acc5: 99.27% | LR: 0.000090


Epoch 64: 100%|██████████| Batch 1563/1563 , acc=71.22%, ce_loss=0.9315, e_reg=1.3219, kl_loss=0.3450, total_loss=6.3045


Time: 228.3s | Train Total Loss: 6.3045 | Train Acc: 71.22% | Val Loss: 1.0813 | Val Acc1: 83.22% | Val Acc5: 99.41% | LR: 0.000090


Epoch 65: 100%|██████████| Batch 1563/1563 , acc=71.29%, ce_loss=0.9268, e_reg=1.3141, kl_loss=0.3458, total_loss=6.3171


Time: 227.8s | Train Total Loss: 6.3171 | Train Acc: 71.29% | Val Loss: 1.2267 | Val Acc1: 82.25% | Val Acc5: 99.22% | LR: 0.000089


Epoch 66: 100%|██████████| Batch 1563/1563 , acc=70.41%, ce_loss=0.9467, e_reg=1.3269, kl_loss=0.3549, total_loss=6.4815


Time: 228.6s | Train Total Loss: 6.4815 | Train Acc: 70.41% | Val Loss: 1.0922 | Val Acc1: 83.25% | Val Acc5: 99.32% | LR: 0.000089


Epoch 67: 100%|██████████| Batch 1563/1563 , acc=71.88%, ce_loss=0.9133, e_reg=1.3034, kl_loss=0.3402, total_loss=6.2140


Time: 228.3s | Train Total Loss: 6.2140 | Train Acc: 71.88% | Val Loss: 1.0426 | Val Acc1: 83.92% | Val Acc5: 99.45% | LR: 0.000089


Epoch 68: 100%|██████████| Batch 1563/1563 , acc=71.91%, ce_loss=0.9148, e_reg=1.3080, kl_loss=0.3356, total_loss=6.1352


Time: 228.0s | Train Total Loss: 6.1352 | Train Acc: 71.91% | Val Loss: 1.1162 | Val Acc1: 83.42% | Val Acc5: 99.26% | LR: 0.000088


Epoch 69: 100%|██████████| Batch 1563/1563 , acc=72.08%, ce_loss=0.9083, e_reg=1.3007, kl_loss=0.3352, total_loss=6.1252


Time: 227.8s | Train Total Loss: 6.1252 | Train Acc: 72.08% | Val Loss: 1.0379 | Val Acc1: 84.31% | Val Acc5: 99.41% | LR: 0.000088


Epoch 70: 100%|██████████| Batch 1563/1563 , acc=72.10%, ce_loss=0.9098, e_reg=1.3007, kl_loss=0.3350, total_loss=6.1228


Time: 228.4s | Train Total Loss: 6.1228 | Train Acc: 72.10% | Val Loss: 1.0826 | Val Acc1: 83.88% | Val Acc5: 99.39% | LR: 0.000088


Epoch 71: 100%|██████████| Batch 1563/1563 , acc=72.26%, ce_loss=0.9027, e_reg=1.2917, kl_loss=0.3334, total_loss=6.0928


Time: 227.8s | Train Total Loss: 6.0928 | Train Acc: 72.26% | Val Loss: 1.0381 | Val Acc1: 84.12% | Val Acc5: 99.51% | LR: 0.000087


Epoch 72: 100%|██████████| Batch 1563/1563 , acc=72.38%, ce_loss=0.9008, e_reg=1.2983, kl_loss=0.3287, total_loss=6.0092


Time: 227.8s | Train Total Loss: 6.0092 | Train Acc: 72.38% | Val Loss: 1.0162 | Val Acc1: 84.95% | Val Acc5: 99.40% | LR: 0.000087


Epoch 73: 100%|██████████| Batch 1563/1563 , acc=72.64%, ce_loss=0.8972, e_reg=1.2949, kl_loss=0.3239, total_loss=5.9251


Time: 227.5s | Train Total Loss: 5.9251 | Train Acc: 72.64% | Val Loss: 1.1355 | Val Acc1: 83.18% | Val Acc5: 99.30% | LR: 0.000086


Epoch 74: 100%|██████████| Batch 1563/1563 , acc=72.26%, ce_loss=0.9052, e_reg=1.2955, kl_loss=0.3317, total_loss=6.0626


Time: 228.7s | Train Total Loss: 6.0626 | Train Acc: 72.26% | Val Loss: 1.1108 | Val Acc1: 83.49% | Val Acc5: 99.28% | LR: 0.000086


Epoch 75: 100%|██████████| Batch 1563/1563 , acc=72.68%, ce_loss=0.8934, e_reg=1.2887, kl_loss=0.3236, total_loss=5.9176


Time: 228.2s | Train Total Loss: 5.9176 | Train Acc: 72.68% | Val Loss: 1.0809 | Val Acc1: 83.64% | Val Acc5: 99.37% | LR: 0.000086


Epoch 76: 100%|██████████| Batch 1563/1563 , acc=72.35%, ce_loss=0.9004, e_reg=1.2955, kl_loss=0.3296, total_loss=6.0244


Time: 228.2s | Train Total Loss: 6.0244 | Train Acc: 72.35% | Val Loss: 1.1385 | Val Acc1: 83.71% | Val Acc5: 99.30% | LR: 0.000085


Epoch 77: 100%|██████████| Batch 1563/1563 , acc=73.60%, ce_loss=0.8701, e_reg=1.2744, kl_loss=0.3105, total_loss=5.6813


Time: 228.2s | Train Total Loss: 5.6813 | Train Acc: 73.60% | Val Loss: 0.9458 | Val Acc1: 85.59% | Val Acc5: 99.48% | LR: 0.000085


Epoch 78: 100%|██████████| Batch 1563/1563 , acc=72.58%, ce_loss=0.8939, e_reg=1.2919, kl_loss=0.3222, total_loss=5.8945


Time: 228.3s | Train Total Loss: 5.8945 | Train Acc: 72.58% | Val Loss: 0.9653 | Val Acc1: 84.77% | Val Acc5: 99.46% | LR: 0.000085


Epoch 79: 100%|██████████| Batch 1563/1563 , acc=73.17%, ce_loss=0.8797, e_reg=1.2810, kl_loss=0.3142, total_loss=5.7498


Time: 227.9s | Train Total Loss: 5.7498 | Train Acc: 73.17% | Val Loss: 0.9656 | Val Acc1: 85.37% | Val Acc5: 99.46% | LR: 0.000084


Epoch 80: 100%|██████████| Batch 1563/1563 , acc=73.87%, ce_loss=0.8663, e_reg=1.2758, kl_loss=0.3073, total_loss=5.6248


Time: 228.0s | Train Total Loss: 5.6248 | Train Acc: 73.87% | Val Loss: 0.9536 | Val Acc1: 85.70% | Val Acc5: 99.44% | LR: 0.000084


Epoch 81: 100%|██████████| Batch 1563/1563 , acc=73.85%, ce_loss=0.8644, e_reg=1.2734, kl_loss=0.3065, total_loss=5.6095


Time: 228.7s | Train Total Loss: 5.6095 | Train Acc: 73.85% | Val Loss: 0.9709 | Val Acc1: 85.18% | Val Acc5: 99.43% | LR: 0.000083


Epoch 82: 100%|██████████| Batch 1563/1563 , acc=73.85%, ce_loss=0.8632, e_reg=1.2749, kl_loss=0.3055, total_loss=5.5928


Time: 228.3s | Train Total Loss: 5.5928 | Train Acc: 73.85% | Val Loss: 1.0445 | Val Acc1: 84.75% | Val Acc5: 99.38% | LR: 0.000083


Epoch 83: 100%|██████████| Batch 1563/1563 , acc=73.73%, ce_loss=0.8691, e_reg=1.2740, kl_loss=0.3110, total_loss=5.6913


Time: 228.2s | Train Total Loss: 5.6913 | Train Acc: 73.73% | Val Loss: 0.9825 | Val Acc1: 85.30% | Val Acc5: 99.37% | LR: 0.000083


Epoch 84: 100%|██████████| Batch 1563/1563 , acc=74.44%, ce_loss=0.8490, e_reg=1.2617, kl_loss=0.2965, total_loss=5.4301


Time: 227.9s | Train Total Loss: 5.4301 | Train Acc: 74.44% | Val Loss: 1.1741 | Val Acc1: 82.71% | Val Acc5: 99.30% | LR: 0.000082


Epoch 85: 100%|██████████| Batch 1563/1563 , acc=73.76%, ce_loss=0.8661, e_reg=1.2726, kl_loss=0.3060, total_loss=5.6026


Time: 228.8s | Train Total Loss: 5.6026 | Train Acc: 73.76% | Val Loss: 0.9998 | Val Acc1: 84.72% | Val Acc5: 99.26% | LR: 0.000082


Epoch 86: 100%|██████████| Batch 1563/1563 , acc=74.58%, ce_loss=0.8477, e_reg=1.2563, kl_loss=0.2978, total_loss=5.4527


Time: 227.9s | Train Total Loss: 5.4527 | Train Acc: 74.58% | Val Loss: 0.9658 | Val Acc1: 84.89% | Val Acc5: 99.53% | LR: 0.000081


Epoch 87: 100%|██████████| Batch 1563/1563 , acc=74.66%, ce_loss=0.8501, e_reg=1.2661, kl_loss=0.2953, total_loss=5.4104


Time: 227.8s | Train Total Loss: 5.4104 | Train Acc: 74.66% | Val Loss: 0.9554 | Val Acc1: 85.80% | Val Acc5: 99.47% | LR: 0.000081


Epoch 88: 100%|██████████| Batch 1563/1563 , acc=75.14%, ce_loss=0.8390, e_reg=1.2604, kl_loss=0.2894, total_loss=5.3034


Time: 228.2s | Train Total Loss: 5.3034 | Train Acc: 75.14% | Val Loss: 0.9590 | Val Acc1: 85.16% | Val Acc5: 99.43% | LR: 0.000081


Epoch 89: 100%|██████████| Batch 1563/1563 , acc=73.91%, ce_loss=0.8602, e_reg=1.2678, kl_loss=0.3036, total_loss=5.5575


Time: 227.7s | Train Total Loss: 5.5575 | Train Acc: 73.91% | Val Loss: 0.9018 | Val Acc1: 85.76% | Val Acc5: 99.48% | LR: 0.000080


Epoch 90: 100%|██████████| Batch 1563/1563 , acc=74.78%, ce_loss=0.8434, e_reg=1.2578, kl_loss=0.2903, total_loss=5.3208


Time: 228.2s | Train Total Loss: 5.3208 | Train Acc: 74.78% | Val Loss: 1.0087 | Val Acc1: 85.37% | Val Acc5: 99.45% | LR: 0.000080


Epoch 91: 100%|██████████| Batch 1563/1563 , acc=74.82%, ce_loss=0.8446, e_reg=1.2584, kl_loss=0.2940, total_loss=5.3851


Time: 227.5s | Train Total Loss: 5.3851 | Train Acc: 74.82% | Val Loss: 0.9283 | Val Acc1: 85.99% | Val Acc5: 99.45% | LR: 0.000079


Epoch 92: 100%|██████████| Batch 1563/1563 , acc=75.39%, ce_loss=0.8278, e_reg=1.2512, kl_loss=0.2814, total_loss=5.1608


Time: 228.3s | Train Total Loss: 5.1608 | Train Acc: 75.39% | Val Loss: 0.9331 | Val Acc1: 86.01% | Val Acc5: 99.52% | LR: 0.000079


Epoch 93: 100%|██████████| Batch 1563/1563 , acc=75.49%, ce_loss=0.8275, e_reg=1.2483, kl_loss=0.2820, total_loss=5.1712


Time: 228.3s | Train Total Loss: 5.1712 | Train Acc: 75.49% | Val Loss: 0.9546 | Val Acc1: 85.77% | Val Acc5: 99.53% | LR: 0.000079


Epoch 94: 100%|██████████| Batch 1563/1563 , acc=75.04%, ce_loss=0.8368, e_reg=1.2507, kl_loss=0.2892, total_loss=5.3002


Time: 228.8s | Train Total Loss: 5.3002 | Train Acc: 75.04% | Val Loss: 0.9301 | Val Acc1: 85.95% | Val Acc5: 99.47% | LR: 0.000078


Epoch 95: 100%|██████████| Batch 1563/1563 , acc=75.18%, ce_loss=0.8368, e_reg=1.2543, kl_loss=0.2897, total_loss=5.3084


Time: 228.0s | Train Total Loss: 5.3084 | Train Acc: 75.18% | Val Loss: 0.8825 | Val Acc1: 86.51% | Val Acc5: 99.44% | LR: 0.000078


Epoch 96: 100%|██████████| Batch 1563/1563 , acc=75.81%, ce_loss=0.8156, e_reg=1.2394, kl_loss=0.2744, total_loss=5.0340


Time: 228.3s | Train Total Loss: 5.0340 | Train Acc: 75.81% | Val Loss: 0.8822 | Val Acc1: 86.69% | Val Acc5: 99.50% | LR: 0.000077


Epoch 97: 100%|██████████| Batch 1563/1563 , acc=75.90%, ce_loss=0.8183, e_reg=1.2438, kl_loss=0.2744, total_loss=5.0350


Time: 228.5s | Train Total Loss: 5.0350 | Train Acc: 75.90% | Val Loss: 0.9328 | Val Acc1: 86.12% | Val Acc5: 99.56% | LR: 0.000077


Epoch 98: 100%|██████████| Batch 1563/1563 , acc=75.90%, ce_loss=0.8176, e_reg=1.2435, kl_loss=0.2769, total_loss=5.0792


Time: 227.9s | Train Total Loss: 5.0792 | Train Acc: 75.90% | Val Loss: 0.9271 | Val Acc1: 86.06% | Val Acc5: 99.51% | LR: 0.000076


Epoch 99: 100%|██████████| Batch 1563/1563 , acc=75.10%, ce_loss=0.8323, e_reg=1.2485, kl_loss=0.2841, total_loss=5.2091


Time: 227.8s | Train Total Loss: 5.2091 | Train Acc: 75.10% | Val Loss: 0.9690 | Val Acc1: 85.55% | Val Acc5: 99.44% | LR: 0.000076


Epoch 100: 100%|██████████| Batch 1563/1563 , acc=75.98%, ce_loss=0.8141, e_reg=1.2417, kl_loss=0.2773, total_loss=5.0850


Time: 227.6s | Train Total Loss: 5.0850 | Train Acc: 75.98% | Val Loss: 0.8675 | Val Acc1: 86.83% | Val Acc5: 99.56% | LR: 0.000075


Epoch 101: 100%|██████████| Batch 1563/1563 , acc=76.80%, ce_loss=0.7950, e_reg=1.2294, kl_loss=0.2633, total_loss=4.8346


Time: 227.7s | Train Total Loss: 4.8346 | Train Acc: 76.80% | Val Loss: 0.8465 | Val Acc1: 86.70% | Val Acc5: 99.56% | LR: 0.000075


Epoch 102: 100%|██████████| Batch 1563/1563 , acc=76.43%, ce_loss=0.8006, e_reg=1.2312, kl_loss=0.2645, total_loss=4.8567


Time: 227.6s | Train Total Loss: 4.8567 | Train Acc: 76.43% | Val Loss: 0.8883 | Val Acc1: 86.89% | Val Acc5: 99.57% | LR: 0.000075


Epoch 103: 100%|██████████| Batch 1563/1563 , acc=76.85%, ce_loss=0.7971, e_reg=1.2307, kl_loss=0.2639, total_loss=4.8455


Time: 227.7s | Train Total Loss: 4.8455 | Train Acc: 76.85% | Val Loss: 0.8695 | Val Acc1: 86.98% | Val Acc5: 99.56% | LR: 0.000074


Epoch 104: 100%|██████████| Batch 1563/1563 , acc=76.18%, ce_loss=0.8045, e_reg=1.2306, kl_loss=0.2655, total_loss=4.8757


Time: 228.4s | Train Total Loss: 4.8757 | Train Acc: 76.18% | Val Loss: 0.8419 | Val Acc1: 87.10% | Val Acc5: 99.66% | LR: 0.000074


Epoch 105: 100%|██████████| Batch 1563/1563 , acc=76.73%, ce_loss=0.7979, e_reg=1.2315, kl_loss=0.2633, total_loss=4.8346


Time: 228.2s | Train Total Loss: 4.8346 | Train Acc: 76.73% | Val Loss: 0.9919 | Val Acc1: 85.91% | Val Acc5: 99.50% | LR: 0.000073


Epoch 106: 100%|██████████| Batch 1563/1563 , acc=76.81%, ce_loss=0.7963, e_reg=1.2299, kl_loss=0.2643, total_loss=4.8515


Time: 228.6s | Train Total Loss: 4.8515 | Train Acc: 76.81% | Val Loss: 0.9219 | Val Acc1: 86.70% | Val Acc5: 99.58% | LR: 0.000073


Epoch 107: 100%|██████████| Batch 1563/1563 , acc=77.11%, ce_loss=0.7897, e_reg=1.2224, kl_loss=0.2591, total_loss=4.7582


Time: 228.4s | Train Total Loss: 4.7582 | Train Acc: 77.11% | Val Loss: 0.8431 | Val Acc1: 87.10% | Val Acc5: 99.50% | LR: 0.000072


Epoch 108: 100%|██████████| Batch 1563/1563 , acc=77.57%, ce_loss=0.7766, e_reg=1.2170, kl_loss=0.2521, total_loss=4.6319


Time: 228.5s | Train Total Loss: 4.6319 | Train Acc: 77.57% | Val Loss: 0.8181 | Val Acc1: 87.42% | Val Acc5: 99.51% | LR: 0.000072


Epoch 109: 100%|██████████| Batch 1563/1563 , acc=76.91%, ce_loss=0.7936, e_reg=1.2284, kl_loss=0.2614, total_loss=4.8000


Time: 227.8s | Train Total Loss: 4.8000 | Train Acc: 76.91% | Val Loss: 0.8275 | Val Acc1: 87.44% | Val Acc5: 99.46% | LR: 0.000071


Epoch 110: 100%|██████████| Batch 1563/1563 , acc=77.05%, ce_loss=0.7871, e_reg=1.2233, kl_loss=0.2583, total_loss=4.7445


Time: 228.2s | Train Total Loss: 4.7445 | Train Acc: 77.05% | Val Loss: 0.9015 | Val Acc1: 86.37% | Val Acc5: 99.52% | LR: 0.000071


Epoch 111: 100%|██████████| Batch 1563/1563 , acc=77.36%, ce_loss=0.7875, e_reg=1.2254, kl_loss=0.2576, total_loss=4.7320


Time: 227.9s | Train Total Loss: 4.7320 | Train Acc: 77.36% | Val Loss: 0.8386 | Val Acc1: 87.25% | Val Acc5: 99.53% | LR: 0.000070


Epoch 112: 100%|██████████| Batch 1563/1563 , acc=77.35%, ce_loss=0.7810, e_reg=1.2212, kl_loss=0.2520, total_loss=4.6315


Time: 228.4s | Train Total Loss: 4.6315 | Train Acc: 77.35% | Val Loss: 0.8380 | Val Acc1: 87.27% | Val Acc5: 99.66% | LR: 0.000070


Epoch 113: 100%|██████████| Batch 1563/1563 , acc=77.69%, ce_loss=0.7740, e_reg=1.2164, kl_loss=0.2474, total_loss=4.5493


Time: 227.9s | Train Total Loss: 4.5493 | Train Acc: 77.69% | Val Loss: 0.7860 | Val Acc1: 87.93% | Val Acc5: 99.56% | LR: 0.000069


Epoch 114: 100%|██████████| Batch 1563/1563 , acc=77.28%, ce_loss=0.7832, e_reg=1.2222, kl_loss=0.2510, total_loss=4.6157


Time: 227.5s | Train Total Loss: 4.6157 | Train Acc: 77.28% | Val Loss: 0.8513 | Val Acc1: 87.04% | Val Acc5: 99.60% | LR: 0.000069


Epoch 115: 100%|██████████| Batch 1563/1563 , acc=77.87%, ce_loss=0.7701, e_reg=1.2118, kl_loss=0.2465, total_loss=4.5333


Time: 228.4s | Train Total Loss: 4.5333 | Train Acc: 77.87% | Val Loss: 0.8555 | Val Acc1: 87.00% | Val Acc5: 99.61% | LR: 0.000068


Epoch 116: 100%|██████████| Batch 1563/1563 , acc=78.14%, ce_loss=0.7653, e_reg=1.2127, kl_loss=0.2407, total_loss=4.4293


Time: 227.9s | Train Total Loss: 4.4293 | Train Acc: 78.14% | Val Loss: 0.8073 | Val Acc1: 87.43% | Val Acc5: 99.51% | LR: 0.000068


Epoch 117: 100%|██████████| Batch 1563/1563 , acc=77.98%, ce_loss=0.7678, e_reg=1.2131, kl_loss=0.2438, total_loss=4.4845


Time: 228.3s | Train Total Loss: 4.4845 | Train Acc: 77.98% | Val Loss: 0.8635 | Val Acc1: 87.13% | Val Acc5: 99.56% | LR: 0.000067


Epoch 118: 100%|██████████| Batch 1563/1563 , acc=78.02%, ce_loss=0.7638, e_reg=1.2044, kl_loss=0.2447, total_loss=4.4991


Time: 227.7s | Train Total Loss: 4.4991 | Train Acc: 78.02% | Val Loss: 0.8540 | Val Acc1: 87.13% | Val Acc5: 99.56% | LR: 0.000067


Epoch 119: 100%|██████████| Batch 1563/1563 , acc=77.99%, ce_loss=0.7686, e_reg=1.2097, kl_loss=0.2448, total_loss=4.5023


Time: 227.9s | Train Total Loss: 4.5023 | Train Acc: 77.99% | Val Loss: 0.8119 | Val Acc1: 87.19% | Val Acc5: 99.56% | LR: 0.000066


Epoch 120: 100%|██████████| Batch 1563/1563 , acc=78.04%, ce_loss=0.7631, e_reg=1.2064, kl_loss=0.2413, total_loss=4.4392


Time: 228.1s | Train Total Loss: 4.4392 | Train Acc: 78.04% | Val Loss: 0.8321 | Val Acc1: 87.45% | Val Acc5: 99.54% | LR: 0.000066


Epoch 121: 100%|██████████| Batch 1563/1563 , acc=77.93%, ce_loss=0.7701, e_reg=1.2091, kl_loss=0.2476, total_loss=4.5522


Time: 228.0s | Train Total Loss: 4.5522 | Train Acc: 77.93% | Val Loss: 0.8628 | Val Acc1: 86.93% | Val Acc5: 99.59% | LR: 0.000065


Epoch 122: 100%|██████████| Batch 1563/1563 , acc=78.16%, ce_loss=0.7648, e_reg=1.2129, kl_loss=0.2418, total_loss=4.4488


Time: 228.5s | Train Total Loss: 4.4488 | Train Acc: 78.16% | Val Loss: 0.8296 | Val Acc1: 87.80% | Val Acc5: 99.59% | LR: 0.000065


Epoch 123: 100%|██████████| Batch 1563/1563 , acc=78.66%, ce_loss=0.7497, e_reg=1.2007, kl_loss=0.2329, total_loss=4.2878


Time: 227.8s | Train Total Loss: 4.2878 | Train Acc: 78.66% | Val Loss: 0.8373 | Val Acc1: 87.35% | Val Acc5: 99.62% | LR: 0.000064


Epoch 124: 100%|██████████| Batch 1563/1563 , acc=78.96%, ce_loss=0.7424, e_reg=1.1955, kl_loss=0.2280, total_loss=4.2000


Time: 228.3s | Train Total Loss: 4.2000 | Train Acc: 78.96% | Val Loss: 0.8376 | Val Acc1: 87.15% | Val Acc5: 99.60% | LR: 0.000064


Epoch 125: 100%|██████████| Batch 1563/1563 , acc=78.55%, ce_loss=0.7530, e_reg=1.1988, kl_loss=0.2376, total_loss=4.3713


Time: 228.7s | Train Total Loss: 4.3713 | Train Acc: 78.55% | Val Loss: 0.8659 | Val Acc1: 86.66% | Val Acc5: 99.47% | LR: 0.000063


Epoch 126: 100%|██████████| Batch 1563/1563 , acc=78.55%, ce_loss=0.7549, e_reg=1.2029, kl_loss=0.2386, total_loss=4.3907


Time: 227.8s | Train Total Loss: 4.3907 | Train Acc: 78.55% | Val Loss: 0.8007 | Val Acc1: 87.70% | Val Acc5: 99.63% | LR: 0.000063


Epoch 127: 100%|██████████| Batch 1563/1563 , acc=79.10%, ce_loss=0.7413, e_reg=1.1964, kl_loss=0.2305, total_loss=4.2437


Time: 228.2s | Train Total Loss: 4.2437 | Train Acc: 79.10% | Val Loss: 0.8137 | Val Acc1: 87.54% | Val Acc5: 99.63% | LR: 0.000062


Epoch 128: 100%|██████████| Batch 1563/1563 , acc=79.34%, ce_loss=0.7359, e_reg=1.1915, kl_loss=0.2257, total_loss=4.1588


Time: 227.8s | Train Total Loss: 4.1588 | Train Acc: 79.34% | Val Loss: 0.7917 | Val Acc1: 87.47% | Val Acc5: 99.67% | LR: 0.000062


Epoch 129: 100%|██████████| Batch 1563/1563 , acc=78.97%, ce_loss=0.7472, e_reg=1.2002, kl_loss=0.2317, total_loss=4.2665


Time: 227.6s | Train Total Loss: 4.2665 | Train Acc: 78.97% | Val Loss: 0.8475 | Val Acc1: 87.15% | Val Acc5: 99.57% | LR: 0.000061


Epoch 130: 100%|██████████| Batch 1563/1563 , acc=79.33%, ce_loss=0.7357, e_reg=1.1949, kl_loss=0.2253, total_loss=4.1510


Time: 227.8s | Train Total Loss: 4.1510 | Train Acc: 79.33% | Val Loss: 0.8220 | Val Acc1: 87.28% | Val Acc5: 99.53% | LR: 0.000061


Epoch 131: 100%|██████████| Batch 1563/1563 , acc=79.08%, ce_loss=0.7396, e_reg=1.1922, kl_loss=0.2291, total_loss=4.2198


Time: 228.1s | Train Total Loss: 4.2198 | Train Acc: 79.08% | Val Loss: 0.7834 | Val Acc1: 88.00% | Val Acc5: 99.64% | LR: 0.000060


Epoch 132: 100%|██████████| Batch 1563/1563 , acc=79.14%, ce_loss=0.7413, e_reg=1.1949, kl_loss=0.2283, total_loss=4.2061


Time: 228.1s | Train Total Loss: 4.2061 | Train Acc: 79.14% | Val Loss: 0.8621 | Val Acc1: 87.35% | Val Acc5: 99.54% | LR: 0.000060


Epoch 133: 100%|██████████| Batch 1563/1563 , acc=79.14%, ce_loss=0.7411, e_reg=1.1933, kl_loss=0.2279, total_loss=4.1985


Time: 227.8s | Train Total Loss: 4.1985 | Train Acc: 79.14% | Val Loss: 0.7884 | Val Acc1: 87.78% | Val Acc5: 99.61% | LR: 0.000059


Epoch 134: 100%|██████████| Batch 1563/1563 , acc=79.39%, ce_loss=0.7349, e_reg=1.1874, kl_loss=0.2253, total_loss=4.1509


Time: 227.6s | Train Total Loss: 4.1509 | Train Acc: 79.39% | Val Loss: 0.7987 | Val Acc1: 87.64% | Val Acc5: 99.56% | LR: 0.000059


Epoch 135: 100%|██████████| Batch 1563/1563 , acc=79.38%, ce_loss=0.7354, e_reg=1.1926, kl_loss=0.2233, total_loss=4.1161


Time: 227.9s | Train Total Loss: 4.1161 | Train Acc: 79.38% | Val Loss: 0.7274 | Val Acc1: 88.55% | Val Acc5: 99.64% | LR: 0.000058


Epoch 136: 100%|██████████| Batch 1563/1563 , acc=79.35%, ce_loss=0.7332, e_reg=1.1869, kl_loss=0.2266, total_loss=4.1736


Time: 227.5s | Train Total Loss: 4.1736 | Train Acc: 79.35% | Val Loss: 0.7587 | Val Acc1: 88.05% | Val Acc5: 99.61% | LR: 0.000058


Epoch 137: 100%|██████████| Batch 1563/1563 , acc=79.85%, ce_loss=0.7261, e_reg=1.1858, kl_loss=0.2196, total_loss=4.0495


Time: 227.9s | Train Total Loss: 4.0495 | Train Acc: 79.85% | Val Loss: 0.7450 | Val Acc1: 88.20% | Val Acc5: 99.70% | LR: 0.000057


Epoch 138: 100%|██████████| Batch 1563/1563 , acc=79.47%, ce_loss=0.7308, e_reg=1.1881, kl_loss=0.2233, total_loss=4.1151


Time: 228.4s | Train Total Loss: 4.1151 | Train Acc: 79.47% | Val Loss: 0.7104 | Val Acc1: 88.63% | Val Acc5: 99.66% | LR: 0.000057


Epoch 139: 100%|██████████| Batch 1563/1563 , acc=79.91%, ce_loss=0.7213, e_reg=1.1816, kl_loss=0.2197, total_loss=4.0496


Time: 229.1s | Train Total Loss: 4.0496 | Train Acc: 79.91% | Val Loss: 0.7439 | Val Acc1: 88.34% | Val Acc5: 99.63% | LR: 0.000056


Epoch 140: 100%|██████████| Batch 1563/1563 , acc=79.94%, ce_loss=0.7196, e_reg=1.1822, kl_loss=0.2154, total_loss=3.9728


Time: 228.9s | Train Total Loss: 3.9728 | Train Acc: 79.94% | Val Loss: 0.7939 | Val Acc1: 87.70% | Val Acc5: 99.63% | LR: 0.000056


Epoch 141: 100%|██████████| Batch 1563/1563 , acc=79.93%, ce_loss=0.7224, e_reg=1.1809, kl_loss=0.2178, total_loss=4.0172


Time: 228.4s | Train Total Loss: 4.0172 | Train Acc: 79.93% | Val Loss: 0.8074 | Val Acc1: 87.80% | Val Acc5: 99.59% | LR: 0.000055


Epoch 142: 100%|██████████| Batch 1563/1563 , acc=80.31%, ce_loss=0.7148, e_reg=1.1782, kl_loss=0.2129, total_loss=3.9283


Time: 228.1s | Train Total Loss: 3.9283 | Train Acc: 80.31% | Val Loss: 0.7334 | Val Acc1: 88.51% | Val Acc5: 99.68% | LR: 0.000055


Epoch 143: 100%|██████████| Batch 1563/1563 , acc=80.36%, ce_loss=0.7140, e_reg=1.1792, kl_loss=0.2100, total_loss=3.8773


Time: 228.6s | Train Total Loss: 3.8773 | Train Acc: 80.36% | Val Loss: 0.7591 | Val Acc1: 88.32% | Val Acc5: 99.53% | LR: 0.000054


Epoch 144: 100%|██████████| Batch 1563/1563 , acc=80.14%, ce_loss=0.7139, e_reg=1.1762, kl_loss=0.2143, total_loss=3.9535


Time: 228.8s | Train Total Loss: 3.9535 | Train Acc: 80.14% | Val Loss: 0.7723 | Val Acc1: 88.04% | Val Acc5: 99.59% | LR: 0.000054


Epoch 145: 100%|██████████| Batch 1563/1563 , acc=80.01%, ce_loss=0.7207, e_reg=1.1830, kl_loss=0.2162, total_loss=3.9878


Time: 228.8s | Train Total Loss: 3.9878 | Train Acc: 80.01% | Val Loss: 0.7812 | Val Acc1: 88.01% | Val Acc5: 99.61% | LR: 0.000053


Epoch 146: 100%|██████████| Batch 1563/1563 , acc=79.93%, ce_loss=0.7186, e_reg=1.1780, kl_loss=0.2161, total_loss=3.9859


Time: 228.4s | Train Total Loss: 3.9859 | Train Acc: 79.93% | Val Loss: 0.7530 | Val Acc1: 87.96% | Val Acc5: 99.60% | LR: 0.000053


Epoch 147: 100%|██████████| Batch 1563/1563 , acc=80.56%, ce_loss=0.7080, e_reg=1.1768, kl_loss=0.2081, total_loss=3.8417


Time: 228.8s | Train Total Loss: 3.8417 | Train Acc: 80.56% | Val Loss: 0.7532 | Val Acc1: 88.31% | Val Acc5: 99.59% | LR: 0.000052


Epoch 148: 100%|██████████| Batch 1563/1563 , acc=79.84%, ce_loss=0.7234, e_reg=1.1836, kl_loss=0.2169, total_loss=4.0002


Time: 228.8s | Train Total Loss: 4.0002 | Train Acc: 79.84% | Val Loss: 0.7734 | Val Acc1: 87.92% | Val Acc5: 99.67% | LR: 0.000052


Epoch 149: 100%|██████████| Batch 1563/1563 , acc=80.42%, ce_loss=0.7118, e_reg=1.1773, kl_loss=0.2084, total_loss=3.8484


Time: 228.2s | Train Total Loss: 3.8484 | Train Acc: 80.42% | Val Loss: 0.7478 | Val Acc1: 88.29% | Val Acc5: 99.65% | LR: 0.000051


Epoch 150: 100%|██████████| Batch 1563/1563 , acc=80.42%, ce_loss=0.7089, e_reg=1.1763, kl_loss=0.2080, total_loss=3.8415


Time: 228.5s | Train Total Loss: 3.8415 | Train Acc: 80.42% | Val Loss: 0.7108 | Val Acc1: 88.61% | Val Acc5: 99.65% | LR: 0.000051


Epoch 151: 100%|██████████| Batch 1563/1563 , acc=80.21%, ce_loss=0.7129, e_reg=1.1762, kl_loss=0.2126, total_loss=3.9224


Time: 228.6s | Train Total Loss: 3.9224 | Train Acc: 80.21% | Val Loss: 0.7579 | Val Acc1: 88.15% | Val Acc5: 99.58% | LR: 0.000050


Epoch 152: 100%|██████████| Batch 1563/1563 , acc=80.47%, ce_loss=0.7109, e_reg=1.1757, kl_loss=0.2094, total_loss=3.8654


Time: 229.0s | Train Total Loss: 3.8654 | Train Acc: 80.47% | Val Loss: 0.7404 | Val Acc1: 88.95% | Val Acc5: 99.68% | LR: 0.000049


Epoch 153: 100%|██████████| Batch 1563/1563 , acc=80.58%, ce_loss=0.7054, e_reg=1.1726, kl_loss=0.2081, total_loss=3.8422


Time: 229.1s | Train Total Loss: 3.8422 | Train Acc: 80.58% | Val Loss: 0.6890 | Val Acc1: 89.13% | Val Acc5: 99.60% | LR: 0.000049


Epoch 154: 100%|██████████| Batch 1563/1563 , acc=80.59%, ce_loss=0.7053, e_reg=1.1739, kl_loss=0.2047, total_loss=3.7820


Time: 228.4s | Train Total Loss: 3.7820 | Train Acc: 80.59% | Val Loss: 0.7413 | Val Acc1: 88.25% | Val Acc5: 99.74% | LR: 0.000048


Epoch 155: 100%|██████████| Batch 1563/1563 , acc=81.17%, ce_loss=0.6943, e_reg=1.1674, kl_loss=0.1995, total_loss=3.6878


Time: 229.2s | Train Total Loss: 3.6878 | Train Acc: 81.17% | Val Loss: 0.6901 | Val Acc1: 88.83% | Val Acc5: 99.73% | LR: 0.000048


Epoch 156: 100%|██████████| Batch 1563/1563 , acc=80.80%, ce_loss=0.6990, e_reg=1.1677, kl_loss=0.2029, total_loss=3.7480


Time: 228.7s | Train Total Loss: 3.7480 | Train Acc: 80.80% | Val Loss: 0.7055 | Val Acc1: 88.83% | Val Acc5: 99.63% | LR: 0.000047


Epoch 157: 100%|██████████| Batch 1563/1563 , acc=80.75%, ce_loss=0.7038, e_reg=1.1710, kl_loss=0.2086, total_loss=3.8501


Time: 228.4s | Train Total Loss: 3.8501 | Train Acc: 80.75% | Val Loss: 0.7733 | Val Acc1: 87.95% | Val Acc5: 99.62% | LR: 0.000047


Epoch 158: 100%|██████████| Batch 1563/1563 , acc=80.69%, ce_loss=0.6992, e_reg=1.1684, kl_loss=0.2027, total_loss=3.7452


Time: 228.1s | Train Total Loss: 3.7452 | Train Acc: 80.69% | Val Loss: 0.7233 | Val Acc1: 88.30% | Val Acc5: 99.61% | LR: 0.000046


Epoch 159: 100%|██████████| Batch 1563/1563 , acc=80.75%, ce_loss=0.7044, e_reg=1.1728, kl_loss=0.2045, total_loss=3.7787


Time: 228.0s | Train Total Loss: 3.7787 | Train Acc: 80.75% | Val Loss: 0.6986 | Val Acc1: 88.94% | Val Acc5: 99.73% | LR: 0.000046


Epoch 160: 100%|██████████| Batch 1563/1563 , acc=81.15%, ce_loss=0.6930, e_reg=1.1647, kl_loss=0.2004, total_loss=3.7039


Time: 227.5s | Train Total Loss: 3.7039 | Train Acc: 81.15% | Val Loss: 0.7452 | Val Acc1: 88.37% | Val Acc5: 99.66% | LR: 0.000045


Epoch 161: 100%|██████████| Batch 1563/1563 , acc=81.36%, ce_loss=0.6867, e_reg=1.1611, kl_loss=0.1974, total_loss=3.6486


Time: 227.8s | Train Total Loss: 3.6486 | Train Acc: 81.36% | Val Loss: 0.7400 | Val Acc1: 88.72% | Val Acc5: 99.65% | LR: 0.000045


Epoch 162: 100%|██████████| Batch 1563/1563 , acc=81.22%, ce_loss=0.6911, e_reg=1.1650, kl_loss=0.1969, total_loss=3.6409


Time: 227.4s | Train Total Loss: 3.6409 | Train Acc: 81.22% | Val Loss: 0.7520 | Val Acc1: 88.49% | Val Acc5: 99.54% | LR: 0.000044


Epoch 163: 100%|██████████| Batch 1563/1563 , acc=80.64%, ce_loss=0.7037, e_reg=1.1730, kl_loss=0.2030, total_loss=3.7518


Time: 227.7s | Train Total Loss: 3.7518 | Train Acc: 80.64% | Val Loss: 0.7092 | Val Acc1: 88.65% | Val Acc5: 99.63% | LR: 0.000044


Epoch 164: 100%|██████████| Batch 1563/1563 , acc=81.21%, ce_loss=0.6928, e_reg=1.1668, kl_loss=0.1995, total_loss=3.6879


Time: 228.2s | Train Total Loss: 3.6879 | Train Acc: 81.21% | Val Loss: 0.6977 | Val Acc1: 88.98% | Val Acc5: 99.59% | LR: 0.000043


Epoch 165: 100%|██████████| Batch 1563/1563 , acc=81.32%, ce_loss=0.6868, e_reg=1.1608, kl_loss=0.1976, total_loss=3.6531


Time: 228.0s | Train Total Loss: 3.6531 | Train Acc: 81.32% | Val Loss: 0.7480 | Val Acc1: 87.91% | Val Acc5: 99.57% | LR: 0.000043


Epoch 166: 100%|██████████| Batch 1563/1563 , acc=81.17%, ce_loss=0.6922, e_reg=1.1686, kl_loss=0.1972, total_loss=3.6466


Time: 227.8s | Train Total Loss: 3.6466 | Train Acc: 81.17% | Val Loss: 0.7047 | Val Acc1: 88.47% | Val Acc5: 99.56% | LR: 0.000042


Epoch 167: 100%|██████████| Batch 1563/1563 , acc=81.57%, ce_loss=0.6833, e_reg=1.1588, kl_loss=0.1937, total_loss=3.5827


Time: 227.8s | Train Total Loss: 3.5827 | Train Acc: 81.57% | Val Loss: 0.7046 | Val Acc1: 88.58% | Val Acc5: 99.69% | LR: 0.000042


Epoch 168: 100%|██████████| Batch 1563/1563 , acc=81.34%, ce_loss=0.6897, e_reg=1.1641, kl_loss=0.1950, total_loss=3.6077


Time: 227.3s | Train Total Loss: 3.6077 | Train Acc: 81.34% | Val Loss: 0.7723 | Val Acc1: 87.85% | Val Acc5: 99.61% | LR: 0.000041


Epoch 169: 100%|██████████| Batch 1563/1563 , acc=81.33%, ce_loss=0.6896, e_reg=1.1654, kl_loss=0.1970, total_loss=3.6425


Time: 227.6s | Train Total Loss: 3.6425 | Train Acc: 81.33% | Val Loss: 0.7315 | Val Acc1: 88.67% | Val Acc5: 99.62% | LR: 0.000041


Epoch 170: 100%|██████████| Batch 1563/1563 , acc=81.63%, ce_loss=0.6801, e_reg=1.1592, kl_loss=0.1906, total_loss=3.5274


Time: 227.6s | Train Total Loss: 3.5274 | Train Acc: 81.63% | Val Loss: 0.7271 | Val Acc1: 88.70% | Val Acc5: 99.70% | LR: 0.000040


Epoch 171: 100%|██████████| Batch 1563/1563 , acc=81.60%, ce_loss=0.6804, e_reg=1.1582, kl_loss=0.1886, total_loss=3.4925


Time: 228.2s | Train Total Loss: 3.4925 | Train Acc: 81.60% | Val Loss: 0.7127 | Val Acc1: 88.80% | Val Acc5: 99.73% | LR: 0.000040


Epoch 172: 100%|██████████| Batch 1563/1563 , acc=81.75%, ce_loss=0.6798, e_reg=1.1579, kl_loss=0.1921, total_loss=3.5539


Time: 227.7s | Train Total Loss: 3.5539 | Train Acc: 81.75% | Val Loss: 0.7491 | Val Acc1: 88.34% | Val Acc5: 99.57% | LR: 0.000039


Epoch 173: 100%|██████████| Batch 1563/1563 , acc=81.46%, ce_loss=0.6831, e_reg=1.1572, kl_loss=0.1926, total_loss=3.5643


Time: 227.4s | Train Total Loss: 3.5643 | Train Acc: 81.46% | Val Loss: 0.7017 | Val Acc1: 88.90% | Val Acc5: 99.62% | LR: 0.000039


Epoch 174: 100%|██████████| Batch 1563/1563 , acc=81.66%, ce_loss=0.6818, e_reg=1.1616, kl_loss=0.1910, total_loss=3.5358


Time: 228.4s | Train Total Loss: 3.5358 | Train Acc: 81.66% | Val Loss: 0.7249 | Val Acc1: 88.29% | Val Acc5: 99.61% | LR: 0.000038


Epoch 175: 100%|██████████| Batch 1563/1563 , acc=81.40%, ce_loss=0.6884, e_reg=1.1640, kl_loss=0.1957, total_loss=3.6192


Time: 228.4s | Train Total Loss: 3.6192 | Train Acc: 81.40% | Val Loss: 0.7342 | Val Acc1: 88.33% | Val Acc5: 99.59% | LR: 0.000038


Epoch 176: 100%|██████████| Batch 1563/1563 , acc=82.02%, ce_loss=0.6756, e_reg=1.1569, kl_loss=0.1894, total_loss=3.5057


Time: 228.9s | Train Total Loss: 3.5057 | Train Acc: 82.02% | Val Loss: 0.7182 | Val Acc1: 88.84% | Val Acc5: 99.63% | LR: 0.000037


Epoch 177: 100%|██████████| Batch 1563/1563 , acc=82.12%, ce_loss=0.6700, e_reg=1.1504, kl_loss=0.1876, total_loss=3.4719


Time: 227.8s | Train Total Loss: 3.4719 | Train Acc: 82.12% | Val Loss: 0.7049 | Val Acc1: 88.96% | Val Acc5: 99.60% | LR: 0.000037


Epoch 178: 100%|██████████| Batch 1563/1563 , acc=81.87%, ce_loss=0.6781, e_reg=1.1571, kl_loss=0.1892, total_loss=3.5027


Time: 227.6s | Train Total Loss: 3.5027 | Train Acc: 81.87% | Val Loss: 0.6735 | Val Acc1: 88.80% | Val Acc5: 99.69% | LR: 0.000036


Epoch 179: 100%|██████████| Batch 1563/1563 , acc=81.87%, ce_loss=0.6763, e_reg=1.1564, kl_loss=0.1874, total_loss=3.4705


Time: 227.5s | Train Total Loss: 3.4705 | Train Acc: 81.87% | Val Loss: 0.7424 | Val Acc1: 88.30% | Val Acc5: 99.69% | LR: 0.000036


Epoch 180: 100%|██████████| Batch 1563/1563 , acc=81.57%, ce_loss=0.6803, e_reg=1.1591, kl_loss=0.1894, total_loss=3.5064


Time: 228.0s | Train Total Loss: 3.5064 | Train Acc: 81.57% | Val Loss: 0.7131 | Val Acc1: 88.16% | Val Acc5: 99.53% | LR: 0.000035


Epoch 181: 100%|██████████| Batch 1563/1563 , acc=81.75%, ce_loss=0.6787, e_reg=1.1575, kl_loss=0.1886, total_loss=3.4931


Time: 228.3s | Train Total Loss: 3.4931 | Train Acc: 81.75% | Val Loss: 0.7249 | Val Acc1: 88.79% | Val Acc5: 99.71% | LR: 0.000035


Epoch 182: 100%|██████████| Batch 1563/1563 , acc=82.04%, ce_loss=0.6728, e_reg=1.1520, kl_loss=0.1838, total_loss=3.4069


Time: 228.5s | Train Total Loss: 3.4069 | Train Acc: 82.04% | Val Loss: 0.7257 | Val Acc1: 88.74% | Val Acc5: 99.63% | LR: 0.000034


Epoch 183: 100%|██████████| Batch 1563/1563 , acc=82.30%, ce_loss=0.6644, e_reg=1.1465, kl_loss=0.1826, total_loss=3.3839


Time: 228.6s | Train Total Loss: 3.3839 | Train Acc: 82.30% | Val Loss: 0.7019 | Val Acc1: 88.75% | Val Acc5: 99.63% | LR: 0.000034


Epoch 184: 100%|██████████| Batch 1563/1563 , acc=81.99%, ce_loss=0.6728, e_reg=1.1538, kl_loss=0.1870, total_loss=3.4623


Time: 228.6s | Train Total Loss: 3.4623 | Train Acc: 81.99% | Val Loss: 0.7110 | Val Acc1: 88.83% | Val Acc5: 99.69% | LR: 0.000033


Epoch 185: 100%|██████████| Batch 1563/1563 , acc=82.30%, ce_loss=0.6659, e_reg=1.1488, kl_loss=0.1843, total_loss=3.4130


Time: 228.1s | Train Total Loss: 3.4130 | Train Acc: 82.30% | Val Loss: 0.7533 | Val Acc1: 88.30% | Val Acc5: 99.66% | LR: 0.000033


Epoch 186: 100%|██████████| Batch 1563/1563 , acc=82.56%, ce_loss=0.6616, e_reg=1.1465, kl_loss=0.1822, total_loss=3.3747


Time: 228.7s | Train Total Loss: 3.3747 | Train Acc: 82.56% | Val Loss: 0.6946 | Val Acc1: 89.12% | Val Acc5: 99.60% | LR: 0.000032


Epoch 187: 100%|██████████| Batch 1563/1563 , acc=82.59%, ce_loss=0.6655, e_reg=1.1513, kl_loss=0.1807, total_loss=3.3498


Time: 228.3s | Train Total Loss: 3.3498 | Train Acc: 82.59% | Val Loss: 0.7171 | Val Acc1: 88.60% | Val Acc5: 99.65% | LR: 0.000032


Epoch 188: 100%|██████████| Batch 1563/1563 , acc=82.16%, ce_loss=0.6686, e_reg=1.1515, kl_loss=0.1850, total_loss=3.4260


Time: 227.6s | Train Total Loss: 3.4260 | Train Acc: 82.16% | Val Loss: 0.6854 | Val Acc1: 88.47% | Val Acc5: 99.63% | LR: 0.000031


Epoch 189: 100%|██████████| Batch 1563/1563 , acc=82.34%, ce_loss=0.6648, e_reg=1.1479, kl_loss=0.1814, total_loss=3.3625


Time: 227.9s | Train Total Loss: 3.3625 | Train Acc: 82.34% | Val Loss: 0.6913 | Val Acc1: 88.47% | Val Acc5: 99.73% | LR: 0.000031


Epoch 190:  95%|█████████▍| Batch 1484/1563 , acc=82.52%, ce_loss=0.6652, e_reg=1.1522, kl_loss=0.1829, total_loss=3.3892

In [None]:
# Persist the student's parameters (state_dict only, not the full module) to disk.
# NOTE(review): the file name "9229.pth" looks like an accuracy tag - confirm.
torch.save(obj=s_model.state_dict(), f="9229.pth")

In [None]:
# Keep only the student logits coming from full batches of 256 samples.
# Each entry is indexed as (T, B, K), so the batch size lives on axis 1.
# The list is rebuilt in one pass instead of popping while iterating over a
# pre-computed index range: popping shifts the remaining items, which skips the
# element right after every removal and raises IndexError as soon as a removed
# entry is not the very last one.
# Slice assignment keeps the same list object, so other references stay valid.
# NOTE(review): the teacher list must be filtered with the same criterion so
# both lists stay aligned index-by-index - confirm.
s_logits_list[:] = [logits for logits in s_logits_list if logits.shape[1] == 256]

In [None]:
# Keep only the teacher logits coming from full batches of 256 samples.
# Each entry is indexed as (B, K), so the batch size lives on axis 0.
# The list is rebuilt in one pass instead of popping while iterating over a
# pre-computed index range: popping shifts the remaining items, which skips the
# element right after every removal and raises IndexError as soon as a removed
# entry is not the very last one.
# Slice assignment keeps the same list object, so other references stay valid.
# NOTE(review): the student list must be filtered with the same criterion so
# both lists stay aligned index-by-index - confirm.
t_logits_list[:] = [logits for logits in t_logits_list if logits.shape[0] == 256]

In [None]:
# Stack the per-batch logits along a new leading dimension (one slot per batch).
# torch.stack requires every entry of a list to share the same shape.
s_logits_tensor = torch.stack(s_logits_list, dim=0)
t_logits_tensor = torch.stack(t_logits_list, dim=0)

# Show both resulting shapes, one per line
print(s_logits_tensor.shape, t_logits_tensor.shape, sep="\n")

In [None]:
# Replicate the teacher logits along the temporal axis (dim 1) so that they have
# one copy per student time step; the repeat count is the student's size on dim 1.
# The guard makes this cell idempotent: the teacher tensor is overwritten in
# place, so without it a second execution would unsqueeze an already expanded
# tensor and fail inside `repeat`. Replication only happens while the teacher
# still has exactly one dimension fewer than the student.
if t_logits_tensor.dim() == s_logits_tensor.dim() - 1:
    t_logits_tensor = t_logits_tensor.unsqueeze(1).repeat(1, s_logits_tensor.shape[1], 1, 1)

print(s_logits_tensor.shape)
print(t_logits_tensor.shape)

In [None]:
# Serialize the student and teacher logits tensors to disk, one pickle file each
import pickle

for pkl_name, logits in (
    ('s_logits_tensor.pkl', s_logits_tensor),
    ('t_logits_tensor.pkl', t_logits_tensor),
):
    # Binary write mode is required by pickle
    with open(pkl_name, 'wb') as f:
        pickle.dump(logits, f)

In [None]:
# Student logits: last stacked batch, index 0 on the temporal axis, first sample
logits_t1 = s_logits_tensor[-1, 0, 0]

In [None]:
# Student logits: last stacked batch, index 1 on the temporal axis, first sample
logits_t2 = s_logits_tensor[-1, 1, 0]

In [None]:
# Teacher logits: last stacked batch, index 0 on the second axis, first sample
t_logits = t_logits_tensor[-1, 0, 0]

In [None]:
# Turn each logits vector into a probability distribution, dividing by the
# distillation temperature h_dict['TAU'] before the softmax
softmax_t1 = (logits_t1 / h_dict['TAU']).softmax(dim=0)
softmax_t2 = (logits_t2 / h_dict['TAU']).softmax(dim=0)
softmax_t = (t_logits / h_dict['TAU']).softmax(dim=0)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def plot_prob_tensors(tensors, class_names=None, alpha=0.4, labels=None,
                      title="ResNet34 Probability Distribution"):
    """
    Plot several 1-D probability distributions over the same set of classes as
    grouped, semi-transparent bars so that they can be compared class by class.

    Parameters
    ----------
    tensors : list or iterable of 1-D arrays
        Each entry must be a length-N iterable (N = #classes) of probabilities.
        Entries are converted with ``np.asarray``.
    class_names : list of str, optional
        Labels for the x-axis. If None, classes are numbered 1..N.
    alpha : float, optional
        Transparency for the bars (0 = fully transparent, 1 = opaque).
    labels : list of str, optional
        Legend entry for each tensor, in the same order as ``tensors``.
        If None, the entries are named "tensor 1", "tensor 2", ...
    title : str, optional
        Title of the figure.

    Raises
    ------
    ValueError
        If ``tensors`` is empty, if an entry is not 1-D, if the entries do not
        all have the same length, or if ``labels`` does not provide exactly one
        name per tensor.
    """
    arrays = [np.asarray(t) for t in tensors]

    # Validate everything up front so bad input fails with a clear message
    # before any figure is created.
    if not arrays:
        raise ValueError("At least one tensor is required")
    if any(arr.ndim != 1 for arr in arrays):
        raise ValueError("All tensors must be 1-D")
    n_classes = arrays[0].shape[0]
    if any(arr.shape[0] != n_classes for arr in arrays):
        raise ValueError("All tensors must have the same length")

    if class_names is None:
        class_names = [str(i + 1) for i in range(n_classes)]

    if labels is None:
        labels = [f"tensor {i + 1}" for i in range(len(arrays))]
    else:
        labels = list(labels)
        if len(labels) != len(arrays):
            raise ValueError("labels must provide exactly one name per tensor")

    x = np.arange(n_classes)
    width = 0.8 / len(arrays)  # keep the bars inside the class bin
    center = (len(arrays) - 1) / 2  # offset that centers the group on the tick

    fig, ax = plt.subplots(figsize=(8, 3))
    for i, (arr, label) in enumerate(zip(arrays, labels)):
        ax.bar(x + (i - center) * width,
               arr,
               width=width,
               label=label,
               alpha=alpha)

    ax.set_xticks(x)
    ax.set_xticklabels(class_names)
    ax.set_ylabel("Probability")
    ax.set_xlabel("Class")
    ax.set_title(title)
    ax.set_ylim(0, 1)  # probabilities live in [0, 1]
    ax.legend()
    fig.tight_layout()
    plt.show()

# Move the three distributions to host memory as NumPy arrays and plot them
# together (order: student step 1, student step 2, teacher)
tensor_list = [
    probs.cpu().detach().numpy()
    for probs in (softmax_t1, softmax_t2, softmax_t)
]
plot_prob_tensors(tensor_list, alpha=0.5)