In [1]:
from utils.loss_functions import DKDLoss
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from tqdm import tqdm
from models_package.models import Teacher, Student
from torchvision import datasets, transforms, models
import models_package
import time
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import os
# new libraries
from data.data_loader import load_cifar10, load_cifar100, load_imagenet, load_prof
import boto3
import io
from utils.compare_tools import compare_model_size, compare_inference_time, compare_performance_metrics, plot_comparison
from utils.misc_tools import best_LR, train_teacher, retrieve_teacher_class_weights, new_teacher_class_weights

## Find best LR

In [2]:
# Hyperparameters
learning_rate = 0.003  # 0.01 for resnet34x2 & 0.1 for resnet8 & 0.003 for resnet 8x4
num_epochs = 200
num_workers = 2
batch_size = 64
temperature = 4.0
alpha = 0.9
momentum = 0.9
num_classes = 10
step_size = 30
gamma = 0.1

# new parameters
# lr_input = 0.1
# momentum_input = 0.9
weight_decay_input = 5e-4
# epochs = 20
# T = 4.0 # temperatureture
# alpha = 0.9
patience = 5  # for early stopping

## Load in Data

In [3]:
# Load IdenProf dataset
train_path = '/home/ubuntu/W210-Capstone/notebooks/idenprof/train'
test_path = '/home/ubuntu/W210-Capstone/notebooks/idenprof/test'
trainloader, testloader  = load_prof(train_path, test_path, batch_size=batch_size)

## Prep S3 Bucket Info

In [4]:
# Calling the function
model_name = 'resnet32x4_idenprof'
model_weight_path = 'weights/idenprof_teacher_resnet32x4_weights.pth'
num_class = 10
data_name = 'idenprof'  
batch_size = 32  
bucket_name = '210bucket'  

## Load in models

### resnet32x4_idenprof

In [4]:
# Instantiate the models
teacher_name = 'resnet32x4_idenprof'
teacher_model = models_package.__dict__[teacher_name](num_class=10)
teacher_model.fc = nn.Linear(teacher_model.fc.in_features, 10)

### resnet8x4_idenprof

In [5]:
student_name = 'resnet8x4_idenprof'
student_model = models_package.__dict__[student_name](num_class=10)
student_model.fc = nn.Linear(teacher_model.fc.in_features, 10)

In [6]:
# Optimizer and scheduler for the student model
student_optimizer = optim.SGD(student_model.parameters(), lr=learning_rate, momentum=momentum)
student_scheduler = torch.optim.lr_scheduler.StepLR(student_optimizer, step_size=step_size, gamma=gamma)

# Optimizer and scheduler for the teacher model
teacher_optimizer = optim.SGD(teacher_model.parameters(), lr=learning_rate, momentum=momentum)
teacher_scheduler = torch.optim.lr_scheduler.StepLR(teacher_optimizer, step_size=step_size, gamma=gamma)

criterion = nn.CrossEntropyLoss()
# Assuming the device is a CUDA device if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Best LR

In [9]:
teacher_lr = best_LR('resnet32x4_lr_test', teacher_model, trainloader, criterion, teacher_optimizer, teacher_scheduler, num_epochs=3, emb = True)
teacher_lr

  0%|                                                                                                 | 0/71 [00:01<?, ?it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 1.56 GiB. GPU 0 has a total capacty of 22.19 GiB of which 1.47 GiB is free. Including non-PyTorch memory, this process has 20.71 GiB memory in use. Of the allocated memory 20.37 GiB is allocated by PyTorch, and 47.87 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
student_lr = best_LR('resnet8x4_lr', student_model, trainloader, criterion, optimizer, scheduler, num_epochs=3, emb = True)
student_lr

In [9]:
teacher_lr = 0.00036685719526150065
student_lr = 0.0016510167498967254

In [10]:
# Optimizer and scheduler for the student model
student_optimizer = optim.SGD(student_model.parameters(), lr=student_lr, momentum=momentum)
student_scheduler = torch.optim.lr_scheduler.StepLR(student_optimizer, step_size=step_size, gamma=gamma)

# Optimizer and scheduler for the teacher model
teacher_optimizer = optim.SGD(teacher_model.parameters(), lr=teacher_lr, momentum=momentum)
teacher_scheduler = torch.optim.lr_scheduler.StepLR(teacher_optimizer, step_size=step_size, gamma=gamma)

criterion = nn.CrossEntropyLoss()
# Assuming the device is a CUDA device if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Train Leaderboard Teacher Models

In [12]:
teacher_resnet32x4 = train_teacher('resnet_32x4', teacher_model, trainloader, criterion, teacher_optimizer, teacher_scheduler, num_epochs=260, patience=5)


 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[1, 100] loss: 1.300


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[1, 200] loss: 1.255


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[2, 100] loss: 1.132


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[2, 200] loss: 1.143


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[3, 100] loss: 1.004


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[3, 200] loss: 1.064


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[4, 100] loss: 0.995


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[4, 200] loss: 0.993


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[5, 100] loss: 0.886


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[5, 200] loss: 0.919


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[6, 100] loss: 0.794


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[6, 200] loss: 0.865


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[7, 100] loss: 0.762


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[7, 200] loss: 0.801


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[8, 100] loss: 0.737


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[8, 200] loss: 0.775


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[9, 100] loss: 0.718


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[9, 200] loss: 0.722


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[10, 100] loss: 0.631


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[10, 200] loss: 0.699


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[11, 100] loss: 0.611


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[11, 200] loss: 0.604


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[12, 100] loss: 0.563


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[12, 200] loss: 0.567


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[13, 100] loss: 0.542


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[13, 200] loss: 0.543


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[14, 100] loss: 0.494


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[14, 200] loss: 0.519


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[15, 100] loss: 0.479


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[15, 200] loss: 0.520


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[16, 100] loss: 0.425


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[16, 200] loss: 0.405


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[17, 100] loss: 0.400


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[17, 200] loss: 0.398


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[18, 100] loss: 0.382


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[18, 200] loss: 0.375


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[19, 100] loss: 0.318


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[19, 200] loss: 0.345


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[20, 100] loss: 0.290


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[20, 200] loss: 0.310


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[21, 100] loss: 0.291


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[21, 200] loss: 0.286


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[22, 100] loss: 0.288


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[22, 200] loss: 0.257


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[23, 100] loss: 0.215


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[23, 200] loss: 0.240


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[24, 100] loss: 0.219


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[24, 200] loss: 0.240


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[25, 100] loss: 0.188


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[25, 200] loss: 0.213


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[26, 100] loss: 0.162


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[26, 200] loss: 0.155


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[27, 100] loss: 0.177


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[27, 200] loss: 0.164


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[28, 100] loss: 0.180


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[28, 200] loss: 0.154


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[29, 100] loss: 0.096


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[29, 200] loss: 0.075


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[30, 100] loss: 0.069


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[30, 200] loss: 0.062


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[31, 100] loss: 0.064


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[31, 200] loss: 0.069


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[32, 100] loss: 0.065


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[32, 200] loss: 0.066


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[33, 100] loss: 0.057


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[33, 200] loss: 0.057


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[34, 100] loss: 0.060


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[34, 200] loss: 0.060


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[35, 100] loss: 0.052


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[35, 200] loss: 0.058


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[36, 100] loss: 0.049


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[36, 200] loss: 0.056


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[37, 100] loss: 0.052


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[37, 200] loss: 0.057


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[38, 100] loss: 0.054


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[38, 200] loss: 0.051


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[39, 100] loss: 0.055


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[39, 200] loss: 0.052


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[40, 100] loss: 0.049


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[40, 200] loss: 0.052


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[41, 100] loss: 0.056


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[41, 200] loss: 0.048


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[42, 100] loss: 0.057


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[42, 200] loss: 0.049


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[43, 100] loss: 0.050


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[43, 200] loss: 0.049


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[44, 100] loss: 0.049


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[44, 200] loss: 0.050


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[45, 100] loss: 0.048


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[45, 200] loss: 0.050


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[46, 100] loss: 0.052


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[46, 200] loss: 0.044


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[47, 100] loss: 0.046


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[47, 200] loss: 0.050


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[48, 100] loss: 0.044


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[48, 200] loss: 0.048


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[49, 100] loss: 0.043


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[49, 200] loss: 0.046


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[50, 100] loss: 0.045


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[50, 200] loss: 0.045


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[51, 100] loss: 0.043


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[51, 200] loss: 0.046


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[52, 100] loss: 0.046


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[52, 200] loss: 0.046


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[53, 100] loss: 0.044


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[53, 200] loss: 0.038


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[54, 100] loss: 0.044


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[54, 200] loss: 0.043


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[55, 100] loss: 0.042


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[55, 200] loss: 0.045


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[56, 100] loss: 0.044


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[56, 200] loss: 0.046


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[57, 100] loss: 0.043


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[57, 200] loss: 0.037


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[58, 100] loss: 0.039


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[58, 200] loss: 0.040


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[59, 100] loss: 0.039


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[59, 200] loss: 0.044


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[60, 100] loss: 0.035


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[60, 200] loss: 0.034


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[61, 100] loss: 0.045


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[61, 200] loss: 0.040


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[62, 100] loss: 0.042


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[62, 200] loss: 0.041


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[63, 100] loss: 0.037


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[63, 200] loss: 0.041


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[64, 100] loss: 0.039


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[64, 200] loss: 0.038


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]
 35%|██████████████████████████████▍                                                       | 100/282 [01:27<02:38,  1.15it/s]

[65, 100] loss: 0.038


 71%|████████████████████████████████████████████████████████████▉                         | 200/282 [02:54<01:11,  1.15it/s]

[65, 200] loss: 0.041


100%|██████████████████████████████████████████████████████████████████████████████████████| 282/282 [04:05<00:00,  1.15it/s]

Early stopping
Finished Training Teacher





## Extract Class Weights for Norm and Direction

## Train Leaderboard Student Models

## Save Models and Weights

In [None]:
###################### Saving weights and movel using s3 bucket ######################

session = boto3.session.Session()
s3 = session.client('s3')

bucket_name = '210bucket' 

# Teacher Model
#### IMPORTANT!!!!! Change the file name so that you do not overwrite the existing files
teacher_model_weights_path = 'weights/teacher_model_weights_resnet8_4.pth'
teacher_model_path = 'models/testing_teacher_model_resnet8_4.pth'

# Save state dict to buffer
teacher_model_weights_buffer = io.BytesIO()
torch.save(teacher_model.state_dict(), teacher_model_weights_buffer)
teacher_model_weights_buffer.seek(0)

# Save entire model to buffer
teacher_model_buffer = io.BytesIO()
torch.save(teacher_model, teacher_model_buffer)
teacher_model_buffer.seek(0)

# Upload to S3
s3.put_object(Bucket=bucket_name, Key=teacher_model_weights_path, Body=teacher_model_weights_buffer)
s3.put_object(Bucket=bucket_name, Key=teacher_model_path, Body=teacher_model_buffer)
print('teacher weights and architecture saved and exported to S3')

# # Student Model
# #### IMPORTANT!!!!! Change the file name so that you do not overwrite the existing files
# student_model_weights_path = 'weights/student_model_weights.pth' 
# student_model_path = 'models/student_model.pth'

# # Save state dict to buffer
# student_model_weights_buffer = io.BytesIO()
# torch.save(student_model.state_dict(), student_model_weights_buffer)
# student_model_weights_buffer.seek(0)

# # Save entire model to buffer
# student_model_buffer = io.BytesIO()
# torch.save(student_model, student_model_buffer)
# student_model_buffer.seek(0)

# # Upload to S3
# s3.put_object(Bucket=bucket_name, Key=student_model_weights_path, Body=student_model_weights_buffer)
# s3.put_object(Bucket=bucket_name, Key=student_model_path, Body=student_model_buffer)
# print('student weights and architecture saved and exported to S3')

## Read Models and Weights

In [5]:
# Initialize a session using Boto3 again 
session = boto3.session.Session()

s3 = session.client('s3')
bucket_name = '210bucket'  

teacher_model_weights_s3_path = 'weights/idenprof_teacher_resnet32x4_weights.pth'
# student_model_weights_s3_path = 'weights/testing_student_model_weights_rkd_prof.pth'

# Read files directly into memory
teacher_model_weights_buffer = io.BytesIO()
# student_model_weights_buffer = io.BytesIO()

s3.download_fileobj(bucket_name, teacher_model_weights_s3_path, teacher_model_weights_buffer)
# s3.download_fileobj(bucket_name, student_model_weights_s3_path, student_model_weights_buffer)

# Load the weights into the models
teacher_model_weights_buffer.seek(0)  # Move to the beginning of the buffer
# student_model_weights_buffer.seek(0)  

######## MAKE SURE THAT YOU HAVE THE CORRECT MODELS FOR WEIGHTS ########
# Teacher
# teacher_name = 'resnet8x4_idenprof'
teacher_name = 'resnet32x4_idenprof'
teacher_model = models_package.__dict__[teacher_name](num_class=10)
teacher_model.fc = nn.Linear(teacher_model.fc.in_features, 10)
teacher_model.load_state_dict(torch.load(teacher_model_weights_buffer))
teacher_model.eval()
# # Student
# student_model = CustomResNet18()
# student_model.load_state_dict(torch.load(student_model_weights_buffer))


ResNet(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(32, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (b

In [4]:
# Calling the function
model_name = 'resnet32x4_idenprof'
model_weight_path = 'weights/idenprof_teacher_resnet32x4_weights.pth'
num_class = 10
data_name = 'idenprof'  
batch_size = 32  
bucket_name = '210bucket'  


In [5]:
session = boto3.session.Session()
s3 = session.client('s3')

teacher_model_weights_buffer = io.BytesIO()
s3.download_fileobj(bucket_name, model_weight_path, teacher_model_weights_buffer)
teacher_model_weights_buffer.seek(0)  

# Load the model
model = models_package.__dict__[model_name](num_class=num_class)
checkpoint = torch.load(teacher_model_weights_buffer)
print("Keys in checkpoint:", checkpoint.keys())

Keys in checkpoint: odict_keys(['conv1.weight', 'bn1.weight', 'bn1.bias', 'bn1.running_mean', 'bn1.running_var', 'bn1.num_batches_tracked', 'layer1.0.conv1.weight', 'layer1.0.bn1.weight', 'layer1.0.bn1.bias', 'layer1.0.bn1.running_mean', 'layer1.0.bn1.running_var', 'layer1.0.bn1.num_batches_tracked', 'layer1.0.conv2.weight', 'layer1.0.bn2.weight', 'layer1.0.bn2.bias', 'layer1.0.bn2.running_mean', 'layer1.0.bn2.running_var', 'layer1.0.bn2.num_batches_tracked', 'layer1.0.downsample.0.weight', 'layer1.0.downsample.1.weight', 'layer1.0.downsample.1.bias', 'layer1.0.downsample.1.running_mean', 'layer1.0.downsample.1.running_var', 'layer1.0.downsample.1.num_batches_tracked', 'layer1.1.conv1.weight', 'layer1.1.bn1.weight', 'layer1.1.bn1.bias', 'layer1.1.bn1.running_mean', 'layer1.1.bn1.running_var', 'layer1.1.bn1.num_batches_tracked', 'layer1.1.conv2.weight', 'layer1.1.bn2.weight', 'layer1.1.bn2.bias', 'layer1.1.bn2.running_mean', 'layer1.1.bn2.running_var', 'layer1.1.bn2.num_batches_tracked'

In [7]:
import boto3
import io
import os
import torch
import torch.nn as nn
from collections import OrderedDict
import json
import models_package  
import numpy as np


# Function definitions
def get_lindsey_emb_fea(model, dataloader, batch_size):
    # Define the device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.eval() 
    embeddings = []

    with torch.no_grad(): 
        for data in dataloader:
            inputs, labels = data
            inputs = inputs.to(device)

            output = model(inputs)

            if isinstance(output, tuple):
                output = output[0]

            embeddings.append(output.cpu().numpy())

    embeddings = np.concatenate(embeddings, axis=0).tolist() 
    return embeddings


def get_emb_fea(model, dataloader, batch_size):
    ''' Used to extract the feature embeddings in a teacher model '''
    model.eval()

    EMB = {}

    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.cuda(), labels.cuda()

            # compute output
            emb_fea, logits = model(images, embed=True)

            for emb, i in zip(emb_fea, labels):
                i = i.item()
                emb_size = len(emb) 
                if str(i) in EMB:
                    for j in range(emb_size):
                        EMB[str(i)][j].append(round(emb[j].item(), 4))
                else:
                    EMB[str(i)] = [[] for _ in range(emb_size)]
                    for j in range(emb_size):
                        EMB[str(i)][j].append(round(emb[j].item(), 4))

    for key, value in EMB.items():
        for i in range(emb_size):
            EMB[key][i] = round(np.array(EMB[key][i]).mean(), 4)

    return EMB


def retrieve_teacher_class_weights(model_name, model_weight_path, num_class, data_name, dataloader, batch_size, bucket_name):
    ''' Use the extracted feature embeddings to create a json of class means for teacher'''

    session = boto3.session.Session()
    s3 = session.client('s3')

    teacher_model_weights_buffer = io.BytesIO()
    s3.download_fileobj(bucket_name, model_weight_path, teacher_model_weights_buffer)
    teacher_model_weights_buffer.seek(0)  

    # Load the model
    model = models_package.__dict__[model_name](num_class=num_class)
    checkpoint = torch.load(teacher_model_weights_buffer)
    # print("Keys in checkpoint:", checkpoint.keys())
    print("model is loaded properly")

    new_state_dict = OrderedDict()
    for k, v in checkpoint.items():
        name = k[7:] if k.startswith('module.') else k
        new_state_dict[name] = v
    model.load_state_dict(new_state_dict)
    model.eval()

    for param in model.parameters():
        param.requires_grad = False
    
    model = model.cuda()

    # emb = get_emb_fea(model=model, dataloader=dataloader, batch_size=batch_size)
    # emb_json = json.dumps(emb, indent=4)
    # with open("./class_means/{}_embedding_fea/{}.json".format(data_name, model_name), 'w', encoding='utf-8') as f:
    #     f.write(emb_json)

    emb = get_emb_fea(model=model, dataloader=dataloader, batch_size=batch_size)
    emb_json = json.dumps(emb, indent=4)

    # Create the directory if it doesn't exist
    output_dir = "./class_means/{}_embedding_fea".format(data_name)
    os.makedirs(output_dir, exist_ok=True)

    with open("{}/{}.json".format(output_dir, model_name), 'w', encoding='utf-8') as f:
        f.write(emb_json)

In [7]:
retrieve_teacher_class_weights(model_name, model_weight_path, num_class, data_name, testloader, batch_size, bucket_name)

model is loaded properly


# Knowledge Distillation 

## KD++

In [7]:
## Training script

def train(model, teacher, T_EMB, train_dataloader, optimizer, criterion, kd_loss, nd_loss, args, epoch):
    train_loss = AverageMeter()
    train_error = AverageMeter()

    Cls_loss = AverageMeter()
    Div_loss = AverageMeter()
    Norm_Dir_loss = AverageMeter()

    # Model on train mode
    model.train()
    teacher.eval()
    step_per_epoch = len(train_dataloader)

    for step, (images, labels) in enumerate(train_dataloader):
        start = time.time()
        if torch.cuda.is_available():
            images, labels = images.cuda(), labels.cuda() 

            # compute output
            s_emb, s_logits = model(images, embed=True)
    
            with torch.no_grad():
                t_emb, t_logits = teacher(images, embed=True)
    
            # cls loss
            cls_loss = criterion(s_logits, labels) * args.cls_loss_factor
            # KD loss
            div_loss = kd_loss(s_out = s_logits, t_out = t_logits) * min(1.0, epoch/args.warm_up)
            # ND loss
            norm_dir_loss = nd_loss(s_emb=s_emb, t_emb=t_emb, T_EMB=T_EMB, labels=labels)
    
            loss = cls_loss + div_loss + norm_dir_loss
            # measure accuracy and record loss
            batch_size = images.size(0)
            _, pred = s_logits.data.cpu().topk(1, dim=1)
            train_error.update(torch.ne(pred.squeeze(), labels.cpu()).float().sum().item() / batch_size, batch_size)
            train_loss.update(loss.item(), batch_size)
    
            Cls_loss.update(cls_loss.item(), batch_size)
            Div_loss.update(div_loss.item(), batch_size)
            Norm_Dir_loss.update(norm_dir_loss.item(), batch_size)
    
            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            t = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
            s1 = '\r{} [{}/{}]'.format(t, step+1, step_per_epoch)
            s2 = ' - {:.2f}ms/step - nd_loss: {:.3f} - kd_loss: {:.3f} - cls_loss: {:.3f} - train_loss: {:.3f} - train_acc: {:.3f}'.format(
                 1000 * (time.time() - start), norm_dir_loss.item(), div_loss.item(), cls_loss.item(), train_loss.val, 1-train_error.val)
    
            print(s1+s2, end='', flush=True)

    print()
    return Norm_Dir_loss.avg, Div_loss.avg, Cls_loss.avg, train_loss.avg, train_error.avg


def test(model, test_dataloader, criterion):
    test_loss = AverageMeter()
    test_error = AverageMeter()

    # Model on eval mode
    model.eval()

    with torch.no_grad():
        for images, labels in test_dataloader:
            images, labels = images.cuda(), labels.cuda()

            # compute logits
            logits = model(images, embed=False)

            loss = criterion(logits, labels)

            # measure accuracy and record loss
            batch_size = images.size(0)
            _, pred = logits.data.cpu().topk(1, dim=1)
            test_error.update(torch.ne(pred.squeeze(), labels.cpu()).float().sum().item() / batch_size, batch_size)
            test_loss.update(loss.item(), batch_size)

    return test_loss.avg, test_error.avg


def epoch_loop(model, teacher, train_loader, test_loader, num_class, args):

    device = "cuda" if torch.cuda.is_available() else "cpu"
    # model = nn.DataParallel(model, device_ids=args.gpus)
    model = nn.DataParallel(model)
    model.to(device)
    # teacher = nn.DataParallel(teacher, device_ids=args.gpus)
    teacher = nn.DataParallel(teacher)
    teacher.to(device)

    # loss
    criterion = nn.CrossEntropyLoss().to(device)
    kd_loss = KDLoss(kl_loss_factor=args.kd_loss_factor, T=args.t).to(device)
    nd_loss = DirectNormLoss(num_class=num_class, nd_loss_factor=args.nd_loss_factor).to(device)
    # optimizer
    optimizer = torch.optim.SGD(params=model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay, nesterov=True)

    # weights
    save_dir = Path(args.save_dir)
    weights = save_dir / 'weights'
    weights.mkdir(parents=True, exist_ok=True)
    last = weights / 'last'
    best = weights / 'best'

    # acc,loss
    acc_loss = save_dir / 'acc_loss'
    acc_loss.mkdir(parents=True, exist_ok=True)

    train_acc_savepath = acc_loss / 'train_acc.npy'
    train_loss_savepath = acc_loss / 'train_loss.npy'
    val_acc_savepath = acc_loss / 'val_acc.npy'
    val_loss_savepath = acc_loss / 'val_loss.npy'

    # tensorboard
    logdir = save_dir / 'logs'
    logdir.mkdir(parents=True, exist_ok=True)
    summary_writer = SummaryWriter(logdir, flush_secs=120)

    # resume
    if args.resume:
        checkpoint = torch.load(args.resume)
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        best_error = checkpoint['best_error']
        train_acc = checkpoint['train_acc']
        train_loss = checkpoint['train_loss']
        test_acc = checkpoint['test_acc']
        test_loss = checkpoint['test_loss']
        logger.info(colorstr('green', 'Resuming training from {} epoch'.format(start_epoch)))
    else:
        start_epoch = 0
        best_error = 0
        train_acc = []
        train_loss = []
        test_acc = []
        test_loss = []

    # Train model
    best_error = 1
    for epoch in range(start_epoch, args.epochs):
        if epoch in [150, 180, 210]:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.1
        print("Epoch {}/{}".format(epoch + 1, args.epochs))
        norm_dir_loss, div_loss, cls_loss, train_epoch_loss, train_error = train(model=model,
                                                                                 teacher=teacher,
                                                                                 T_EMB=T_EMB,
                                                                                 train_dataloader=train_loader,
                                                                                 optimizer=optimizer,
                                                                                 criterion=criterion,
                                                                                 kd_loss=kd_loss,
                                                                                 nd_loss=nd_loss,
                                                                                 args=args,
                                                                                 epoch=epoch)
        test_epoch_loss, test_error = test(model=model,
                                           test_dataloader=test_loader,
                                           criterion=criterion)

        s = "Train Loss: {:.3f}, Train Acc: {:.3f}, Test Loss: {:.3f}, Test Acc: {:.3f}, lr: {:.5f}".format(
            train_epoch_loss, 1-train_error, test_epoch_loss, 1-test_error, optimizer.param_groups[0]['lr'])
        logger.info(colorstr('green', s))

        # save acc,loss
        train_loss.append(train_epoch_loss)
        train_acc.append(1-train_error)
        test_loss.append(test_epoch_loss)
        test_acc.append(1-test_error)

        # save model
        is_best = test_error < best_error
        best_error = min(best_error, test_error)
        state = {
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'best_error': best_error,
                'train_acc': train_acc,
                'train_loss': train_loss,
                'test_acc': test_acc,
                'test_loss': test_loss,
            }

        last_path = last / 'epoch_{}_loss_{:.3f}_acc_{:.3f}'.format(
            epoch + 1, test_epoch_loss, 1-test_error)
        best_path = best / 'epoch_{}_acc_{:.3f}'.format(
                epoch + 1, 1-best_error)

        Save_Checkpoint(state, last, last_path, best, best_path, is_best)

        # tensorboard
        if epoch == 1:
            images, labels = next(iter(train_loader))
            img_grid = torchvision.utils.make_grid(images)
            summary_writer.add_image('Image', img_grid)
        summary_writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch)
        summary_writer.add_scalar('train_loss', train_epoch_loss, epoch)
        summary_writer.add_scalar('train_error', train_error, epoch)
        summary_writer.add_scalar('val_loss', test_epoch_loss, epoch)
        summary_writer.add_scalar('val_error', test_error, epoch)

        summary_writer.add_scalar('nd_loss', norm_dir_loss, epoch)
        summary_writer.add_scalar('kd_loss', div_loss, epoch)
        summary_writer.add_scalar('cls_loss', cls_loss, epoch)

    summary_writer.close()
    import os
    if not os.path.exists(train_acc_savepath) or not os.path.exists(train_loss_savepath):
        np.save(train_acc_savepath, train_acc)
        np.save(train_loss_savepath, train_loss)
        np.save(val_acc_savepath, test_acc)
        np.save(val_loss_savepath, test_loss)


In [8]:

if __name__ == "__main__":
    model_names = sorted(name for name in models_package.__dict__
                         if name.islower() and not name.startswith("__")
                         and callable(models_package.__dict__[name]))

    parser = argparse.ArgumentParser(description='PyTorch Cifar Training')
    parser.add_argument('-f') # added to make this run in collab
    parser.add_argument("--model_name", type=str, default="resnet8x4_idenprof", choices=model_names, help="model architecture")
    parser.add_argument("--dataset", type=str, default='idenprof')
    parser.add_argument("--epochs", type=int, default=240)
    # parser.add_argument("--epochs", type=int, default=4)
    parser.add_argument("--batch_size", type=int, default=64, help="batch size per gpu")
    parser.add_argument('--workers', default=8, type=int, help='number of data loading workers')
    parser.add_argument("--lr", type=float, default=0.1)
    parser.add_argument('--momentum', type=float, default=0.9, help='SGD momentum')
    parser.add_argument("--weight_decay", type=float, default=5e-4)

    parser.add_argument("--teacher", type=str, default="resnet32x4_idenprof", help="teacher architecture")
    parser.add_argument("--teacher_weights", type=str, default="./weights/resnet_32x4/weights.pth", help="teacher weights path")
    parser.add_argument("--cls_loss_factor", type=float, default=1.0, help="cls loss weight factor")
    parser.add_argument("--kd_loss_factor", type=float, default=1.0, help="KD loss weight factor")
    parser.add_argument("--t", type=float, default=4.0, help="temperature")
    parser.add_argument("--nd_loss_factor", type=float, default=1.0, help="ND loss weight factor")
    parser.add_argument("--warm_up", type=float, default=20.0, help='loss weight warm up epochs')

    parser.add_argument("--gpus", type=list, default=[0, 1])
    parser.add_argument('--seed', default=None, type=int, help='seed for initializing training.')
    parser.add_argument("--resume", type=str, help="best ckpt's path to resume most recent training")
    parser.add_argument("--save_dir", type=str, default="./run/IdenProf/KD++", help="save path, eg, acc_loss, weights, tensorboard, and so on")
    args = parser.parse_args()

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        cudnn.benchmark = False
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    logging.basicConfig(level=logging.INFO, format='%(asctime)s [line:%(lineno)d] %(message)s',
                        datefmt='%d %b %Y %H:%M:%S')
    logger = logging.getLogger(__name__)

    args.batch_size = args.batch_size * len(args.gpus)
    # args.batch_size = args.batch_size * 1

    # logger.info(colorstr('green', "Distribute train, gpus:{}, total batch size:{}, epoch:{}".format(args.gpus, args.batch_size, args.epochs)))
    logger.info(colorstr('green', "Distribute train, total batch size:{}, epoch:{}".format(args.batch_size, args.epochs)))


    # train_set, test_set, num_class = IDENPROF(name=args.dataset)
    num_class = 10
    model = models_package.__dict__[args.model_name](num_class=num_class)

    # if args.model_name in ['wrn40_1_cifar', 'mobilenetv2', 'shufflev1_cifar', 'shufflev2_cifar']:
    #     model = EmbTrans(student=model, model_name=args.model_name)

    teacher = models_package.__dict__[args.teacher](num_class=num_class)

    if args.teacher_weights:
        print('Load Teacher Weights')
        session = boto3.session.Session()
        s3 = session.client('s3')
    
        teacher_model_weights_buffer = io.BytesIO()
        s3.download_fileobj(bucket_name, model_weight_path, teacher_model_weights_buffer)
        teacher_model_weights_buffer.seek(0)  
    
        # Load the model
        # model = models_package.__dict__[model_name](num_class=num_class)
        teacher_ckpt = torch.load(teacher_model_weights_buffer)
        teacher.load_state_dict(teacher_ckpt)
        
        for param in teacher.parameters():
            param.requires_grad = False

    # res56    ./ckpt/teacher/resnet56/center_emb_train.json
    # res32x4  ./ckpt/teacher/resnet32x4/center_emb_train.json
    # wrn40_2  ./ckpt/teacher/wrn_40_2/center_emb_train.json
    # res50    ./ckpt/teacher/resnet50/center_emb_train.json
    # class-mean
    with open("./class_means/idenprof_embedding_fea/resnet32x4_idenprof.json", 'r') as f:
        T_EMB = json.load(f)
    f.close()

    logger.info(colorstr('green', 'Use ' + args.teacher + ' Training ' + args.model_name + ' ...'))
    # Train the model
    epoch_loop(model=model, teacher=teacher, train_loader = trainloader, test_loader = testloader, num_class = num_class, args=args)


19 Nov 2023 02:42:27 [line:84] [32mDistribute train, total batch size:128, epoch:240[0m
19 Nov 2023 02:42:27 [line:1255] Found credentials in shared credentials file: ~/.aws/credentials


Load Teacher Weights


19 Nov 2023 02:42:29 [line:122] [32mUse resnet32x4_idenprof Training resnet8x4_idenprof ...[0m


Epoch 1/240
2023-11-19 02:44:51 [141/141] - 508.69ms/step - nd_loss: 1.000 - kd_loss: 0.000 - cls_loss: 2.307 - train_loss: 3.307 - train_acc: 0.15009


19 Nov 2023 02:44:55 [line:175] [32mTrain Loss: 4.750, Train Acc: 0.140, Test Loss: 2.197, Test Acc: 0.193, lr: 0.10000[0m


Epoch 2/240
2023-11-19 02:47:16 [141/141] - 451.49ms/step - nd_loss: 1.000 - kd_loss: 0.701 - cls_loss: 2.081 - train_loss: 3.781 - train_acc: 0.275


19 Nov 2023 02:47:20 [line:175] [32mTrain Loss: 3.825, Train Acc: 0.239, Test Loss: 2.224, Test Acc: 0.170, lr: 0.10000[0m


Epoch 3/240
2023-11-19 02:49:42 [141/141] - 451.53ms/step - nd_loss: 0.999 - kd_loss: 1.517 - cls_loss: 2.394 - train_loss: 4.910 - train_acc: 0.325


19 Nov 2023 02:49:46 [line:175] [32mTrain Loss: 4.452, Train Acc: 0.274, Test Loss: 2.059, Test Acc: 0.261, lr: 0.10000[0m


Epoch 4/240
2023-11-19 02:52:06 [141/141] - 450.64ms/step - nd_loss: 0.999 - kd_loss: 1.982 - cls_loss: 2.061 - train_loss: 5.042 - train_acc: 0.325


19 Nov 2023 02:52:10 [line:175] [32mTrain Loss: 5.100, Train Acc: 0.282, Test Loss: 2.140, Test Acc: 0.268, lr: 0.10000[0m


Epoch 5/240
2023-11-19 02:54:31 [141/141] - 450.15ms/step - nd_loss: 0.999 - kd_loss: 2.394 - cls_loss: 1.927 - train_loss: 5.321 - train_acc: 0.300


19 Nov 2023 02:54:35 [line:175] [32mTrain Loss: 5.753, Train Acc: 0.288, Test Loss: 2.139, Test Acc: 0.276, lr: 0.10000[0m


Epoch 6/240
2023-11-19 02:56:56 [141/141] - 450.76ms/step - nd_loss: 0.999 - kd_loss: 3.282 - cls_loss: 2.101 - train_loss: 6.383 - train_acc: 0.425


19 Nov 2023 02:57:00 [line:175] [32mTrain Loss: 6.390, Train Acc: 0.290, Test Loss: 2.069, Test Acc: 0.282, lr: 0.10000[0m


Epoch 7/240
2023-11-19 02:59:20 [141/141] - 451.75ms/step - nd_loss: 0.999 - kd_loss: 3.194 - cls_loss: 2.185 - train_loss: 6.378 - train_acc: 0.200


19 Nov 2023 02:59:24 [line:175] [32mTrain Loss: 7.070, Train Acc: 0.298, Test Loss: 2.245, Test Acc: 0.298, lr: 0.10000[0m


Epoch 8/240
2023-11-19 03:01:45 [141/141] - 450.01ms/step - nd_loss: 1.000 - kd_loss: 5.026 - cls_loss: 1.982 - train_loss: 8.007 - train_acc: 0.300


19 Nov 2023 03:01:49 [line:175] [32mTrain Loss: 7.723, Train Acc: 0.289, Test Loss: 2.313, Test Acc: 0.281, lr: 0.10000[0m


Epoch 9/240
2023-11-19 03:04:09 [141/141] - 451.15ms/step - nd_loss: 0.999 - kd_loss: 5.203 - cls_loss: 1.812 - train_loss: 8.015 - train_acc: 0.3254


19 Nov 2023 03:04:13 [line:175] [32mTrain Loss: 8.385, Train Acc: 0.293, Test Loss: 2.155, Test Acc: 0.288, lr: 0.10000[0m


Epoch 10/240
2023-11-19 03:06:34 [141/141] - 450.58ms/step - nd_loss: 0.999 - kd_loss: 6.619 - cls_loss: 2.084 - train_loss: 9.702 - train_acc: 0.2751


19 Nov 2023 03:06:38 [line:175] [32mTrain Loss: 9.028, Train Acc: 0.292, Test Loss: 2.325, Test Acc: 0.210, lr: 0.10000[0m


Epoch 11/240
2023-11-19 03:08:59 [141/141] - 450.83ms/step - nd_loss: 0.999 - kd_loss: 6.757 - cls_loss: 2.119 - train_loss: 9.875 - train_acc: 0.4257


19 Nov 2023 03:09:03 [line:175] [32mTrain Loss: 9.720, Train Acc: 0.294, Test Loss: 2.246, Test Acc: 0.288, lr: 0.10000[0m


Epoch 12/240
2023-11-19 03:11:23 [141/141] - 450.44ms/step - nd_loss: 1.000 - kd_loss: 7.899 - cls_loss: 2.270 - train_loss: 11.169 - train_acc: 0.200


19 Nov 2023 03:11:27 [line:175] [32mTrain Loss: 10.486, Train Acc: 0.287, Test Loss: 2.176, Test Acc: 0.256, lr: 0.10000[0m


Epoch 13/240
2023-11-19 03:13:48 [141/141] - 450.93ms/step - nd_loss: 1.000 - kd_loss: 8.313 - cls_loss: 2.340 - train_loss: 11.652 - train_acc: 0.250


19 Nov 2023 03:13:52 [line:175] [32mTrain Loss: 11.088, Train Acc: 0.293, Test Loss: 2.136, Test Acc: 0.267, lr: 0.10000[0m


Epoch 14/240
2023-11-19 03:16:12 [141/141] - 452.60ms/step - nd_loss: 1.000 - kd_loss: 8.043 - cls_loss: 2.177 - train_loss: 11.220 - train_acc: 0.250


19 Nov 2023 03:16:16 [line:175] [32mTrain Loss: 11.520, Train Acc: 0.307, Test Loss: 2.399, Test Acc: 0.206, lr: 0.10000[0m


Epoch 15/240
2023-11-19 03:18:37 [141/141] - 450.22ms/step - nd_loss: 1.000 - kd_loss: 7.887 - cls_loss: 1.871 - train_loss: 10.758 - train_acc: 0.325


19 Nov 2023 03:18:41 [line:175] [32mTrain Loss: 11.754, Train Acc: 0.334, Test Loss: 2.244, Test Acc: 0.284, lr: 0.10000[0m


Epoch 16/240
2023-11-19 03:21:02 [141/141] - 451.31ms/step - nd_loss: 0.999 - kd_loss: 7.781 - cls_loss: 1.847 - train_loss: 10.627 - train_acc: 0.4007


19 Nov 2023 03:21:06 [line:175] [32mTrain Loss: 12.064, Train Acc: 0.343, Test Loss: 2.217, Test Acc: 0.350, lr: 0.10000[0m


Epoch 17/240
2023-11-19 03:23:26 [141/141] - 451.17ms/step - nd_loss: 0.999 - kd_loss: 9.258 - cls_loss: 2.014 - train_loss: 12.271 - train_acc: 0.3251


19 Nov 2023 03:23:30 [line:175] [32mTrain Loss: 11.479, Train Acc: 0.405, Test Loss: 2.464, Test Acc: 0.366, lr: 0.10000[0m


Epoch 18/240
2023-11-19 03:25:51 [141/141] - 451.24ms/step - nd_loss: 0.998 - kd_loss: 7.062 - cls_loss: 1.978 - train_loss: 10.038 - train_acc: 0.5258


19 Nov 2023 03:25:55 [line:175] [32mTrain Loss: 10.783, Train Acc: 0.464, Test Loss: 3.608, Test Acc: 0.302, lr: 0.10000[0m


Epoch 19/240
2023-11-19 03:28:15 [141/141] - 451.85ms/step - nd_loss: 0.997 - kd_loss: 5.572 - cls_loss: 1.614 - train_loss: 8.184 - train_acc: 0.6250


19 Nov 2023 03:28:20 [line:175] [32mTrain Loss: 10.062, Train Acc: 0.513, Test Loss: 2.970, Test Acc: 0.493, lr: 0.10000[0m


Epoch 20/240
2023-11-19 03:30:40 [141/141] - 451.76ms/step - nd_loss: 0.997 - kd_loss: 7.558 - cls_loss: 1.778 - train_loss: 10.333 - train_acc: 0.500


19 Nov 2023 03:30:44 [line:175] [32mTrain Loss: 9.341, Train Acc: 0.558, Test Loss: 2.862, Test Acc: 0.428, lr: 0.10000[0m


Epoch 21/240
2023-11-19 03:33:05 [141/141] - 452.07ms/step - nd_loss: 0.998 - kd_loss: 5.893 - cls_loss: 1.467 - train_loss: 8.358 - train_acc: 0.5757


19 Nov 2023 03:33:09 [line:175] [32mTrain Loss: 8.982, Train Acc: 0.588, Test Loss: 2.326, Test Acc: 0.491, lr: 0.10000[0m


Epoch 22/240
2023-11-19 03:35:29 [141/141] - 450.91ms/step - nd_loss: 0.998 - kd_loss: 5.375 - cls_loss: 1.199 - train_loss: 7.571 - train_acc: 0.6502


19 Nov 2023 03:35:33 [line:175] [32mTrain Loss: 8.620, Train Acc: 0.603, Test Loss: 2.137, Test Acc: 0.510, lr: 0.10000[0m


Epoch 23/240
2023-11-19 03:37:54 [141/141] - 450.45ms/step - nd_loss: 0.998 - kd_loss: 4.829 - cls_loss: 1.452 - train_loss: 7.279 - train_acc: 0.6004


19 Nov 2023 03:37:58 [line:175] [32mTrain Loss: 8.153, Train Acc: 0.618, Test Loss: 1.696, Test Acc: 0.555, lr: 0.10000[0m


Epoch 24/240
2023-11-19 03:40:19 [141/141] - 450.87ms/step - nd_loss: 0.997 - kd_loss: 7.643 - cls_loss: 2.341 - train_loss: 10.980 - train_acc: 0.500


19 Nov 2023 03:40:23 [line:175] [32mTrain Loss: 7.943, Train Acc: 0.630, Test Loss: 2.621, Test Acc: 0.516, lr: 0.10000[0m


Epoch 25/240
2023-11-19 03:42:43 [141/141] - 451.19ms/step - nd_loss: 0.997 - kd_loss: 4.163 - cls_loss: 0.966 - train_loss: 6.126 - train_acc: 0.7002


19 Nov 2023 03:42:47 [line:175] [32mTrain Loss: 7.616, Train Acc: 0.642, Test Loss: 1.987, Test Acc: 0.567, lr: 0.10000[0m


Epoch 26/240
2023-11-19 03:45:08 [141/141] - 450.97ms/step - nd_loss: 0.997 - kd_loss: 7.447 - cls_loss: 1.755 - train_loss: 10.199 - train_acc: 0.550


19 Nov 2023 03:45:12 [line:175] [32mTrain Loss: 7.352, Train Acc: 0.651, Test Loss: 2.543, Test Acc: 0.487, lr: 0.10000[0m


Epoch 27/240
2023-11-19 03:47:32 [141/141] - 450.54ms/step - nd_loss: 0.997 - kd_loss: 6.447 - cls_loss: 1.859 - train_loss: 9.303 - train_acc: 0.525


19 Nov 2023 03:47:36 [line:175] [32mTrain Loss: 7.232, Train Acc: 0.651, Test Loss: 1.614, Test Acc: 0.612, lr: 0.10000[0m


Epoch 28/240
2023-11-19 03:49:57 [141/141] - 451.18ms/step - nd_loss: 0.997 - kd_loss: 4.316 - cls_loss: 1.161 - train_loss: 6.474 - train_acc: 0.750


19 Nov 2023 03:50:01 [line:175] [32mTrain Loss: 6.982, Train Acc: 0.665, Test Loss: 1.593, Test Acc: 0.606, lr: 0.10000[0m


Epoch 29/240
2023-11-19 03:52:22 [141/141] - 451.19ms/step - nd_loss: 0.997 - kd_loss: 4.466 - cls_loss: 1.061 - train_loss: 6.524 - train_acc: 0.600


19 Nov 2023 03:52:26 [line:175] [32mTrain Loss: 6.783, Train Acc: 0.671, Test Loss: 1.823, Test Acc: 0.605, lr: 0.10000[0m


Epoch 30/240
2023-11-19 03:54:46 [141/141] - 451.29ms/step - nd_loss: 0.997 - kd_loss: 4.074 - cls_loss: 0.794 - train_loss: 5.865 - train_acc: 0.775


19 Nov 2023 03:54:50 [line:175] [32mTrain Loss: 6.554, Train Acc: 0.682, Test Loss: 1.404, Test Acc: 0.624, lr: 0.10000[0m


Epoch 31/240
2023-11-19 03:57:11 [141/141] - 451.31ms/step - nd_loss: 0.996 - kd_loss: 4.453 - cls_loss: 0.901 - train_loss: 6.350 - train_acc: 0.700


19 Nov 2023 03:57:15 [line:175] [32mTrain Loss: 6.334, Train Acc: 0.690, Test Loss: 1.444, Test Acc: 0.630, lr: 0.10000[0m


Epoch 32/240
2023-11-19 03:59:35 [141/141] - 451.47ms/step - nd_loss: 0.996 - kd_loss: 5.338 - cls_loss: 1.568 - train_loss: 7.902 - train_acc: 0.700


19 Nov 2023 03:59:40 [line:175] [32mTrain Loss: 6.189, Train Acc: 0.696, Test Loss: 2.397, Test Acc: 0.532, lr: 0.10000[0m


Epoch 33/240
2023-11-19 04:02:00 [141/141] - 450.52ms/step - nd_loss: 0.996 - kd_loss: 4.023 - cls_loss: 1.241 - train_loss: 6.260 - train_acc: 0.600


19 Nov 2023 04:02:04 [line:175] [32mTrain Loss: 6.126, Train Acc: 0.704, Test Loss: 1.593, Test Acc: 0.625, lr: 0.10000[0m


Epoch 34/240
2023-11-19 04:04:25 [141/141] - 451.59ms/step - nd_loss: 0.996 - kd_loss: 4.366 - cls_loss: 1.346 - train_loss: 6.707 - train_acc: 0.650


19 Nov 2023 04:04:29 [line:175] [32mTrain Loss: 5.879, Train Acc: 0.713, Test Loss: 1.682, Test Acc: 0.627, lr: 0.10000[0m


Epoch 35/240
2023-11-19 04:06:49 [141/141] - 451.35ms/step - nd_loss: 0.996 - kd_loss: 5.306 - cls_loss: 1.798 - train_loss: 8.101 - train_acc: 0.575


19 Nov 2023 04:06:53 [line:175] [32mTrain Loss: 5.997, Train Acc: 0.711, Test Loss: 1.837, Test Acc: 0.595, lr: 0.10000[0m


Epoch 36/240
2023-11-19 04:09:14 [141/141] - 450.66ms/step - nd_loss: 0.996 - kd_loss: 4.278 - cls_loss: 1.159 - train_loss: 6.433 - train_acc: 0.650


19 Nov 2023 04:09:18 [line:175] [32mTrain Loss: 5.786, Train Acc: 0.719, Test Loss: 1.532, Test Acc: 0.629, lr: 0.10000[0m


Epoch 37/240
2023-11-19 04:11:39 [141/141] - 450.41ms/step - nd_loss: 0.996 - kd_loss: 3.760 - cls_loss: 0.789 - train_loss: 5.545 - train_acc: 0.700


19 Nov 2023 04:11:43 [line:175] [32mTrain Loss: 5.564, Train Acc: 0.733, Test Loss: 1.474, Test Acc: 0.636, lr: 0.10000[0m


Epoch 38/240
2023-11-19 04:14:03 [141/141] - 451.42ms/step - nd_loss: 0.995 - kd_loss: 3.576 - cls_loss: 0.851 - train_loss: 5.422 - train_acc: 0.800


19 Nov 2023 04:14:07 [line:175] [32mTrain Loss: 5.635, Train Acc: 0.730, Test Loss: 1.646, Test Acc: 0.581, lr: 0.10000[0m


Epoch 39/240
2023-11-19 04:16:28 [141/141] - 451.75ms/step - nd_loss: 0.995 - kd_loss: 4.501 - cls_loss: 1.258 - train_loss: 6.754 - train_acc: 0.700


19 Nov 2023 04:16:32 [line:175] [32mTrain Loss: 5.489, Train Acc: 0.736, Test Loss: 1.584, Test Acc: 0.603, lr: 0.10000[0m


Epoch 40/240
2023-11-19 04:18:52 [141/141] - 450.81ms/step - nd_loss: 0.995 - kd_loss: 3.084 - cls_loss: 1.022 - train_loss: 5.101 - train_acc: 0.725


19 Nov 2023 04:18:56 [line:175] [32mTrain Loss: 5.255, Train Acc: 0.751, Test Loss: 1.586, Test Acc: 0.631, lr: 0.10000[0m


Epoch 41/240
2023-11-19 04:21:17 [141/141] - 451.07ms/step - nd_loss: 0.995 - kd_loss: 3.425 - cls_loss: 0.799 - train_loss: 5.219 - train_acc: 0.775


19 Nov 2023 04:21:21 [line:175] [32mTrain Loss: 5.135, Train Acc: 0.758, Test Loss: 1.596, Test Acc: 0.644, lr: 0.10000[0m


Epoch 42/240
2023-11-19 04:23:42 [141/141] - 451.77ms/step - nd_loss: 0.995 - kd_loss: 3.773 - cls_loss: 0.841 - train_loss: 5.609 - train_acc: 0.800


19 Nov 2023 04:23:46 [line:175] [32mTrain Loss: 4.974, Train Acc: 0.767, Test Loss: 1.593, Test Acc: 0.633, lr: 0.10000[0m


Epoch 43/240
2023-11-19 04:26:06 [141/141] - 451.51ms/step - nd_loss: 0.995 - kd_loss: 2.935 - cls_loss: 0.596 - train_loss: 4.526 - train_acc: 0.750


19 Nov 2023 04:26:10 [line:175] [32mTrain Loss: 4.755, Train Acc: 0.779, Test Loss: 1.360, Test Acc: 0.679, lr: 0.10000[0m


Epoch 44/240
2023-11-19 04:28:31 [141/141] - 451.63ms/step - nd_loss: 0.995 - kd_loss: 3.437 - cls_loss: 0.665 - train_loss: 5.097 - train_acc: 0.850


19 Nov 2023 04:28:35 [line:175] [32mTrain Loss: 4.746, Train Acc: 0.783, Test Loss: 1.615, Test Acc: 0.623, lr: 0.10000[0m


Epoch 45/240
2023-11-19 04:30:56 [141/141] - 450.56ms/step - nd_loss: 0.995 - kd_loss: 3.764 - cls_loss: 0.579 - train_loss: 5.337 - train_acc: 0.850


19 Nov 2023 04:31:00 [line:175] [32mTrain Loss: 4.744, Train Acc: 0.783, Test Loss: 1.519, Test Acc: 0.639, lr: 0.10000[0m


Epoch 46/240
2023-11-19 04:33:20 [141/141] - 451.31ms/step - nd_loss: 0.994 - kd_loss: 3.143 - cls_loss: 0.533 - train_loss: 4.670 - train_acc: 0.800


19 Nov 2023 04:33:24 [line:175] [32mTrain Loss: 4.549, Train Acc: 0.785, Test Loss: 1.787, Test Acc: 0.601, lr: 0.10000[0m


Epoch 47/240
2023-11-19 04:35:45 [141/141] - 450.25ms/step - nd_loss: 0.994 - kd_loss: 2.685 - cls_loss: 0.592 - train_loss: 4.271 - train_acc: 0.775


19 Nov 2023 04:35:49 [line:175] [32mTrain Loss: 4.459, Train Acc: 0.796, Test Loss: 1.579, Test Acc: 0.635, lr: 0.10000[0m


Epoch 48/240
2023-11-19 04:38:09 [141/141] - 451.19ms/step - nd_loss: 0.994 - kd_loss: 2.598 - cls_loss: 0.359 - train_loss: 3.951 - train_acc: 0.825


19 Nov 2023 04:38:13 [line:175] [32mTrain Loss: 4.208, Train Acc: 0.813, Test Loss: 1.457, Test Acc: 0.639, lr: 0.10000[0m


Epoch 49/240
2023-11-19 04:40:34 [141/141] - 450.86ms/step - nd_loss: 0.994 - kd_loss: 2.213 - cls_loss: 0.537 - train_loss: 3.744 - train_acc: 0.800


19 Nov 2023 04:40:38 [line:175] [32mTrain Loss: 4.117, Train Acc: 0.823, Test Loss: 1.772, Test Acc: 0.609, lr: 0.10000[0m


Epoch 50/240
2023-11-19 04:42:58 [141/141] - 450.99ms/step - nd_loss: 0.994 - kd_loss: 2.697 - cls_loss: 0.435 - train_loss: 4.126 - train_acc: 0.875


19 Nov 2023 04:43:03 [line:175] [32mTrain Loss: 4.185, Train Acc: 0.819, Test Loss: 1.485, Test Acc: 0.641, lr: 0.10000[0m


Epoch 51/240
2023-11-19 04:45:23 [141/141] - 450.64ms/step - nd_loss: 0.994 - kd_loss: 3.425 - cls_loss: 0.899 - train_loss: 5.317 - train_acc: 0.775


19 Nov 2023 04:45:27 [line:175] [32mTrain Loss: 4.021, Train Acc: 0.827, Test Loss: 1.650, Test Acc: 0.617, lr: 0.10000[0m


Epoch 52/240
2023-11-19 04:47:48 [141/141] - 450.60ms/step - nd_loss: 0.994 - kd_loss: 2.863 - cls_loss: 0.514 - train_loss: 4.370 - train_acc: 0.775


19 Nov 2023 04:47:52 [line:175] [32mTrain Loss: 4.048, Train Acc: 0.831, Test Loss: 1.621, Test Acc: 0.636, lr: 0.10000[0m


Epoch 53/240
2023-11-19 04:50:12 [141/141] - 452.08ms/step - nd_loss: 0.993 - kd_loss: 2.169 - cls_loss: 0.493 - train_loss: 3.656 - train_acc: 0.875


19 Nov 2023 04:50:16 [line:175] [32mTrain Loss: 3.856, Train Acc: 0.841, Test Loss: 1.867, Test Acc: 0.582, lr: 0.10000[0m


Epoch 54/240
2023-11-19 04:52:37 [141/141] - 450.87ms/step - nd_loss: 0.992 - kd_loss: 3.150 - cls_loss: 0.677 - train_loss: 4.819 - train_acc: 0.825


19 Nov 2023 04:52:41 [line:175] [32mTrain Loss: 3.781, Train Acc: 0.845, Test Loss: 1.720, Test Acc: 0.658, lr: 0.10000[0m


Epoch 55/240
2023-11-19 04:55:02 [141/141] - 450.52ms/step - nd_loss: 0.992 - kd_loss: 2.663 - cls_loss: 0.616 - train_loss: 4.272 - train_acc: 0.800


19 Nov 2023 04:55:06 [line:175] [32mTrain Loss: 3.804, Train Acc: 0.849, Test Loss: 1.662, Test Acc: 0.617, lr: 0.10000[0m


Epoch 56/240
2023-11-19 04:57:26 [141/141] - 450.73ms/step - nd_loss: 0.993 - kd_loss: 3.054 - cls_loss: 0.736 - train_loss: 4.783 - train_acc: 0.800


19 Nov 2023 04:57:30 [line:175] [32mTrain Loss: 3.547, Train Acc: 0.864, Test Loss: 1.319, Test Acc: 0.671, lr: 0.10000[0m


Epoch 57/240
2023-11-19 04:59:51 [141/141] - 450.24ms/step - nd_loss: 0.993 - kd_loss: 2.622 - cls_loss: 0.751 - train_loss: 4.365 - train_acc: 0.775


19 Nov 2023 04:59:55 [line:175] [32mTrain Loss: 3.459, Train Acc: 0.872, Test Loss: 1.632, Test Acc: 0.641, lr: 0.10000[0m


Epoch 58/240
2023-11-19 05:02:16 [141/141] - 451.16ms/step - nd_loss: 0.992 - kd_loss: 2.045 - cls_loss: 0.477 - train_loss: 3.514 - train_acc: 0.825


19 Nov 2023 05:02:20 [line:175] [32mTrain Loss: 3.434, Train Acc: 0.871, Test Loss: 1.951, Test Acc: 0.629, lr: 0.10000[0m


Epoch 59/240
2023-11-19 05:04:40 [141/141] - 451.25ms/step - nd_loss: 0.992 - kd_loss: 2.319 - cls_loss: 0.497 - train_loss: 3.807 - train_acc: 0.825


19 Nov 2023 05:04:44 [line:175] [32mTrain Loss: 3.472, Train Acc: 0.871, Test Loss: 1.627, Test Acc: 0.644, lr: 0.10000[0m


Epoch 60/240
2023-11-19 05:07:05 [141/141] - 451.12ms/step - nd_loss: 0.992 - kd_loss: 1.562 - cls_loss: 0.256 - train_loss: 2.810 - train_acc: 0.925


19 Nov 2023 05:07:09 [line:175] [32mTrain Loss: 3.330, Train Acc: 0.882, Test Loss: 1.652, Test Acc: 0.634, lr: 0.10000[0m


Epoch 61/240
2023-11-19 05:09:29 [141/141] - 451.82ms/step - nd_loss: 0.992 - kd_loss: 2.027 - cls_loss: 0.352 - train_loss: 3.371 - train_acc: 0.825


19 Nov 2023 05:09:34 [line:175] [32mTrain Loss: 3.207, Train Acc: 0.891, Test Loss: 2.058, Test Acc: 0.608, lr: 0.10000[0m


Epoch 62/240
2023-11-19 05:11:54 [141/141] - 450.62ms/step - nd_loss: 0.992 - kd_loss: 2.339 - cls_loss: 0.587 - train_loss: 3.918 - train_acc: 0.775


19 Nov 2023 05:11:58 [line:175] [32mTrain Loss: 3.251, Train Acc: 0.886, Test Loss: 1.443, Test Acc: 0.650, lr: 0.10000[0m


Epoch 63/240
2023-11-19 05:14:19 [141/141] - 450.21ms/step - nd_loss: 0.992 - kd_loss: 2.283 - cls_loss: 0.444 - train_loss: 3.718 - train_acc: 0.900


19 Nov 2023 05:14:23 [line:175] [32mTrain Loss: 3.279, Train Acc: 0.885, Test Loss: 1.912, Test Acc: 0.601, lr: 0.10000[0m


Epoch 64/240
2023-11-19 05:16:43 [141/141] - 449.94ms/step - nd_loss: 0.991 - kd_loss: 1.818 - cls_loss: 0.265 - train_loss: 3.074 - train_acc: 0.900


19 Nov 2023 05:16:48 [line:175] [32mTrain Loss: 3.172, Train Acc: 0.894, Test Loss: 1.656, Test Acc: 0.613, lr: 0.10000[0m


Epoch 65/240
2023-11-19 05:19:08 [141/141] - 450.98ms/step - nd_loss: 0.992 - kd_loss: 1.920 - cls_loss: 0.379 - train_loss: 3.291 - train_acc: 0.800


19 Nov 2023 05:19:12 [line:175] [32mTrain Loss: 2.934, Train Acc: 0.914, Test Loss: 1.450, Test Acc: 0.641, lr: 0.10000[0m


Epoch 66/240
2023-11-19 05:21:33 [141/141] - 451.41ms/step - nd_loss: 0.991 - kd_loss: 1.788 - cls_loss: 0.411 - train_loss: 3.190 - train_acc: 0.875


19 Nov 2023 05:21:37 [line:175] [32mTrain Loss: 2.982, Train Acc: 0.916, Test Loss: 1.588, Test Acc: 0.657, lr: 0.10000[0m


Epoch 67/240
2023-11-19 05:23:57 [141/141] - 451.51ms/step - nd_loss: 0.991 - kd_loss: 1.460 - cls_loss: 0.135 - train_loss: 2.586 - train_acc: 0.975


19 Nov 2023 05:24:01 [line:175] [32mTrain Loss: 2.938, Train Acc: 0.914, Test Loss: 1.554, Test Acc: 0.658, lr: 0.10000[0m


Epoch 68/240
2023-11-19 05:26:22 [141/141] - 450.41ms/step - nd_loss: 0.991 - kd_loss: 1.715 - cls_loss: 0.179 - train_loss: 2.885 - train_acc: 0.950


19 Nov 2023 05:26:26 [line:175] [32mTrain Loss: 3.023, Train Acc: 0.909, Test Loss: 1.626, Test Acc: 0.655, lr: 0.10000[0m


Epoch 69/240
2023-11-19 05:28:47 [141/141] - 450.79ms/step - nd_loss: 0.991 - kd_loss: 1.839 - cls_loss: 0.335 - train_loss: 3.165 - train_acc: 0.925


19 Nov 2023 05:28:51 [line:175] [32mTrain Loss: 3.108, Train Acc: 0.903, Test Loss: 1.451, Test Acc: 0.671, lr: 0.10000[0m


Epoch 70/240
2023-11-19 05:31:11 [141/141] - 452.24ms/step - nd_loss: 0.992 - kd_loss: 1.836 - cls_loss: 0.187 - train_loss: 3.014 - train_acc: 0.900


19 Nov 2023 05:31:15 [line:175] [32mTrain Loss: 2.818, Train Acc: 0.920, Test Loss: 1.639, Test Acc: 0.629, lr: 0.10000[0m


Epoch 71/240
2023-11-19 05:33:36 [141/141] - 450.96ms/step - nd_loss: 0.991 - kd_loss: 1.826 - cls_loss: 0.238 - train_loss: 3.055 - train_acc: 0.900


19 Nov 2023 05:33:40 [line:175] [32mTrain Loss: 2.880, Train Acc: 0.920, Test Loss: 1.410, Test Acc: 0.683, lr: 0.10000[0m


Epoch 72/240
2023-11-19 05:36:01 [141/141] - 450.11ms/step - nd_loss: 0.991 - kd_loss: 2.131 - cls_loss: 0.512 - train_loss: 3.634 - train_acc: 0.825


19 Nov 2023 05:36:05 [line:175] [32mTrain Loss: 2.812, Train Acc: 0.922, Test Loss: 1.636, Test Acc: 0.639, lr: 0.10000[0m


Epoch 73/240
2023-11-19 05:38:25 [141/141] - 451.11ms/step - nd_loss: 0.990 - kd_loss: 1.593 - cls_loss: 0.361 - train_loss: 2.945 - train_acc: 0.825


19 Nov 2023 05:38:29 [line:175] [32mTrain Loss: 2.711, Train Acc: 0.937, Test Loss: 1.524, Test Acc: 0.655, lr: 0.10000[0m


Epoch 74/240
2023-11-19 05:40:50 [141/141] - 451.74ms/step - nd_loss: 0.991 - kd_loss: 1.570 - cls_loss: 0.187 - train_loss: 2.747 - train_acc: 0.925


19 Nov 2023 05:40:54 [line:175] [32mTrain Loss: 2.736, Train Acc: 0.935, Test Loss: 1.428, Test Acc: 0.672, lr: 0.10000[0m


Epoch 75/240
2023-11-19 05:43:14 [141/141] - 450.69ms/step - nd_loss: 0.991 - kd_loss: 1.902 - cls_loss: 0.346 - train_loss: 3.239 - train_acc: 0.850


19 Nov 2023 05:43:19 [line:175] [32mTrain Loss: 2.709, Train Acc: 0.934, Test Loss: 1.403, Test Acc: 0.671, lr: 0.10000[0m


Epoch 76/240
2023-11-19 05:45:39 [141/141] - 450.65ms/step - nd_loss: 0.990 - kd_loss: 1.880 - cls_loss: 0.256 - train_loss: 3.125 - train_acc: 0.925


19 Nov 2023 05:45:43 [line:175] [32mTrain Loss: 2.779, Train Acc: 0.927, Test Loss: 1.574, Test Acc: 0.646, lr: 0.10000[0m


Epoch 77/240
2023-11-19 05:48:04 [141/141] - 451.60ms/step - nd_loss: 0.991 - kd_loss: 2.140 - cls_loss: 0.414 - train_loss: 3.545 - train_acc: 0.825


19 Nov 2023 05:48:08 [line:175] [32mTrain Loss: 2.830, Train Acc: 0.927, Test Loss: 1.606, Test Acc: 0.651, lr: 0.10000[0m


Epoch 78/240
2023-11-19 05:50:28 [141/141] - 452.59ms/step - nd_loss: 0.990 - kd_loss: 1.550 - cls_loss: 0.247 - train_loss: 2.787 - train_acc: 0.900


19 Nov 2023 05:50:33 [line:175] [32mTrain Loss: 2.740, Train Acc: 0.930, Test Loss: 1.486, Test Acc: 0.663, lr: 0.10000[0m


Epoch 79/240
2023-11-19 05:52:53 [141/141] - 450.27ms/step - nd_loss: 0.990 - kd_loss: 1.433 - cls_loss: 0.159 - train_loss: 2.582 - train_acc: 0.950


19 Nov 2023 05:52:57 [line:175] [32mTrain Loss: 2.568, Train Acc: 0.945, Test Loss: 1.471, Test Acc: 0.669, lr: 0.10000[0m


Epoch 80/240
2023-11-19 05:55:18 [141/141] - 450.29ms/step - nd_loss: 0.990 - kd_loss: 1.466 - cls_loss: 0.275 - train_loss: 2.731 - train_acc: 0.900


19 Nov 2023 05:55:22 [line:175] [32mTrain Loss: 2.602, Train Acc: 0.936, Test Loss: 1.352, Test Acc: 0.667, lr: 0.10000[0m


Epoch 81/240
2023-11-19 05:57:42 [141/141] - 451.00ms/step - nd_loss: 0.989 - kd_loss: 1.445 - cls_loss: 0.336 - train_loss: 2.770 - train_acc: 0.900


19 Nov 2023 05:57:46 [line:175] [32mTrain Loss: 2.469, Train Acc: 0.955, Test Loss: 1.852, Test Acc: 0.610, lr: 0.10000[0m


Epoch 82/240
2023-11-19 06:00:07 [141/141] - 451.81ms/step - nd_loss: 0.989 - kd_loss: 1.289 - cls_loss: 0.197 - train_loss: 2.476 - train_acc: 0.850


19 Nov 2023 06:00:11 [line:175] [32mTrain Loss: 2.643, Train Acc: 0.938, Test Loss: 1.442, Test Acc: 0.678, lr: 0.10000[0m


Epoch 83/240
2023-11-19 06:02:32 [141/141] - 451.26ms/step - nd_loss: 0.989 - kd_loss: 1.603 - cls_loss: 0.204 - train_loss: 2.796 - train_acc: 0.875


19 Nov 2023 06:02:36 [line:175] [32mTrain Loss: 2.472, Train Acc: 0.955, Test Loss: 1.646, Test Acc: 0.642, lr: 0.10000[0m


Epoch 84/240
2023-11-19 06:04:56 [141/141] - 451.94ms/step - nd_loss: 0.989 - kd_loss: 1.712 - cls_loss: 0.132 - train_loss: 2.833 - train_acc: 0.950


19 Nov 2023 06:05:00 [line:175] [32mTrain Loss: 2.421, Train Acc: 0.956, Test Loss: 1.554, Test Acc: 0.651, lr: 0.10000[0m


Epoch 85/240
2023-11-19 06:07:21 [141/141] - 450.61ms/step - nd_loss: 0.989 - kd_loss: 1.294 - cls_loss: 0.149 - train_loss: 2.432 - train_acc: 0.950


19 Nov 2023 06:07:25 [line:175] [32mTrain Loss: 2.515, Train Acc: 0.950, Test Loss: 1.510, Test Acc: 0.658, lr: 0.10000[0m


Epoch 86/240
2023-11-19 06:09:45 [141/141] - 450.86ms/step - nd_loss: 0.989 - kd_loss: 1.448 - cls_loss: 0.105 - train_loss: 2.543 - train_acc: 0.975


19 Nov 2023 06:09:50 [line:175] [32mTrain Loss: 2.504, Train Acc: 0.946, Test Loss: 1.805, Test Acc: 0.628, lr: 0.10000[0m


Epoch 87/240
2023-11-19 06:12:10 [141/141] - 451.75ms/step - nd_loss: 0.989 - kd_loss: 1.412 - cls_loss: 0.155 - train_loss: 2.557 - train_acc: 0.950


19 Nov 2023 06:12:14 [line:175] [32mTrain Loss: 2.539, Train Acc: 0.946, Test Loss: 1.533, Test Acc: 0.663, lr: 0.10000[0m


Epoch 88/240
2023-11-19 06:14:35 [141/141] - 452.12ms/step - nd_loss: 0.989 - kd_loss: 1.899 - cls_loss: 0.447 - train_loss: 3.335 - train_acc: 0.900


19 Nov 2023 06:14:39 [line:175] [32mTrain Loss: 2.556, Train Acc: 0.948, Test Loss: 1.441, Test Acc: 0.656, lr: 0.10000[0m


Epoch 89/240
2023-11-19 06:16:59 [141/141] - 451.02ms/step - nd_loss: 0.989 - kd_loss: 1.410 - cls_loss: 0.131 - train_loss: 2.530 - train_acc: 0.975


19 Nov 2023 06:17:03 [line:175] [32mTrain Loss: 2.414, Train Acc: 0.956, Test Loss: 1.785, Test Acc: 0.629, lr: 0.10000[0m


Epoch 90/240
2023-11-19 06:19:24 [141/141] - 451.18ms/step - nd_loss: 0.988 - kd_loss: 1.303 - cls_loss: 0.133 - train_loss: 2.424 - train_acc: 0.950


19 Nov 2023 06:19:28 [line:175] [32mTrain Loss: 2.388, Train Acc: 0.957, Test Loss: 1.814, Test Acc: 0.648, lr: 0.10000[0m


Epoch 91/240
2023-11-19 06:21:49 [141/141] - 450.40ms/step - nd_loss: 0.989 - kd_loss: 1.878 - cls_loss: 0.211 - train_loss: 3.079 - train_acc: 0.950


19 Nov 2023 06:21:53 [line:175] [32mTrain Loss: 2.585, Train Acc: 0.943, Test Loss: 1.906, Test Acc: 0.623, lr: 0.10000[0m


Epoch 92/240
2023-11-19 06:24:13 [141/141] - 451.57ms/step - nd_loss: 0.989 - kd_loss: 1.170 - cls_loss: 0.243 - train_loss: 2.402 - train_acc: 0.925


19 Nov 2023 06:24:17 [line:175] [32mTrain Loss: 2.500, Train Acc: 0.950, Test Loss: 1.581, Test Acc: 0.645, lr: 0.10000[0m


Epoch 93/240
2023-11-19 06:26:38 [141/141] - 450.94ms/step - nd_loss: 0.988 - kd_loss: 1.539 - cls_loss: 0.132 - train_loss: 2.659 - train_acc: 0.950


19 Nov 2023 06:26:42 [line:175] [32mTrain Loss: 2.330, Train Acc: 0.963, Test Loss: 1.810, Test Acc: 0.635, lr: 0.10000[0m


Epoch 94/240
2023-11-19 06:29:02 [141/141] - 450.53ms/step - nd_loss: 0.988 - kd_loss: 1.037 - cls_loss: 0.122 - train_loss: 2.148 - train_acc: 0.975


19 Nov 2023 06:29:07 [line:175] [32mTrain Loss: 2.219, Train Acc: 0.967, Test Loss: 1.509, Test Acc: 0.662, lr: 0.10000[0m


Epoch 95/240
2023-11-19 06:31:27 [141/141] - 450.28ms/step - nd_loss: 0.988 - kd_loss: 1.446 - cls_loss: 0.116 - train_loss: 2.550 - train_acc: 0.950


19 Nov 2023 06:31:31 [line:175] [32mTrain Loss: 2.163, Train Acc: 0.972, Test Loss: 1.700, Test Acc: 0.645, lr: 0.10000[0m


Epoch 96/240
2023-11-19 06:33:52 [141/141] - 451.13ms/step - nd_loss: 0.988 - kd_loss: 1.443 - cls_loss: 0.337 - train_loss: 2.767 - train_acc: 0.875


19 Nov 2023 06:33:56 [line:175] [32mTrain Loss: 2.310, Train Acc: 0.962, Test Loss: 1.575, Test Acc: 0.620, lr: 0.10000[0m


Epoch 97/240
2023-11-19 06:36:16 [141/141] - 450.88ms/step - nd_loss: 0.988 - kd_loss: 1.579 - cls_loss: 0.184 - train_loss: 2.752 - train_acc: 0.925


19 Nov 2023 06:36:20 [line:175] [32mTrain Loss: 2.404, Train Acc: 0.958, Test Loss: 1.596, Test Acc: 0.660, lr: 0.10000[0m


Epoch 98/240
2023-11-19 06:38:41 [141/141] - 451.88ms/step - nd_loss: 0.989 - kd_loss: 1.090 - cls_loss: 0.128 - train_loss: 2.207 - train_acc: 0.975


19 Nov 2023 06:38:45 [line:175] [32mTrain Loss: 2.574, Train Acc: 0.942, Test Loss: 1.676, Test Acc: 0.641, lr: 0.10000[0m


Epoch 99/240
2023-11-19 06:41:06 [141/141] - 450.81ms/step - nd_loss: 0.988 - kd_loss: 1.522 - cls_loss: 0.219 - train_loss: 2.729 - train_acc: 0.875


19 Nov 2023 06:41:10 [line:175] [32mTrain Loss: 2.545, Train Acc: 0.944, Test Loss: 1.435, Test Acc: 0.656, lr: 0.10000[0m


Epoch 100/240
2023-11-19 06:43:30 [141/141] - 450.84ms/step - nd_loss: 0.988 - kd_loss: 1.292 - cls_loss: 0.149 - train_loss: 2.429 - train_acc: 0.950


19 Nov 2023 06:43:34 [line:175] [32mTrain Loss: 2.321, Train Acc: 0.964, Test Loss: 1.423, Test Acc: 0.676, lr: 0.10000[0m


Epoch 101/240
2023-11-19 06:45:55 [141/141] - 450.81ms/step - nd_loss: 0.989 - kd_loss: 1.276 - cls_loss: 0.140 - train_loss: 2.405 - train_acc: 0.975


19 Nov 2023 06:45:59 [line:175] [32mTrain Loss: 2.145, Train Acc: 0.977, Test Loss: 1.352, Test Acc: 0.653, lr: 0.10000[0m


Epoch 102/240
2023-11-19 06:48:19 [141/141] - 451.91ms/step - nd_loss: 0.988 - kd_loss: 1.127 - cls_loss: 0.172 - train_loss: 2.286 - train_acc: 0.925


19 Nov 2023 06:48:24 [line:175] [32mTrain Loss: 2.187, Train Acc: 0.970, Test Loss: 1.377, Test Acc: 0.665, lr: 0.10000[0m


Epoch 103/240
2023-11-19 06:50:44 [141/141] - 451.33ms/step - nd_loss: 0.988 - kd_loss: 1.175 - cls_loss: 0.169 - train_loss: 2.332 - train_acc: 0.950


19 Nov 2023 06:50:48 [line:175] [32mTrain Loss: 2.198, Train Acc: 0.968, Test Loss: 1.693, Test Acc: 0.639, lr: 0.10000[0m


Epoch 104/240
2023-11-19 06:53:09 [141/141] - 450.07ms/step - nd_loss: 0.988 - kd_loss: 1.106 - cls_loss: 0.108 - train_loss: 2.202 - train_acc: 0.975


19 Nov 2023 06:53:13 [line:175] [32mTrain Loss: 2.352, Train Acc: 0.955, Test Loss: 1.815, Test Acc: 0.635, lr: 0.10000[0m


Epoch 105/240
2023-11-19 06:55:33 [141/141] - 452.18ms/step - nd_loss: 0.988 - kd_loss: 1.048 - cls_loss: 0.072 - train_loss: 2.108 - train_acc: 1.000


19 Nov 2023 06:55:38 [line:175] [32mTrain Loss: 2.403, Train Acc: 0.953, Test Loss: 1.813, Test Acc: 0.611, lr: 0.10000[0m


Epoch 106/240
2023-11-19 06:57:58 [141/141] - 450.73ms/step - nd_loss: 0.989 - kd_loss: 1.315 - cls_loss: 0.222 - train_loss: 2.525 - train_acc: 0.900


19 Nov 2023 06:58:02 [line:175] [32mTrain Loss: 2.397, Train Acc: 0.949, Test Loss: 1.564, Test Acc: 0.641, lr: 0.10000[0m


Epoch 107/240
2023-11-19 07:00:23 [141/141] - 451.02ms/step - nd_loss: 0.988 - kd_loss: 1.133 - cls_loss: 0.058 - train_loss: 2.178 - train_acc: 0.975


19 Nov 2023 07:00:27 [line:175] [32mTrain Loss: 2.377, Train Acc: 0.960, Test Loss: 1.445, Test Acc: 0.654, lr: 0.10000[0m


Epoch 108/240
2023-11-19 07:02:47 [141/141] - 450.55ms/step - nd_loss: 0.988 - kd_loss: 0.893 - cls_loss: 0.115 - train_loss: 1.996 - train_acc: 0.975


19 Nov 2023 07:02:52 [line:175] [32mTrain Loss: 2.159, Train Acc: 0.972, Test Loss: 1.361, Test Acc: 0.681, lr: 0.10000[0m


Epoch 109/240
2023-11-19 07:05:12 [141/141] - 451.42ms/step - nd_loss: 0.988 - kd_loss: 1.368 - cls_loss: 0.289 - train_loss: 2.644 - train_acc: 0.925


19 Nov 2023 07:05:16 [line:175] [32mTrain Loss: 2.185, Train Acc: 0.971, Test Loss: 1.408, Test Acc: 0.678, lr: 0.10000[0m


Epoch 110/240
2023-11-19 07:07:37 [141/141] - 450.91ms/step - nd_loss: 0.987 - kd_loss: 1.182 - cls_loss: 0.064 - train_loss: 2.233 - train_acc: 1.000


19 Nov 2023 07:07:41 [line:175] [32mTrain Loss: 2.123, Train Acc: 0.975, Test Loss: 1.331, Test Acc: 0.690, lr: 0.10000[0m


Epoch 111/240
2023-11-19 07:10:01 [141/141] - 451.47ms/step - nd_loss: 0.988 - kd_loss: 1.091 - cls_loss: 0.109 - train_loss: 2.188 - train_acc: 0.975


19 Nov 2023 07:10:06 [line:175] [32mTrain Loss: 2.144, Train Acc: 0.973, Test Loss: 1.544, Test Acc: 0.665, lr: 0.10000[0m


Epoch 112/240
2023-11-19 07:12:26 [141/141] - 451.64ms/step - nd_loss: 0.988 - kd_loss: 1.543 - cls_loss: 0.181 - train_loss: 2.712 - train_acc: 0.975


19 Nov 2023 07:12:30 [line:175] [32mTrain Loss: 2.237, Train Acc: 0.966, Test Loss: 1.527, Test Acc: 0.658, lr: 0.10000[0m


Epoch 113/240
2023-11-19 07:14:51 [141/141] - 451.06ms/step - nd_loss: 0.987 - kd_loss: 1.693 - cls_loss: 0.196 - train_loss: 2.876 - train_acc: 0.925


19 Nov 2023 07:14:55 [line:175] [32mTrain Loss: 2.353, Train Acc: 0.959, Test Loss: 1.448, Test Acc: 0.659, lr: 0.10000[0m


Epoch 114/240
2023-11-19 07:17:15 [141/141] - 451.41ms/step - nd_loss: 0.988 - kd_loss: 1.436 - cls_loss: 0.235 - train_loss: 2.659 - train_acc: 0.925


19 Nov 2023 07:17:20 [line:175] [32mTrain Loss: 2.250, Train Acc: 0.963, Test Loss: 1.520, Test Acc: 0.645, lr: 0.10000[0m


Epoch 115/240
2023-11-19 07:19:40 [141/141] - 451.47ms/step - nd_loss: 0.987 - kd_loss: 1.298 - cls_loss: 0.257 - train_loss: 2.543 - train_acc: 0.875


19 Nov 2023 07:19:44 [line:175] [32mTrain Loss: 2.217, Train Acc: 0.967, Test Loss: 1.405, Test Acc: 0.665, lr: 0.10000[0m


Epoch 116/240
2023-11-19 07:22:05 [141/141] - 451.88ms/step - nd_loss: 0.988 - kd_loss: 0.980 - cls_loss: 0.099 - train_loss: 2.067 - train_acc: 0.975


19 Nov 2023 07:22:09 [line:175] [32mTrain Loss: 2.147, Train Acc: 0.970, Test Loss: 1.511, Test Acc: 0.658, lr: 0.10000[0m


Epoch 117/240
2023-11-19 07:24:29 [141/141] - 452.36ms/step - nd_loss: 0.986 - kd_loss: 0.875 - cls_loss: 0.093 - train_loss: 1.954 - train_acc: 0.975


19 Nov 2023 07:24:33 [line:175] [32mTrain Loss: 2.112, Train Acc: 0.974, Test Loss: 1.488, Test Acc: 0.675, lr: 0.10000[0m


Epoch 118/240
2023-11-19 07:26:54 [141/141] - 451.53ms/step - nd_loss: 0.987 - kd_loss: 0.983 - cls_loss: 0.102 - train_loss: 2.072 - train_acc: 0.950


19 Nov 2023 07:26:58 [line:175] [32mTrain Loss: 2.135, Train Acc: 0.974, Test Loss: 1.316, Test Acc: 0.680, lr: 0.10000[0m


Epoch 119/240
2023-11-19 07:29:19 [141/141] - 452.76ms/step - nd_loss: 0.987 - kd_loss: 1.459 - cls_loss: 0.103 - train_loss: 2.548 - train_acc: 0.975


19 Nov 2023 07:29:23 [line:175] [32mTrain Loss: 2.182, Train Acc: 0.970, Test Loss: 1.588, Test Acc: 0.636, lr: 0.10000[0m


Epoch 120/240
2023-11-19 07:31:43 [141/141] - 450.39ms/step - nd_loss: 0.988 - kd_loss: 1.696 - cls_loss: 0.212 - train_loss: 2.895 - train_acc: 0.925


19 Nov 2023 07:31:47 [line:175] [32mTrain Loss: 2.126, Train Acc: 0.971, Test Loss: 1.428, Test Acc: 0.680, lr: 0.10000[0m


Epoch 121/240
2023-11-19 07:34:08 [141/141] - 450.50ms/step - nd_loss: 0.987 - kd_loss: 1.185 - cls_loss: 0.066 - train_loss: 2.237 - train_acc: 0.975


19 Nov 2023 07:34:12 [line:175] [32mTrain Loss: 2.241, Train Acc: 0.965, Test Loss: 1.375, Test Acc: 0.680, lr: 0.10000[0m


Epoch 122/240
2023-11-19 07:36:33 [141/141] - 450.55ms/step - nd_loss: 0.988 - kd_loss: 1.139 - cls_loss: 0.062 - train_loss: 2.190 - train_acc: 1.000


19 Nov 2023 07:36:37 [line:175] [32mTrain Loss: 2.164, Train Acc: 0.971, Test Loss: 1.380, Test Acc: 0.669, lr: 0.10000[0m


Epoch 123/240
2023-11-19 07:38:57 [141/141] - 451.21ms/step - nd_loss: 0.987 - kd_loss: 1.222 - cls_loss: 0.172 - train_loss: 2.382 - train_acc: 0.950


19 Nov 2023 07:39:01 [line:175] [32mTrain Loss: 2.130, Train Acc: 0.971, Test Loss: 1.594, Test Acc: 0.649, lr: 0.10000[0m


Epoch 124/240
2023-11-19 07:41:22 [141/141] - 451.92ms/step - nd_loss: 0.987 - kd_loss: 1.277 - cls_loss: 0.175 - train_loss: 2.439 - train_acc: 0.900


19 Nov 2023 07:41:26 [line:175] [32mTrain Loss: 2.255, Train Acc: 0.969, Test Loss: 1.474, Test Acc: 0.661, lr: 0.10000[0m


Epoch 125/240
2023-11-19 07:43:47 [141/141] - 450.42ms/step - nd_loss: 0.987 - kd_loss: 1.435 - cls_loss: 0.178 - train_loss: 2.600 - train_acc: 0.925


19 Nov 2023 07:43:51 [line:175] [32mTrain Loss: 2.267, Train Acc: 0.965, Test Loss: 1.890, Test Acc: 0.626, lr: 0.10000[0m


Epoch 126/240
2023-11-19 07:46:11 [141/141] - 451.39ms/step - nd_loss: 0.987 - kd_loss: 1.080 - cls_loss: 0.079 - train_loss: 2.146 - train_acc: 0.975


19 Nov 2023 07:46:15 [line:175] [32mTrain Loss: 2.261, Train Acc: 0.963, Test Loss: 1.500, Test Acc: 0.653, lr: 0.10000[0m


Epoch 127/240
2023-11-19 07:48:36 [141/141] - 451.62ms/step - nd_loss: 0.987 - kd_loss: 1.253 - cls_loss: 0.158 - train_loss: 2.398 - train_acc: 0.950


19 Nov 2023 07:48:40 [line:175] [32mTrain Loss: 2.209, Train Acc: 0.969, Test Loss: 1.445, Test Acc: 0.656, lr: 0.10000[0m


Epoch 128/240
2023-11-19 07:51:01 [141/141] - 451.86ms/step - nd_loss: 0.986 - kd_loss: 0.864 - cls_loss: 0.036 - train_loss: 1.886 - train_acc: 1.000


19 Nov 2023 07:51:05 [line:175] [32mTrain Loss: 2.114, Train Acc: 0.977, Test Loss: 1.403, Test Acc: 0.677, lr: 0.10000[0m


Epoch 129/240
2023-11-19 07:53:25 [141/141] - 450.87ms/step - nd_loss: 0.987 - kd_loss: 0.945 - cls_loss: 0.045 - train_loss: 1.977 - train_acc: 1.000


19 Nov 2023 07:53:29 [line:175] [32mTrain Loss: 1.982, Train Acc: 0.980, Test Loss: 1.261, Test Acc: 0.693, lr: 0.10000[0m


Epoch 130/240
2023-11-19 07:55:50 [141/141] - 451.68ms/step - nd_loss: 0.986 - kd_loss: 0.823 - cls_loss: 0.098 - train_loss: 1.907 - train_acc: 1.000


19 Nov 2023 07:55:54 [line:175] [32mTrain Loss: 2.020, Train Acc: 0.977, Test Loss: 1.317, Test Acc: 0.675, lr: 0.10000[0m


Epoch 131/240
2023-11-19 07:58:14 [141/141] - 450.39ms/step - nd_loss: 0.987 - kd_loss: 1.789 - cls_loss: 0.207 - train_loss: 2.983 - train_acc: 0.925


19 Nov 2023 07:58:19 [line:175] [32mTrain Loss: 2.157, Train Acc: 0.969, Test Loss: 1.894, Test Acc: 0.624, lr: 0.10000[0m


Epoch 132/240
2023-11-19 08:00:39 [141/141] - 450.60ms/step - nd_loss: 0.987 - kd_loss: 1.299 - cls_loss: 0.077 - train_loss: 2.362 - train_acc: 0.975


19 Nov 2023 08:00:43 [line:175] [32mTrain Loss: 2.322, Train Acc: 0.960, Test Loss: 1.840, Test Acc: 0.615, lr: 0.10000[0m


Epoch 133/240
2023-11-19 08:03:04 [141/141] - 451.83ms/step - nd_loss: 0.987 - kd_loss: 1.089 - cls_loss: 0.146 - train_loss: 2.222 - train_acc: 0.950


19 Nov 2023 08:03:08 [line:175] [32mTrain Loss: 2.409, Train Acc: 0.954, Test Loss: 1.761, Test Acc: 0.635, lr: 0.10000[0m


Epoch 134/240
2023-11-19 08:05:28 [141/141] - 450.29ms/step - nd_loss: 0.987 - kd_loss: 1.306 - cls_loss: 0.068 - train_loss: 2.361 - train_acc: 1.000


19 Nov 2023 08:05:33 [line:175] [32mTrain Loss: 2.211, Train Acc: 0.969, Test Loss: 1.430, Test Acc: 0.674, lr: 0.10000[0m


Epoch 135/240
2023-11-19 08:07:53 [141/141] - 451.21ms/step - nd_loss: 0.987 - kd_loss: 0.829 - cls_loss: 0.064 - train_loss: 1.880 - train_acc: 0.975


19 Nov 2023 08:07:57 [line:175] [32mTrain Loss: 2.059, Train Acc: 0.976, Test Loss: 1.335, Test Acc: 0.678, lr: 0.10000[0m


Epoch 136/240
2023-11-19 08:10:18 [141/141] - 450.96ms/step - nd_loss: 0.986 - kd_loss: 0.727 - cls_loss: 0.044 - train_loss: 1.758 - train_acc: 1.000


19 Nov 2023 08:10:22 [line:175] [32mTrain Loss: 1.915, Train Acc: 0.986, Test Loss: 1.617, Test Acc: 0.641, lr: 0.10000[0m


Epoch 137/240
2023-11-19 08:12:42 [141/141] - 450.28ms/step - nd_loss: 0.986 - kd_loss: 0.770 - cls_loss: 0.131 - train_loss: 1.887 - train_acc: 0.950


19 Nov 2023 08:12:47 [line:175] [32mTrain Loss: 1.887, Train Acc: 0.985, Test Loss: 1.539, Test Acc: 0.643, lr: 0.10000[0m


Epoch 138/240
2023-11-19 08:15:07 [141/141] - 450.27ms/step - nd_loss: 0.987 - kd_loss: 1.099 - cls_loss: 0.068 - train_loss: 2.155 - train_acc: 1.000


19 Nov 2023 08:15:11 [line:175] [32mTrain Loss: 2.092, Train Acc: 0.973, Test Loss: 1.343, Test Acc: 0.684, lr: 0.10000[0m


Epoch 139/240
2023-11-19 08:17:32 [141/141] - 451.84ms/step - nd_loss: 0.987 - kd_loss: 1.756 - cls_loss: 0.182 - train_loss: 2.925 - train_acc: 0.950


19 Nov 2023 08:17:36 [line:175] [32mTrain Loss: 2.436, Train Acc: 0.945, Test Loss: 1.522, Test Acc: 0.653, lr: 0.10000[0m


Epoch 140/240
2023-11-19 08:19:56 [141/141] - 451.37ms/step - nd_loss: 0.987 - kd_loss: 1.134 - cls_loss: 0.050 - train_loss: 2.171 - train_acc: 1.000


19 Nov 2023 08:20:01 [line:175] [32mTrain Loss: 2.414, Train Acc: 0.954, Test Loss: 1.322, Test Acc: 0.667, lr: 0.10000[0m


Epoch 141/240
2023-11-19 08:22:21 [141/141] - 450.70ms/step - nd_loss: 0.987 - kd_loss: 0.970 - cls_loss: 0.076 - train_loss: 2.034 - train_acc: 1.000


19 Nov 2023 08:22:25 [line:175] [32mTrain Loss: 2.247, Train Acc: 0.964, Test Loss: 1.461, Test Acc: 0.670, lr: 0.10000[0m


Epoch 142/240
2023-11-19 08:24:46 [141/141] - 451.47ms/step - nd_loss: 0.988 - kd_loss: 1.154 - cls_loss: 0.146 - train_loss: 2.288 - train_acc: 0.950


19 Nov 2023 08:24:50 [line:175] [32mTrain Loss: 2.043, Train Acc: 0.977, Test Loss: 1.525, Test Acc: 0.621, lr: 0.10000[0m


Epoch 143/240
2023-11-19 08:27:10 [141/141] - 451.81ms/step - nd_loss: 0.986 - kd_loss: 1.132 - cls_loss: 0.122 - train_loss: 2.240 - train_acc: 0.975


19 Nov 2023 08:27:14 [line:175] [32mTrain Loss: 1.916, Train Acc: 0.986, Test Loss: 1.281, Test Acc: 0.689, lr: 0.10000[0m


Epoch 144/240
2023-11-19 08:29:35 [141/141] - 451.39ms/step - nd_loss: 0.986 - kd_loss: 0.801 - cls_loss: 0.048 - train_loss: 1.836 - train_acc: 1.000


19 Nov 2023 08:29:39 [line:175] [32mTrain Loss: 1.899, Train Acc: 0.987, Test Loss: 1.278, Test Acc: 0.675, lr: 0.10000[0m


Epoch 145/240
2023-11-19 08:32:00 [141/141] - 451.29ms/step - nd_loss: 0.986 - kd_loss: 0.894 - cls_loss: 0.121 - train_loss: 2.000 - train_acc: 0.950


19 Nov 2023 08:32:04 [line:175] [32mTrain Loss: 1.918, Train Acc: 0.983, Test Loss: 1.510, Test Acc: 0.655, lr: 0.10000[0m


Epoch 146/240
2023-11-19 08:34:24 [141/141] - 451.18ms/step - nd_loss: 0.986 - kd_loss: 1.154 - cls_loss: 0.131 - train_loss: 2.271 - train_acc: 0.950


19 Nov 2023 08:34:28 [line:175] [32mTrain Loss: 2.045, Train Acc: 0.972, Test Loss: 1.567, Test Acc: 0.653, lr: 0.10000[0m


Epoch 147/240
2023-11-19 08:36:49 [141/141] - 450.43ms/step - nd_loss: 0.986 - kd_loss: 1.274 - cls_loss: 0.159 - train_loss: 2.419 - train_acc: 0.925


19 Nov 2023 08:36:53 [line:175] [32mTrain Loss: 2.306, Train Acc: 0.960, Test Loss: 1.860, Test Acc: 0.633, lr: 0.10000[0m


Epoch 148/240
2023-11-19 08:39:14 [141/141] - 453.05ms/step - nd_loss: 0.987 - kd_loss: 1.192 - cls_loss: 0.220 - train_loss: 2.399 - train_acc: 0.925


19 Nov 2023 08:39:18 [line:175] [32mTrain Loss: 2.341, Train Acc: 0.955, Test Loss: 1.433, Test Acc: 0.663, lr: 0.10000[0m


Epoch 149/240
2023-11-19 08:41:38 [141/141] - 451.15ms/step - nd_loss: 0.987 - kd_loss: 0.877 - cls_loss: 0.100 - train_loss: 1.964 - train_acc: 0.975


19 Nov 2023 08:41:42 [line:175] [32mTrain Loss: 2.123, Train Acc: 0.972, Test Loss: 1.323, Test Acc: 0.676, lr: 0.10000[0m


Epoch 150/240
2023-11-19 08:44:03 [141/141] - 451.49ms/step - nd_loss: 0.986 - kd_loss: 0.933 - cls_loss: 0.068 - train_loss: 1.988 - train_acc: 1.000


19 Nov 2023 08:44:07 [line:175] [32mTrain Loss: 2.000, Train Acc: 0.979, Test Loss: 1.595, Test Acc: 0.643, lr: 0.10000[0m


Epoch 151/240
2023-11-19 08:46:28 [141/141] - 450.34ms/step - nd_loss: 0.986 - kd_loss: 0.547 - cls_loss: 0.041 - train_loss: 1.575 - train_acc: 0.975


19 Nov 2023 08:46:32 [line:175] [32mTrain Loss: 1.621, Train Acc: 0.996, Test Loss: 1.179, Test Acc: 0.704, lr: 0.01000[0m


Epoch 152/240
2023-11-19 08:48:52 [141/141] - 450.57ms/step - nd_loss: 0.988 - kd_loss: 0.604 - cls_loss: 0.006 - train_loss: 1.597 - train_acc: 1.000


19 Nov 2023 08:48:56 [line:175] [32mTrain Loss: 1.482, Train Acc: 0.999, Test Loss: 1.169, Test Acc: 0.701, lr: 0.01000[0m


Epoch 153/240
2023-11-19 08:51:17 [141/141] - 451.04ms/step - nd_loss: 0.987 - kd_loss: 0.459 - cls_loss: 0.059 - train_loss: 1.505 - train_acc: 1.000


19 Nov 2023 08:51:21 [line:175] [32mTrain Loss: 1.423, Train Acc: 0.999, Test Loss: 1.181, Test Acc: 0.696, lr: 0.01000[0m


Epoch 154/240
2023-11-19 08:53:41 [141/141] - 451.40ms/step - nd_loss: 0.986 - kd_loss: 0.367 - cls_loss: 0.010 - train_loss: 1.363 - train_acc: 1.000


19 Nov 2023 08:53:46 [line:175] [32mTrain Loss: 1.385, Train Acc: 0.999, Test Loss: 1.185, Test Acc: 0.700, lr: 0.01000[0m


Epoch 155/240
2023-11-19 08:56:06 [141/141] - 450.82ms/step - nd_loss: 0.986 - kd_loss: 0.421 - cls_loss: 0.013 - train_loss: 1.419 - train_acc: 1.000


19 Nov 2023 08:56:10 [line:175] [32mTrain Loss: 1.371, Train Acc: 0.999, Test Loss: 1.170, Test Acc: 0.704, lr: 0.01000[0m


Epoch 156/240
2023-11-19 08:58:31 [141/141] - 451.95ms/step - nd_loss: 0.985 - kd_loss: 0.368 - cls_loss: 0.015 - train_loss: 1.368 - train_acc: 1.000


19 Nov 2023 08:58:35 [line:175] [32mTrain Loss: 1.353, Train Acc: 1.000, Test Loss: 1.160, Test Acc: 0.702, lr: 0.01000[0m


Epoch 157/240
2023-11-19 09:00:55 [141/141] - 451.83ms/step - nd_loss: 0.985 - kd_loss: 0.370 - cls_loss: 0.060 - train_loss: 1.416 - train_acc: 0.975


19 Nov 2023 09:00:59 [line:175] [32mTrain Loss: 1.337, Train Acc: 1.000, Test Loss: 1.189, Test Acc: 0.698, lr: 0.01000[0m


Epoch 158/240
2023-11-19 09:03:20 [141/141] - 451.41ms/step - nd_loss: 0.986 - kd_loss: 0.353 - cls_loss: 0.009 - train_loss: 1.347 - train_acc: 1.000


19 Nov 2023 09:03:24 [line:175] [32mTrain Loss: 1.318, Train Acc: 0.999, Test Loss: 1.172, Test Acc: 0.701, lr: 0.01000[0m


Epoch 159/240
2023-11-19 09:05:45 [141/141] - 450.50ms/step - nd_loss: 0.985 - kd_loss: 0.338 - cls_loss: 0.013 - train_loss: 1.336 - train_acc: 1.000


19 Nov 2023 09:05:49 [line:175] [32mTrain Loss: 1.312, Train Acc: 1.000, Test Loss: 1.164, Test Acc: 0.701, lr: 0.01000[0m


Epoch 160/240
2023-11-19 09:08:09 [141/141] - 451.55ms/step - nd_loss: 0.985 - kd_loss: 0.318 - cls_loss: 0.019 - train_loss: 1.321 - train_acc: 1.000


19 Nov 2023 09:08:13 [line:175] [32mTrain Loss: 1.307, Train Acc: 1.000, Test Loss: 1.180, Test Acc: 0.700, lr: 0.01000[0m


Epoch 161/240
2023-11-19 09:10:34 [141/141] - 451.29ms/step - nd_loss: 0.984 - kd_loss: 0.329 - cls_loss: 0.015 - train_loss: 1.329 - train_acc: 1.000


19 Nov 2023 09:10:38 [line:175] [32mTrain Loss: 1.298, Train Acc: 1.000, Test Loss: 1.164, Test Acc: 0.698, lr: 0.01000[0m


Epoch 162/240
2023-11-19 09:12:59 [141/141] - 451.64ms/step - nd_loss: 0.984 - kd_loss: 0.296 - cls_loss: 0.017 - train_loss: 1.297 - train_acc: 1.000


19 Nov 2023 09:13:03 [line:175] [32mTrain Loss: 1.287, Train Acc: 0.999, Test Loss: 1.164, Test Acc: 0.702, lr: 0.01000[0m


Epoch 163/240
2023-11-19 09:15:23 [141/141] - 451.52ms/step - nd_loss: 0.984 - kd_loss: 0.311 - cls_loss: 0.020 - train_loss: 1.315 - train_acc: 1.000


19 Nov 2023 09:15:27 [line:175] [32mTrain Loss: 1.277, Train Acc: 1.000, Test Loss: 1.183, Test Acc: 0.698, lr: 0.01000[0m


Epoch 164/240
2023-11-19 09:17:48 [141/141] - 451.15ms/step - nd_loss: 0.985 - kd_loss: 0.331 - cls_loss: 0.017 - train_loss: 1.332 - train_acc: 1.000


19 Nov 2023 09:17:52 [line:175] [32mTrain Loss: 1.269, Train Acc: 1.000, Test Loss: 1.170, Test Acc: 0.700, lr: 0.01000[0m


Epoch 165/240
2023-11-19 09:20:13 [141/141] - 450.63ms/step - nd_loss: 0.984 - kd_loss: 0.263 - cls_loss: 0.015 - train_loss: 1.262 - train_acc: 1.000


19 Nov 2023 09:20:17 [line:175] [32mTrain Loss: 1.263, Train Acc: 1.000, Test Loss: 1.171, Test Acc: 0.698, lr: 0.01000[0m


Epoch 166/240
2023-11-19 09:22:37 [141/141] - 451.53ms/step - nd_loss: 0.984 - kd_loss: 0.248 - cls_loss: 0.033 - train_loss: 1.265 - train_acc: 0.975


19 Nov 2023 09:22:41 [line:175] [32mTrain Loss: 1.264, Train Acc: 0.999, Test Loss: 1.167, Test Acc: 0.701, lr: 0.01000[0m


Epoch 167/240
2023-11-19 09:25:02 [141/141] - 450.66ms/step - nd_loss: 0.983 - kd_loss: 0.430 - cls_loss: 0.017 - train_loss: 1.431 - train_acc: 1.000


19 Nov 2023 09:25:06 [line:175] [32mTrain Loss: 1.256, Train Acc: 1.000, Test Loss: 1.186, Test Acc: 0.703, lr: 0.01000[0m


Epoch 168/240
2023-11-19 09:27:27 [141/141] - 452.06ms/step - nd_loss: 0.983 - kd_loss: 0.280 - cls_loss: 0.017 - train_loss: 1.280 - train_acc: 1.000


19 Nov 2023 09:27:31 [line:175] [32mTrain Loss: 1.250, Train Acc: 1.000, Test Loss: 1.175, Test Acc: 0.698, lr: 0.01000[0m


Epoch 169/240
2023-11-19 09:29:51 [141/141] - 451.63ms/step - nd_loss: 0.982 - kd_loss: 0.291 - cls_loss: 0.015 - train_loss: 1.288 - train_acc: 1.000


19 Nov 2023 09:29:55 [line:175] [32mTrain Loss: 1.242, Train Acc: 0.999, Test Loss: 1.197, Test Acc: 0.699, lr: 0.01000[0m


Epoch 170/240
2023-11-19 09:32:16 [141/141] - 451.25ms/step - nd_loss: 0.982 - kd_loss: 0.276 - cls_loss: 0.019 - train_loss: 1.277 - train_acc: 1.000


19 Nov 2023 09:32:20 [line:175] [32mTrain Loss: 1.237, Train Acc: 0.999, Test Loss: 1.189, Test Acc: 0.700, lr: 0.01000[0m


Epoch 171/240
2023-11-19 09:34:41 [141/141] - 450.78ms/step - nd_loss: 0.982 - kd_loss: 0.279 - cls_loss: 0.008 - train_loss: 1.270 - train_acc: 1.000


19 Nov 2023 09:34:45 [line:175] [32mTrain Loss: 1.233, Train Acc: 1.000, Test Loss: 1.176, Test Acc: 0.698, lr: 0.01000[0m


Epoch 172/240
2023-11-19 09:37:05 [141/141] - 450.57ms/step - nd_loss: 0.982 - kd_loss: 0.278 - cls_loss: 0.025 - train_loss: 1.285 - train_acc: 1.000


19 Nov 2023 09:37:09 [line:175] [32mTrain Loss: 1.237, Train Acc: 0.999, Test Loss: 1.189, Test Acc: 0.698, lr: 0.01000[0m


Epoch 173/240
2023-11-19 09:39:30 [141/141] - 451.86ms/step - nd_loss: 0.982 - kd_loss: 0.372 - cls_loss: 0.022 - train_loss: 1.375 - train_acc: 1.000


19 Nov 2023 09:39:34 [line:175] [32mTrain Loss: 1.228, Train Acc: 1.000, Test Loss: 1.196, Test Acc: 0.699, lr: 0.01000[0m


Epoch 174/240
2023-11-19 09:41:55 [141/141] - 451.82ms/step - nd_loss: 0.982 - kd_loss: 0.233 - cls_loss: 0.015 - train_loss: 1.230 - train_acc: 1.000


19 Nov 2023 09:41:59 [line:175] [32mTrain Loss: 1.219, Train Acc: 1.000, Test Loss: 1.192, Test Acc: 0.699, lr: 0.01000[0m


Epoch 175/240
2023-11-19 09:44:19 [141/141] - 451.60ms/step - nd_loss: 0.981 - kd_loss: 0.240 - cls_loss: 0.013 - train_loss: 1.234 - train_acc: 1.000


19 Nov 2023 09:44:23 [line:175] [32mTrain Loss: 1.218, Train Acc: 0.999, Test Loss: 1.189, Test Acc: 0.698, lr: 0.01000[0m


Epoch 176/240
2023-11-19 09:46:44 [141/141] - 450.49ms/step - nd_loss: 0.982 - kd_loss: 0.280 - cls_loss: 0.017 - train_loss: 1.279 - train_acc: 1.000


19 Nov 2023 09:46:48 [line:175] [32mTrain Loss: 1.211, Train Acc: 1.000, Test Loss: 1.196, Test Acc: 0.699, lr: 0.01000[0m


Epoch 177/240
2023-11-19 09:49:09 [141/141] - 451.34ms/step - nd_loss: 0.980 - kd_loss: 0.208 - cls_loss: 0.008 - train_loss: 1.197 - train_acc: 1.000


19 Nov 2023 09:49:13 [line:175] [32mTrain Loss: 1.214, Train Acc: 0.999, Test Loss: 1.187, Test Acc: 0.695, lr: 0.01000[0m


Epoch 178/240
2023-11-19 09:51:33 [141/141] - 450.90ms/step - nd_loss: 0.982 - kd_loss: 0.231 - cls_loss: 0.009 - train_loss: 1.221 - train_acc: 1.000


19 Nov 2023 09:51:37 [line:175] [32mTrain Loss: 1.210, Train Acc: 1.000, Test Loss: 1.174, Test Acc: 0.701, lr: 0.01000[0m


Epoch 179/240
2023-11-19 09:53:58 [141/141] - 450.24ms/step - nd_loss: 0.980 - kd_loss: 0.238 - cls_loss: 0.021 - train_loss: 1.239 - train_acc: 1.000


19 Nov 2023 09:54:02 [line:175] [32mTrain Loss: 1.207, Train Acc: 0.999, Test Loss: 1.178, Test Acc: 0.697, lr: 0.01000[0m


Epoch 180/240
2023-11-19 09:56:23 [141/141] - 451.69ms/step - nd_loss: 0.980 - kd_loss: 0.250 - cls_loss: 0.015 - train_loss: 1.245 - train_acc: 1.000


19 Nov 2023 09:56:27 [line:175] [32mTrain Loss: 1.199, Train Acc: 1.000, Test Loss: 1.189, Test Acc: 0.701, lr: 0.01000[0m


Epoch 181/240
2023-11-19 09:58:47 [141/141] - 451.49ms/step - nd_loss: 0.981 - kd_loss: 0.234 - cls_loss: 0.019 - train_loss: 1.234 - train_acc: 1.000


19 Nov 2023 09:58:51 [line:175] [32mTrain Loss: 1.186, Train Acc: 0.999, Test Loss: 1.187, Test Acc: 0.697, lr: 0.00100[0m


Epoch 182/240
2023-11-19 10:01:12 [141/141] - 450.70ms/step - nd_loss: 0.981 - kd_loss: 0.176 - cls_loss: 0.011 - train_loss: 1.167 - train_acc: 1.000


19 Nov 2023 10:01:16 [line:175] [32mTrain Loss: 1.184, Train Acc: 1.000, Test Loss: 1.183, Test Acc: 0.699, lr: 0.00100[0m


Epoch 183/240
2023-11-19 10:03:36 [141/141] - 450.87ms/step - nd_loss: 0.980 - kd_loss: 0.187 - cls_loss: 0.015 - train_loss: 1.182 - train_acc: 1.000


19 Nov 2023 10:03:41 [line:175] [32mTrain Loss: 1.178, Train Acc: 1.000, Test Loss: 1.185, Test Acc: 0.702, lr: 0.00100[0m


Epoch 184/240
2023-11-19 10:06:01 [141/141] - 451.06ms/step - nd_loss: 0.980 - kd_loss: 0.198 - cls_loss: 0.008 - train_loss: 1.186 - train_acc: 1.000


19 Nov 2023 10:06:05 [line:175] [32mTrain Loss: 1.178, Train Acc: 1.000, Test Loss: 1.190, Test Acc: 0.699, lr: 0.00100[0m


Epoch 185/240
2023-11-19 10:08:26 [141/141] - 450.41ms/step - nd_loss: 0.980 - kd_loss: 0.197 - cls_loss: 0.021 - train_loss: 1.199 - train_acc: 1.000


19 Nov 2023 10:08:30 [line:175] [32mTrain Loss: 1.178, Train Acc: 0.999, Test Loss: 1.187, Test Acc: 0.700, lr: 0.00100[0m


Epoch 186/240
2023-11-19 10:10:50 [141/141] - 452.32ms/step - nd_loss: 0.979 - kd_loss: 0.209 - cls_loss: 0.014 - train_loss: 1.203 - train_acc: 1.000


19 Nov 2023 10:10:55 [line:175] [32mTrain Loss: 1.182, Train Acc: 1.000, Test Loss: 1.198, Test Acc: 0.700, lr: 0.00100[0m


Epoch 187/240
2023-11-19 10:13:15 [141/141] - 450.99ms/step - nd_loss: 0.980 - kd_loss: 0.191 - cls_loss: 0.018 - train_loss: 1.189 - train_acc: 1.000


19 Nov 2023 10:13:19 [line:175] [32mTrain Loss: 1.176, Train Acc: 1.000, Test Loss: 1.189, Test Acc: 0.700, lr: 0.00100[0m


Epoch 188/240
2023-11-19 10:15:40 [141/141] - 450.71ms/step - nd_loss: 0.980 - kd_loss: 0.153 - cls_loss: 0.015 - train_loss: 1.148 - train_acc: 1.000


19 Nov 2023 10:15:44 [line:175] [32mTrain Loss: 1.181, Train Acc: 1.000, Test Loss: 1.178, Test Acc: 0.698, lr: 0.00100[0m


Epoch 189/240
2023-11-19 10:18:04 [141/141] - 450.57ms/step - nd_loss: 0.979 - kd_loss: 0.218 - cls_loss: 0.006 - train_loss: 1.203 - train_acc: 1.000


19 Nov 2023 10:18:09 [line:175] [32mTrain Loss: 1.182, Train Acc: 1.000, Test Loss: 1.189, Test Acc: 0.699, lr: 0.00100[0m


Epoch 190/240
2023-11-19 10:20:29 [141/141] - 450.85ms/step - nd_loss: 0.981 - kd_loss: 0.199 - cls_loss: 0.012 - train_loss: 1.192 - train_acc: 1.000


19 Nov 2023 10:20:33 [line:175] [32mTrain Loss: 1.176, Train Acc: 1.000, Test Loss: 1.187, Test Acc: 0.697, lr: 0.00100[0m


Epoch 191/240
2023-11-19 10:22:54 [141/141] - 450.79ms/step - nd_loss: 0.981 - kd_loss: 0.229 - cls_loss: 0.014 - train_loss: 1.224 - train_acc: 1.000


19 Nov 2023 10:22:58 [line:175] [32mTrain Loss: 1.178, Train Acc: 1.000, Test Loss: 1.183, Test Acc: 0.698, lr: 0.00100[0m


Epoch 192/240
2023-11-19 10:25:18 [141/141] - 450.72ms/step - nd_loss: 0.980 - kd_loss: 0.208 - cls_loss: 0.014 - train_loss: 1.202 - train_acc: 1.000


19 Nov 2023 10:25:23 [line:175] [32mTrain Loss: 1.176, Train Acc: 1.000, Test Loss: 1.184, Test Acc: 0.699, lr: 0.00100[0m


Epoch 193/240
2023-11-19 10:27:43 [141/141] - 450.86ms/step - nd_loss: 0.979 - kd_loss: 0.208 - cls_loss: 0.021 - train_loss: 1.208 - train_acc: 1.000


19 Nov 2023 10:27:47 [line:175] [32mTrain Loss: 1.177, Train Acc: 1.000, Test Loss: 1.188, Test Acc: 0.700, lr: 0.00100[0m


Epoch 194/240
2023-11-19 10:30:08 [141/141] - 452.10ms/step - nd_loss: 0.980 - kd_loss: 0.194 - cls_loss: 0.018 - train_loss: 1.191 - train_acc: 1.000


19 Nov 2023 10:30:12 [line:175] [32mTrain Loss: 1.177, Train Acc: 0.999, Test Loss: 1.184, Test Acc: 0.699, lr: 0.00100[0m


Epoch 195/240
2023-11-19 10:32:32 [141/141] - 451.26ms/step - nd_loss: 0.980 - kd_loss: 0.234 - cls_loss: 0.015 - train_loss: 1.229 - train_acc: 1.000


19 Nov 2023 10:32:37 [line:175] [32mTrain Loss: 1.176, Train Acc: 1.000, Test Loss: 1.182, Test Acc: 0.697, lr: 0.00100[0m


Epoch 196/240
2023-11-19 10:34:57 [141/141] - 451.01ms/step - nd_loss: 0.979 - kd_loss: 0.178 - cls_loss: 0.013 - train_loss: 1.170 - train_acc: 1.000


19 Nov 2023 10:35:01 [line:175] [32mTrain Loss: 1.172, Train Acc: 1.000, Test Loss: 1.185, Test Acc: 0.697, lr: 0.00100[0m


Epoch 197/240
2023-11-19 10:37:22 [141/141] - 451.33ms/step - nd_loss: 0.981 - kd_loss: 0.246 - cls_loss: 0.010 - train_loss: 1.236 - train_acc: 1.000


19 Nov 2023 10:37:26 [line:175] [32mTrain Loss: 1.175, Train Acc: 1.000, Test Loss: 1.187, Test Acc: 0.698, lr: 0.00100[0m


Epoch 198/240
2023-11-19 10:39:46 [141/141] - 450.80ms/step - nd_loss: 0.979 - kd_loss: 0.240 - cls_loss: 0.023 - train_loss: 1.241 - train_acc: 1.000


19 Nov 2023 10:39:51 [line:175] [32mTrain Loss: 1.174, Train Acc: 1.000, Test Loss: 1.191, Test Acc: 0.698, lr: 0.00100[0m


Epoch 199/240
2023-11-19 10:42:11 [141/141] - 451.17ms/step - nd_loss: 0.980 - kd_loss: 0.205 - cls_loss: 0.019 - train_loss: 1.204 - train_acc: 1.000


19 Nov 2023 10:42:15 [line:175] [32mTrain Loss: 1.174, Train Acc: 1.000, Test Loss: 1.186, Test Acc: 0.698, lr: 0.00100[0m


Epoch 200/240
2023-11-19 10:44:36 [141/141] - 451.69ms/step - nd_loss: 0.980 - kd_loss: 0.197 - cls_loss: 0.014 - train_loss: 1.191 - train_acc: 1.000


19 Nov 2023 10:44:40 [line:175] [32mTrain Loss: 1.172, Train Acc: 1.000, Test Loss: 1.187, Test Acc: 0.699, lr: 0.00100[0m


Epoch 201/240
2023-11-19 10:47:00 [141/141] - 450.34ms/step - nd_loss: 0.979 - kd_loss: 0.169 - cls_loss: 0.018 - train_loss: 1.166 - train_acc: 1.000


19 Nov 2023 10:47:04 [line:175] [32mTrain Loss: 1.170, Train Acc: 1.000, Test Loss: 1.187, Test Acc: 0.698, lr: 0.00100[0m


Epoch 202/240
2023-11-19 10:49:25 [141/141] - 451.11ms/step - nd_loss: 0.980 - kd_loss: 0.168 - cls_loss: 0.010 - train_loss: 1.157 - train_acc: 1.000


19 Nov 2023 10:49:29 [line:175] [32mTrain Loss: 1.172, Train Acc: 1.000, Test Loss: 1.186, Test Acc: 0.698, lr: 0.00100[0m


Epoch 203/240
2023-11-19 10:51:50 [141/141] - 450.58ms/step - nd_loss: 0.980 - kd_loss: 0.193 - cls_loss: 0.019 - train_loss: 1.191 - train_acc: 1.000


19 Nov 2023 10:51:54 [line:175] [32mTrain Loss: 1.172, Train Acc: 1.000, Test Loss: 1.186, Test Acc: 0.700, lr: 0.00100[0m


Epoch 204/240
2023-11-19 10:54:14 [141/141] - 451.28ms/step - nd_loss: 0.980 - kd_loss: 0.212 - cls_loss: 0.029 - train_loss: 1.221 - train_acc: 1.000


19 Nov 2023 10:54:18 [line:175] [32mTrain Loss: 1.173, Train Acc: 1.000, Test Loss: 1.189, Test Acc: 0.696, lr: 0.00100[0m


Epoch 205/240
2023-11-19 10:56:39 [141/141] - 451.11ms/step - nd_loss: 0.980 - kd_loss: 0.193 - cls_loss: 0.013 - train_loss: 1.185 - train_acc: 1.000


19 Nov 2023 10:56:43 [line:175] [32mTrain Loss: 1.168, Train Acc: 1.000, Test Loss: 1.185, Test Acc: 0.699, lr: 0.00100[0m


Epoch 206/240
2023-11-19 10:59:04 [141/141] - 451.27ms/step - nd_loss: 0.980 - kd_loss: 0.242 - cls_loss: 0.014 - train_loss: 1.236 - train_acc: 1.000


19 Nov 2023 10:59:08 [line:175] [32mTrain Loss: 1.172, Train Acc: 1.000, Test Loss: 1.187, Test Acc: 0.698, lr: 0.00100[0m


Epoch 207/240
2023-11-19 11:01:28 [141/141] - 450.65ms/step - nd_loss: 0.980 - kd_loss: 0.217 - cls_loss: 0.022 - train_loss: 1.219 - train_acc: 1.000


19 Nov 2023 11:01:33 [line:175] [32mTrain Loss: 1.173, Train Acc: 1.000, Test Loss: 1.186, Test Acc: 0.697, lr: 0.00100[0m


Epoch 208/240
2023-11-19 11:03:53 [141/141] - 450.52ms/step - nd_loss: 0.978 - kd_loss: 0.242 - cls_loss: 0.017 - train_loss: 1.238 - train_acc: 1.000


19 Nov 2023 11:03:57 [line:175] [32mTrain Loss: 1.166, Train Acc: 1.000, Test Loss: 1.198, Test Acc: 0.700, lr: 0.00100[0m


Epoch 209/240
2023-11-19 11:06:18 [141/141] - 451.54ms/step - nd_loss: 0.979 - kd_loss: 0.238 - cls_loss: 0.015 - train_loss: 1.232 - train_acc: 1.000


19 Nov 2023 11:06:22 [line:175] [32mTrain Loss: 1.168, Train Acc: 1.000, Test Loss: 1.189, Test Acc: 0.699, lr: 0.00100[0m


Epoch 210/240
2023-11-19 11:08:42 [141/141] - 451.26ms/step - nd_loss: 0.979 - kd_loss: 0.140 - cls_loss: 0.012 - train_loss: 1.131 - train_acc: 1.000


19 Nov 2023 11:08:47 [line:175] [32mTrain Loss: 1.167, Train Acc: 1.000, Test Loss: 1.184, Test Acc: 0.698, lr: 0.00100[0m


Epoch 211/240
2023-11-19 11:11:07 [141/141] - 451.33ms/step - nd_loss: 0.980 - kd_loss: 0.165 - cls_loss: 0.008 - train_loss: 1.153 - train_acc: 1.000


19 Nov 2023 11:11:11 [line:175] [32mTrain Loss: 1.165, Train Acc: 1.000, Test Loss: 1.192, Test Acc: 0.703, lr: 0.00010[0m


Epoch 212/240
2023-11-19 11:13:32 [141/141] - 450.58ms/step - nd_loss: 0.979 - kd_loss: 0.201 - cls_loss: 0.012 - train_loss: 1.192 - train_acc: 1.000


19 Nov 2023 11:13:36 [line:175] [32mTrain Loss: 1.170, Train Acc: 1.000, Test Loss: 1.196, Test Acc: 0.697, lr: 0.00010[0m


Epoch 213/240
2023-11-19 11:15:56 [141/141] - 451.08ms/step - nd_loss: 0.979 - kd_loss: 0.185 - cls_loss: 0.019 - train_loss: 1.183 - train_acc: 1.000


19 Nov 2023 11:16:01 [line:175] [32mTrain Loss: 1.166, Train Acc: 1.000, Test Loss: 1.189, Test Acc: 0.698, lr: 0.00010[0m


Epoch 214/240
2023-11-19 11:18:21 [141/141] - 451.44ms/step - nd_loss: 0.980 - kd_loss: 0.206 - cls_loss: 0.024 - train_loss: 1.210 - train_acc: 1.000


19 Nov 2023 11:18:25 [line:175] [32mTrain Loss: 1.165, Train Acc: 1.000, Test Loss: 1.189, Test Acc: 0.696, lr: 0.00010[0m


Epoch 215/240
2023-11-19 11:20:46 [141/141] - 451.62ms/step - nd_loss: 0.979 - kd_loss: 0.192 - cls_loss: 0.012 - train_loss: 1.183 - train_acc: 1.000


19 Nov 2023 11:20:50 [line:175] [32mTrain Loss: 1.167, Train Acc: 1.000, Test Loss: 1.186, Test Acc: 0.701, lr: 0.00010[0m


Epoch 216/240
2023-11-19 11:23:10 [141/141] - 450.30ms/step - nd_loss: 0.980 - kd_loss: 0.148 - cls_loss: 0.016 - train_loss: 1.144 - train_acc: 1.000


19 Nov 2023 11:23:15 [line:175] [32mTrain Loss: 1.165, Train Acc: 1.000, Test Loss: 1.187, Test Acc: 0.699, lr: 0.00010[0m


Epoch 217/240
2023-11-19 11:25:35 [141/141] - 452.12ms/step - nd_loss: 0.979 - kd_loss: 0.178 - cls_loss: 0.015 - train_loss: 1.173 - train_acc: 1.000


19 Nov 2023 11:25:39 [line:175] [32mTrain Loss: 1.167, Train Acc: 1.000, Test Loss: 1.186, Test Acc: 0.700, lr: 0.00010[0m


Epoch 218/240
2023-11-19 11:28:00 [141/141] - 450.65ms/step - nd_loss: 0.979 - kd_loss: 0.187 - cls_loss: 0.013 - train_loss: 1.179 - train_acc: 1.000


19 Nov 2023 11:28:04 [line:175] [32mTrain Loss: 1.165, Train Acc: 1.000, Test Loss: 1.185, Test Acc: 0.699, lr: 0.00010[0m


Epoch 219/240
2023-11-19 11:30:24 [141/141] - 450.33ms/step - nd_loss: 0.979 - kd_loss: 0.133 - cls_loss: 0.011 - train_loss: 1.123 - train_acc: 1.000


19 Nov 2023 11:30:29 [line:175] [32mTrain Loss: 1.168, Train Acc: 1.000, Test Loss: 1.199, Test Acc: 0.698, lr: 0.00010[0m


Epoch 220/240
2023-11-19 11:32:49 [141/141] - 451.01ms/step - nd_loss: 0.979 - kd_loss: 0.196 - cls_loss: 0.017 - train_loss: 1.192 - train_acc: 1.000


19 Nov 2023 11:32:53 [line:175] [32mTrain Loss: 1.169, Train Acc: 1.000, Test Loss: 1.189, Test Acc: 0.699, lr: 0.00010[0m


Epoch 221/240
2023-11-19 11:35:14 [141/141] - 452.26ms/step - nd_loss: 0.979 - kd_loss: 0.225 - cls_loss: 0.015 - train_loss: 1.219 - train_acc: 1.000


19 Nov 2023 11:35:18 [line:175] [32mTrain Loss: 1.168, Train Acc: 1.000, Test Loss: 1.189, Test Acc: 0.697, lr: 0.00010[0m


Epoch 222/240
2023-11-19 11:37:38 [141/141] - 450.63ms/step - nd_loss: 0.980 - kd_loss: 0.192 - cls_loss: 0.013 - train_loss: 1.184 - train_acc: 1.000


19 Nov 2023 11:37:43 [line:175] [32mTrain Loss: 1.168, Train Acc: 1.000, Test Loss: 1.192, Test Acc: 0.698, lr: 0.00010[0m


Epoch 223/240
2023-11-19 11:40:03 [141/141] - 450.73ms/step - nd_loss: 0.980 - kd_loss: 0.146 - cls_loss: 0.014 - train_loss: 1.140 - train_acc: 1.000


19 Nov 2023 11:40:07 [line:175] [32mTrain Loss: 1.165, Train Acc: 1.000, Test Loss: 1.185, Test Acc: 0.699, lr: 0.00010[0m


Epoch 224/240
2023-11-19 11:42:28 [141/141] - 450.70ms/step - nd_loss: 0.979 - kd_loss: 0.164 - cls_loss: 0.008 - train_loss: 1.151 - train_acc: 1.000


19 Nov 2023 11:42:32 [line:175] [32mTrain Loss: 1.169, Train Acc: 1.000, Test Loss: 1.196, Test Acc: 0.699, lr: 0.00010[0m


Epoch 225/240
2023-11-19 11:44:52 [141/141] - 451.11ms/step - nd_loss: 0.980 - kd_loss: 0.200 - cls_loss: 0.015 - train_loss: 1.195 - train_acc: 1.000


19 Nov 2023 11:44:57 [line:175] [32mTrain Loss: 1.163, Train Acc: 1.000, Test Loss: 1.192, Test Acc: 0.697, lr: 0.00010[0m


Epoch 226/240
2023-11-19 11:47:17 [141/141] - 451.74ms/step - nd_loss: 0.981 - kd_loss: 0.206 - cls_loss: 0.007 - train_loss: 1.194 - train_acc: 1.000


19 Nov 2023 11:47:21 [line:175] [32mTrain Loss: 1.168, Train Acc: 1.000, Test Loss: 1.186, Test Acc: 0.696, lr: 0.00010[0m


Epoch 227/240
2023-11-19 11:49:42 [141/141] - 451.42ms/step - nd_loss: 0.980 - kd_loss: 0.163 - cls_loss: 0.016 - train_loss: 1.159 - train_acc: 1.000


19 Nov 2023 11:49:46 [line:175] [32mTrain Loss: 1.165, Train Acc: 1.000, Test Loss: 1.186, Test Acc: 0.699, lr: 0.00010[0m


Epoch 228/240
2023-11-19 11:52:06 [141/141] - 450.70ms/step - nd_loss: 0.979 - kd_loss: 0.207 - cls_loss: 0.019 - train_loss: 1.206 - train_acc: 1.000


19 Nov 2023 11:52:11 [line:175] [32mTrain Loss: 1.166, Train Acc: 1.000, Test Loss: 1.189, Test Acc: 0.696, lr: 0.00010[0m


Epoch 229/240
2023-11-19 11:54:31 [141/141] - 451.17ms/step - nd_loss: 0.980 - kd_loss: 0.181 - cls_loss: 0.011 - train_loss: 1.172 - train_acc: 1.000


19 Nov 2023 11:54:35 [line:175] [32mTrain Loss: 1.164, Train Acc: 1.000, Test Loss: 1.194, Test Acc: 0.698, lr: 0.00010[0m


Epoch 230/240
2023-11-19 11:56:56 [141/141] - 450.62ms/step - nd_loss: 0.980 - kd_loss: 0.191 - cls_loss: 0.015 - train_loss: 1.186 - train_acc: 1.000


19 Nov 2023 11:57:00 [line:175] [32mTrain Loss: 1.163, Train Acc: 1.000, Test Loss: 1.190, Test Acc: 0.698, lr: 0.00010[0m


Epoch 231/240
2023-11-19 11:59:20 [141/141] - 451.09ms/step - nd_loss: 0.979 - kd_loss: 0.132 - cls_loss: 0.013 - train_loss: 1.125 - train_acc: 1.000


19 Nov 2023 11:59:25 [line:175] [32mTrain Loss: 1.165, Train Acc: 1.000, Test Loss: 1.193, Test Acc: 0.699, lr: 0.00010[0m


Epoch 232/240
2023-11-19 12:01:45 [141/141] - 450.78ms/step - nd_loss: 0.979 - kd_loss: 0.218 - cls_loss: 0.027 - train_loss: 1.224 - train_acc: 1.000


19 Nov 2023 12:01:49 [line:175] [32mTrain Loss: 1.163, Train Acc: 1.000, Test Loss: 1.189, Test Acc: 0.699, lr: 0.00010[0m


Epoch 233/240
2023-11-19 12:04:10 [141/141] - 452.90ms/step - nd_loss: 0.979 - kd_loss: 0.224 - cls_loss: 0.010 - train_loss: 1.213 - train_acc: 1.000


19 Nov 2023 12:04:14 [line:175] [32mTrain Loss: 1.165, Train Acc: 1.000, Test Loss: 1.185, Test Acc: 0.699, lr: 0.00010[0m


Epoch 234/240
2023-11-19 12:06:34 [141/141] - 450.80ms/step - nd_loss: 0.978 - kd_loss: 0.174 - cls_loss: 0.018 - train_loss: 1.170 - train_acc: 1.000


19 Nov 2023 12:06:39 [line:175] [32mTrain Loss: 1.166, Train Acc: 1.000, Test Loss: 1.195, Test Acc: 0.700, lr: 0.00010[0m


Epoch 235/240
2023-11-19 12:08:59 [141/141] - 451.44ms/step - nd_loss: 0.980 - kd_loss: 0.164 - cls_loss: 0.017 - train_loss: 1.160 - train_acc: 1.000


19 Nov 2023 12:09:03 [line:175] [32mTrain Loss: 1.162, Train Acc: 1.000, Test Loss: 1.186, Test Acc: 0.696, lr: 0.00010[0m


Epoch 236/240
2023-11-19 12:11:24 [141/141] - 450.24ms/step - nd_loss: 0.980 - kd_loss: 0.181 - cls_loss: 0.010 - train_loss: 1.171 - train_acc: 1.000


19 Nov 2023 12:11:28 [line:175] [32mTrain Loss: 1.166, Train Acc: 0.999, Test Loss: 1.188, Test Acc: 0.698, lr: 0.00010[0m


Epoch 237/240
2023-11-19 12:13:48 [141/141] - 451.40ms/step - nd_loss: 0.980 - kd_loss: 0.225 - cls_loss: 0.013 - train_loss: 1.218 - train_acc: 1.000


19 Nov 2023 12:13:53 [line:175] [32mTrain Loss: 1.165, Train Acc: 1.000, Test Loss: 1.190, Test Acc: 0.698, lr: 0.00010[0m


Epoch 238/240
2023-11-19 12:16:13 [141/141] - 450.97ms/step - nd_loss: 0.979 - kd_loss: 0.196 - cls_loss: 0.014 - train_loss: 1.189 - train_acc: 1.000


19 Nov 2023 12:16:17 [line:175] [32mTrain Loss: 1.167, Train Acc: 1.000, Test Loss: 1.186, Test Acc: 0.697, lr: 0.00010[0m


Epoch 239/240
2023-11-19 12:18:38 [141/141] - 452.33ms/step - nd_loss: 0.980 - kd_loss: 0.206 - cls_loss: 0.025 - train_loss: 1.212 - train_acc: 1.000


19 Nov 2023 12:18:42 [line:175] [32mTrain Loss: 1.169, Train Acc: 1.000, Test Loss: 1.187, Test Acc: 0.697, lr: 0.00010[0m


Epoch 240/240
2023-11-19 12:21:02 [141/141] - 450.69ms/step - nd_loss: 0.979 - kd_loss: 0.152 - cls_loss: 0.013 - train_loss: 1.144 - train_acc: 1.000


19 Nov 2023 12:21:06 [line:175] [32mTrain Loss: 1.161, Train Acc: 1.000, Test Loss: 1.181, Test Acc: 0.700, lr: 0.00010[0m


## ReviewKD++

In [6]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.backends.cudnn as cudnn

import models_package
from utils.loss_functions import DKDLoss, DirectNormLoss, KDLoss
# from Models.embtrans_cifar import EmbTrans
# from Dataset import CIFAR, IDENPROF
from utils.misc_tools import colorstr, Save_Checkpoint, AverageMeter, epoch_loop_reviewkd

import numpy as np
from pathlib import Path
import time
import json
import random
import logging
import argparse
import warnings
from torch.utils.tensorboard import SummaryWriter
import pdb

import gc
torch.cuda.empty_cache()
gc.collect()

torch.cuda.empty_cache()



if __name__ == "__main__":
    model_names = sorted(name for name in models.__dict__
                         if name.islower() and not name.startswith("__")
                         and callable(models.__dict__[name]))

    parser = argparse.ArgumentParser(description='PyTorch Cifar Training')
    parser.add_argument('-f') # added to make this run in collab
    parser.add_argument("--model_name", type=str, default="resnet8x4_idenprof", choices=model_names, help="model architecture")
    parser.add_argument("--dataset", type=str, default='idenprof')
    parser.add_argument("--epochs", type=int, default=2)
    # parser.add_argument("--epochs", type=int, default=4)
    parser.add_argument("--batch_size", type=int, default=64, help="batch size per gpu")
    parser.add_argument('--workers', default=8, type=int, help='number of data loading workers')
    parser.add_argument("--lr", type=float, default=0.1)
    parser.add_argument('--momentum', type=float, default=0.9, help='SGD momentum')
    parser.add_argument("--weight_decay", type=float, default=5e-4)

    parser.add_argument("--teacher", type=str, default="resnet32x4_idenprof", help="teacher architecture")
    parser.add_argument("--teacher_weights", type=str, default="./weights/resnet_32x4/weights.pth", help="teacher weights path")
    parser.add_argument("--kd_loss_factor", type=float, default=1.0, help="KL loss weight factor")
    parser.add_argument("--nd_loss_factor", type=float, default=1.0, help="ND loss weight factor")
    parser.add_argument("--warm_up", type=float, default=20.0, help='loss weight warm up epochs')

    parser.add_argument("--gpus", type=list, default=[0, 1])
    parser.add_argument('--seed', default=None, type=int, help='seed for initializing training.')
    parser.add_argument("--resume", type=str, help="best ckpt's path to resume most recent training")
    parser.add_argument("--save_dir", type=str, default="./run/IdenProf/ReviewKD++", help="save path, eg, acc_loss, weights, tensorboard, and so on")
    args = parser.parse_args()

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        cudnn.benchmark = False
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    logging.basicConfig(level=logging.INFO, format='%(asctime)s [line:%(lineno)d] %(message)s',
                        datefmt='%d %b %Y %H:%M:%S')
    logger = logging.getLogger(__name__)

    args.batch_size = args.batch_size * len(args.gpus)
    # args.batch_size = args.batch_size * 1

    # logger.info(colorstr('green', "Distribute train, gpus:{}, total batch size:{}, epoch:{}".format(args.gpus, args.batch_size, args.epochs)))
    logger.info(colorstr('green', "Distribute train, total batch size:{}, epoch:{}".format(args.batch_size, args.epochs)))


    # train_set, test_set, num_class = IDENPROF(name=args.dataset)
    num_class = 10
    model = models_package.__dict__[args.model_name](num_class=num_class)

    # if args.model_name in ['wrn40_1_cifar', 'mobilenetv2', 'shufflev1_cifar', 'shufflev2_cifar']:
    #     model = EmbTrans(student=model, model_name=args.model_name)

    teacher = models_package.__dict__[args.teacher](num_class=num_class)

    if args.teacher_weights:
        print('Load Teacher Weights')
        session = boto3.session.Session()
        s3 = session.client('s3')
    
        teacher_model_weights_buffer = io.BytesIO()
        s3.download_fileobj(bucket_name, model_weight_path, teacher_model_weights_buffer)
        teacher_model_weights_buffer.seek(0)  
    
        # Load the model
        # model = models_package.__dict__[model_name](num_class=num_class)
        teacher_ckpt = torch.load(teacher_model_weights_buffer)
        teacher.load_state_dict(teacher_ckpt)
        
        for param in teacher.parameters():
            param.requires_grad = False

    # res56    ./ckpt/teacher/resnet56/center_emb_train.json
    # res32x4  ./ckpt/teacher/resnet32x4/center_emb_train.json
    # wrn40_2  ./ckpt/teacher/wrn_40_2/center_emb_train.json
    # res50    ./ckpt/teacher/resnet50/center_emb_train.json
    # class-mean
    with open("./class_means/idenprof_embedding_fea/resnet32x4_idenprof.json", 'r') as f:
        T_EMB = json.load(f)
    f.close()

    logger.info(colorstr('green', 'Use ' + args.teacher + ' Training ' + args.model_name + ' ...'))
    # Train the model
    epoch_loop_reviewkd(model=model, teacher=teacher, train_loader = trainloader, test_loader = testloader, num_class = num_class, T_EMB=T_EMB, args=args)


19 Nov 2023 23:19:41 [line:83] [32mDistribute train, total batch size:128, epoch:2[0m
19 Nov 2023 23:19:41 [line:1255] Found credentials in shared credentials file: ~/.aws/credentials


Load Teacher Weights


19 Nov 2023 23:19:42 [line:121] [32mUse resnet32x4_idenprof Training resnet8x4_idenprof ...[0m


Epoch 1/2


ValueError: not enough values to unpack (expected 3, got 2)