In [1]:
# Default libs
import os
import warnings
from pathlib import Path
import numpy as np

# Set the working dir
# The notebook may be launched from any sub-directory of the repository, so
# locate the repository root by name and make it the working directory. The
# relative paths used later (e.g. 'data/Galaxy10.h5', 'logs/logging/...') are
# then resolved against that root.
PROJECT_ROOT_NAME = "vision-skanformer"
_start_dir = Path().resolve()

# Nearest directory (the start directory itself or one of its ancestors) whose
# name matches the project root; None when no such directory exists.
PROJECT_DIR = next(
    (
        candidate
        for candidate in (_start_dir, *_start_dir.parents)
        if candidate.name == PROJECT_ROOT_NAME
    ),
    None,
)

# Fail loudly instead of silently chdir-ing to the filesystem root, which would
# only surface later as a confusing import error or a missing-file error.
if PROJECT_DIR is None:
    raise FileNotFoundError(
        f"Could not find a directory named '{PROJECT_ROOT_NAME}' at or above "
        f"'{_start_dir}'. Launch the notebook from inside the project."
    )

if _start_dir != PROJECT_DIR:
    os.chdir(PROJECT_DIR)

# Specialized modules
from src.utils.data.transform import Resize
from src.utils.data.dataset import Galaxy10_SDSS_Dataset
from src.utils.data.dataloader import get_loaders
from src.models.viskan import VisionSKANformer
from src.engine.trainer import TrainerVISKAN
from src.callbacks.early_stopping import EarlyStopping
from logs.config.logging_config import setup_logger

# PyTorch libraries
import torch
from torch import nn, optim
from torch.optim import lr_scheduler

# Settings
SEED = 42  # one value shared by every random number generator seeded below

# Suppress all Python warnings for the rest of the session.
warnings.filterwarnings("ignore")

# Seed NumPy and PyTorch (CPU generator plus every CUDA device).
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# Ask cuDNN for deterministic kernels and disable its auto-tuner.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [2]:
# --- Hyperparameters ---------------------------------------------------------
batch_size = 16
split_ratios = [0.8, 0.1, 0.1]  # presumably train/val/test fractions -- confirm in get_loaders
lr = 1e-4
image_size = 64
num_epochs = 50
patience = 5  # handed to both EarlyStopping and the trainer

# --- Data --------------------------------------------------------------------
# Order matters here: keep data loading ahead of model construction so any
# random state consumed by either step is drawn in the same sequence.
transform = Resize(image_size)
dataset = Galaxy10_SDSS_Dataset("data/Galaxy10.h5", transform)
train_loader, val_loader, test_loader = get_loaders(
    dataset, batch_size, split_ratios
)

# --- Model, loss and optimisation --------------------------------------------
model = VisionSKANformer(image_size=image_size, patch_size=4).to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)
# Multiply the learning rate by 0.95 on every scheduler step.
scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=0.95)

# --- Logging and early stopping ----------------------------------------------
logger = setup_logger(
    logger_name="TrainerVISKAN",
    log_file="logs/logging/training_01.log",
)
callback = EarlyStopping(patience=patience, verbose=True)

# --- Training ----------------------------------------------------------------
# Arguments stay positional, in the original order, because the TrainerVISKAN
# signature is defined outside this notebook.
trainer = TrainerVISKAN(
    train_loader,
    val_loader,
    test_loader,
    model,
    criterion,
    optimizer,
    scheduler,
    lr,
    logger,
    image_size,
    batch_size,
    num_epochs,
    callback,
    patience,
)

trainer.train()

TrainerVISKAN - INFO - Training initialized with batch_size=16, lr=0.0001, num_epochs=50, optimizer=Adam, model=VisionSKANformer


Training Progress:   0%|          | 0/54500 [00:00<?, ?it/s]

TrainerVISKAN - INFO - Epoch 1/50: Train Loss=1.8935, Train Acc=0.2524, Val Loss=1.6395, Val Acc=0.3802, LR=0.000100


Epoch 1/50: Train Loss=1.8935, Val Loss=1.6395
Saved checkpoint at epoch 0 with val_loss 1.6394730921290794


TrainerVISKAN - INFO - Epoch 2/50: Train Loss=1.4718, Train Acc=0.4270, Val Loss=1.3566, Val Acc=0.4720, LR=0.000095


Epoch 2/50: Train Loss=1.4718, Val Loss=1.3566
Saved checkpoint at epoch 1 with val_loss 1.356576092972243


TrainerVISKAN - INFO - Epoch 3/50: Train Loss=1.3161, Train Acc=0.5009, Val Loss=1.2955, Val Acc=0.5298, LR=0.000090


Epoch 3/50: Train Loss=1.3161, Val Loss=1.2955
Saved checkpoint at epoch 2 with val_loss 1.295548815153628


TrainerVISKAN - INFO - Epoch 4/50: Train Loss=1.2213, Train Acc=0.5363, Val Loss=1.2437, Val Acc=0.5253, LR=0.000086


Epoch 4/50: Train Loss=1.2213, Val Loss=1.2437
Saved checkpoint at epoch 3 with val_loss 1.2437175074446847


TrainerVISKAN - INFO - Epoch 5/50: Train Loss=1.1502, Train Acc=0.5659, Val Loss=1.2992, Val Acc=0.5129, LR=0.000081


Epoch 5/50: Train Loss=1.1502, Val Loss=1.2992
EarlyStopping counter: 1 out of 5


TrainerVISKAN - INFO - Epoch 6/50: Train Loss=1.1004, Train Acc=0.5810, Val Loss=1.2839, Val Acc=0.5092, LR=0.000077


Epoch 6/50: Train Loss=1.1004, Val Loss=1.2839
EarlyStopping counter: 2 out of 5


TrainerVISKAN - INFO - Epoch 7/50: Train Loss=1.0580, Train Acc=0.5932, Val Loss=1.3078, Val Acc=0.5193, LR=0.000074


Epoch 7/50: Train Loss=1.0580, Val Loss=1.3078
EarlyStopping counter: 3 out of 5


TrainerVISKAN - INFO - Epoch 8/50: Train Loss=1.0374, Train Acc=0.6069, Val Loss=1.1198, Val Acc=0.5877, LR=0.000070


Epoch 8/50: Train Loss=1.0374, Val Loss=1.1198
Saved checkpoint at epoch 7 with val_loss 1.1197907545349814


TrainerVISKAN - INFO - Epoch 9/50: Train Loss=1.0089, Train Acc=0.6145, Val Loss=1.0710, Val Acc=0.6079, LR=0.000066


Epoch 9/50: Train Loss=1.0089, Val Loss=1.0710
Saved checkpoint at epoch 8 with val_loss 1.0710295206696971


TrainerVISKAN - INFO - Epoch 10/50: Train Loss=0.9792, Train Acc=0.6250, Val Loss=1.0487, Val Acc=0.6001, LR=0.000063


Epoch 10/50: Train Loss=0.9792, Val Loss=1.0487
Saved checkpoint at epoch 9 with val_loss 1.0487378748652034


TrainerVISKAN - INFO - Epoch 11/50: Train Loss=0.9565, Train Acc=0.6363, Val Loss=1.2309, Val Acc=0.5519, LR=0.000060


Epoch 11/50: Train Loss=0.9565, Val Loss=1.2309
EarlyStopping counter: 1 out of 5


TrainerVISKAN - INFO - Epoch 12/50: Train Loss=0.9338, Train Acc=0.6449, Val Loss=1.1287, Val Acc=0.5886, LR=0.000057


Epoch 12/50: Train Loss=0.9338, Val Loss=1.1287
EarlyStopping counter: 2 out of 5


TrainerVISKAN - INFO - Epoch 13/50: Train Loss=0.9101, Train Acc=0.6544, Val Loss=1.0102, Val Acc=0.6171, LR=0.000054


Epoch 13/50: Train Loss=0.9101, Val Loss=1.0102
Saved checkpoint at epoch 12 with val_loss 1.0101610193348893


TrainerVISKAN - INFO - Epoch 14/50: Train Loss=0.8988, Train Acc=0.6634, Val Loss=0.9253, Val Acc=0.6543, LR=0.000051


Epoch 14/50: Train Loss=0.8988, Val Loss=0.9253
Saved checkpoint at epoch 13 with val_loss 0.9252830495300153


TrainerVISKAN - INFO - Epoch 15/50: Train Loss=0.8805, Train Acc=0.6674, Val Loss=0.9498, Val Acc=0.6414, LR=0.000049


Epoch 15/50: Train Loss=0.8805, Val Loss=0.9498
EarlyStopping counter: 1 out of 5


TrainerVISKAN - INFO - Epoch 16/50: Train Loss=0.8732, Train Acc=0.6711, Val Loss=1.1103, Val Acc=0.5854, LR=0.000046


Epoch 16/50: Train Loss=0.8732, Val Loss=1.1103
EarlyStopping counter: 2 out of 5


TrainerVISKAN - INFO - Epoch 17/50: Train Loss=0.8694, Train Acc=0.6739, Val Loss=0.9889, Val Acc=0.6180, LR=0.000044


Epoch 17/50: Train Loss=0.8694, Val Loss=0.9889
EarlyStopping counter: 3 out of 5


TrainerVISKAN - INFO - Epoch 18/50: Train Loss=0.8423, Train Acc=0.6836, Val Loss=1.0849, Val Acc=0.5946, LR=0.000042


Epoch 18/50: Train Loss=0.8423, Val Loss=1.0849
EarlyStopping counter: 4 out of 5


TrainerVISKAN - INFO - Epoch 19/50: Train Loss=0.8266, Train Acc=0.6891, Val Loss=0.9963, Val Acc=0.6368, LR=0.000040
TrainerVISKAN - INFO - Early stopping triggered


Epoch 19/50: Train Loss=0.8266, Val Loss=0.9963
EarlyStopping counter: 5 out of 5


TrainerVISKAN - INFO - Test results - Loss: 0.9820, Accuracy: 0.6397


Test Loss: 0.9820, Test Acc: 0.6397
