In [1]:
# Default libs
import os
import warnings
from pathlib import Path
import numpy as np

# Set the working dir
PROJECT_ROOT_NAME = "vision-skanformer"


def find_project_root(start, root_name):
    """Return the nearest directory named ``root_name`` at or above ``start``.

    Args:
        start: Path at which the upward search begins.
        root_name: Directory name that marks the project root.

    Returns:
        ``start`` itself, or its closest ancestor whose final path component
        equals ``root_name``; ``None`` when no directory on the way up to the
        filesystem root carries that name.
    """
    for candidate in (start, *start.parents):
        if candidate.name == root_name:
            return candidate
    return None


_start_dir = Path().resolve()
PROJECT_DIR = find_project_root(_start_dir, PROJECT_ROOT_NAME)

if PROJECT_DIR is None:
    # Never fall through to the filesystem root: changing directory to "/"
    # would hide the real problem behind unrelated import / file errors.
    warnings.warn(
        f"Project root '{PROJECT_ROOT_NAME}' not found above {_start_dir}; "
        "keeping the current working directory.",
        RuntimeWarning,
    )
    PROJECT_DIR = _start_dir
elif PROJECT_DIR != _start_dir:
    # Run the notebook from the project root so that the relative paths used
    # below (e.g. 'data/Galaxy10.h5', 'logs/...') are resolved from there.
    os.chdir(PROJECT_DIR)

# Specialized modules
from src.utils.data.transform import Resize
from src.utils.data.dataset import Galaxy10_SDSS_Dataset
from src.utils.data.dataloader import get_loaders
from src.models.cnn import SimpleCNN
from src.engine.trainer import TrainerVISKAN
from src.callbacks.early_stopping import EarlyStopping
from logs.config.logging_config import setup_logger

# PyTorch libraries
import torch
from torch import nn, optim
from torch.optim import lr_scheduler

# Settings
SEED = 42  # single seed shared by every random number generator below

# Suppress all Python warnings from this point on.
warnings.filterwarnings("ignore")

# Seed NumPy, then PyTorch (default generator and every CUDA device).
for _seed_fn in (np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    _seed_fn(SEED)

# cuDNN: turn benchmark mode off and request deterministic behaviour.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [2]:
# --- Hyperparameters -------------------------------------------------------
image_size = 64                 # used by Resize, SimpleCNN and the trainer
batch_size = 16
split_ratios = [0.8, 0.1, 0.1]  # presumably train/val/test fractions -- confirm in get_loaders
num_epochs = 50
lr = 1e-4
patience = 5                    # passed to both EarlyStopping and the trainer

# --- Data ------------------------------------------------------------------
transform = Resize(image_size)
dataset = Galaxy10_SDSS_Dataset("data/Galaxy10.h5", transform)
train_loader, val_loader, test_loader = get_loaders(dataset, batch_size, split_ratios)

# --- Model, loss and optimisation ------------------------------------------
model = SimpleCNN(image_size=image_size).to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)
# Each scheduler step multiplies the learning rate by 0.97.
scheduler = lr_scheduler.ExponentialLR(optimizer=optimizer, gamma=0.97)

# --- Logging and early stopping --------------------------------------------
logger = setup_logger(
    logger_name="TrainerVISKAN",
    log_file="logs/logging/training_01.log",
)
callback = EarlyStopping(patience=patience, verbose=True)

# --- Training --------------------------------------------------------------
# All arguments are positional; their order must match TrainerVISKAN's
# signature (defined in src.engine.trainer, not visible in this notebook).
trainer = TrainerVISKAN(
    train_loader, val_loader, test_loader,
    model, criterion, optimizer, scheduler,
    lr, logger, image_size, batch_size, num_epochs,
    callback, patience,
)
trainer.train()

TrainerVISKAN - INFO - Training initialized with batch_size=16, lr=0.0001, num_epochs=50, optimizer=Adam, model=SimpleCNN


Training Progress:   0%|          | 0/54500 [00:00<?, ?it/s]

TrainerVISKAN - INFO - Epoch 1/50: Train Loss=1.3444, Train Acc=0.4888, Val Loss=1.3350, Val Acc=0.4660, LR=0.000100


Epoch 1/50: Train Loss=1.3444, Val Loss=1.3350
Saved checkpoint at epoch 0 with val_loss 1.335035256446229


TrainerVISKAN - INFO - Epoch 2/50: Train Loss=1.0295, Train Acc=0.6043, Val Loss=1.1022, Val Acc=0.5542, LR=0.000097


Epoch 2/50: Train Loss=1.0295, Val Loss=1.1022
Saved checkpoint at epoch 1 with val_loss 1.1022454484873019


TrainerVISKAN - INFO - Epoch 3/50: Train Loss=0.8739, Train Acc=0.6753, Val Loss=0.9939, Val Acc=0.6001, LR=0.000094


Epoch 3/50: Train Loss=0.8739, Val Loss=0.9939
Saved checkpoint at epoch 2 with val_loss 0.9939498108827926


TrainerVISKAN - INFO - Epoch 4/50: Train Loss=0.7630, Train Acc=0.7199, Val Loss=0.9269, Val Acc=0.6579, LR=0.000091


Epoch 4/50: Train Loss=0.7630, Val Loss=0.9269
Saved checkpoint at epoch 3 with val_loss 0.9268960190870217


TrainerVISKAN - INFO - Epoch 5/50: Train Loss=0.6830, Train Acc=0.7482, Val Loss=0.8469, Val Acc=0.6910, LR=0.000089


Epoch 5/50: Train Loss=0.6830, Val Loss=0.8469
Saved checkpoint at epoch 4 with val_loss 0.8469180003342222


TrainerVISKAN - INFO - Epoch 6/50: Train Loss=0.6185, Train Acc=0.7765, Val Loss=0.8017, Val Acc=0.7043, LR=0.000086


Epoch 6/50: Train Loss=0.6185, Val Loss=0.8017
Saved checkpoint at epoch 5 with val_loss 0.8016817015830917


TrainerVISKAN - INFO - Epoch 7/50: Train Loss=0.5600, Train Acc=0.7944, Val Loss=0.7856, Val Acc=0.7094, LR=0.000083


Epoch 7/50: Train Loss=0.5600, Val Loss=0.7856
Saved checkpoint at epoch 6 with val_loss 0.7855979850031675


TrainerVISKAN - INFO - Epoch 8/50: Train Loss=0.5322, Train Acc=0.8094, Val Loss=0.7717, Val Acc=0.7080, LR=0.000081


Epoch 8/50: Train Loss=0.5322, Val Loss=0.7717
Saved checkpoint at epoch 7 with val_loss 0.7716881121321486


TrainerVISKAN - INFO - Epoch 9/50: Train Loss=0.4736, Train Acc=0.8291, Val Loss=0.7491, Val Acc=0.7296, LR=0.000078


Epoch 9/50: Train Loss=0.4736, Val Loss=0.7491
Saved checkpoint at epoch 8 with val_loss 0.7490994857745613


TrainerVISKAN - INFO - Epoch 10/50: Train Loss=0.4336, Train Acc=0.8478, Val Loss=0.7348, Val Acc=0.7332, LR=0.000076


Epoch 10/50: Train Loss=0.4336, Val Loss=0.7348
Saved checkpoint at epoch 9 with val_loss 0.73477883037621


TrainerVISKAN - INFO - Epoch 11/50: Train Loss=0.4079, Train Acc=0.8585, Val Loss=0.7612, Val Acc=0.7287, LR=0.000074


Epoch 11/50: Train Loss=0.4079, Val Loss=0.7612
EarlyStopping counter: 1 out of 5


TrainerVISKAN - INFO - Epoch 12/50: Train Loss=0.3836, Train Acc=0.8668, Val Loss=0.7683, Val Acc=0.7231, LR=0.000072


Epoch 12/50: Train Loss=0.3836, Val Loss=0.7683
EarlyStopping counter: 2 out of 5


TrainerVISKAN - INFO - Epoch 13/50: Train Loss=0.3633, Train Acc=0.8724, Val Loss=0.7290, Val Acc=0.7319, LR=0.000069


Epoch 13/50: Train Loss=0.3633, Val Loss=0.7290
Saved checkpoint at epoch 12 with val_loss 0.7290000458366833


TrainerVISKAN - INFO - Epoch 14/50: Train Loss=0.3356, Train Acc=0.8825, Val Loss=0.7430, Val Acc=0.7415, LR=0.000067


Epoch 14/50: Train Loss=0.3356, Val Loss=0.7430
EarlyStopping counter: 1 out of 5


TrainerVISKAN - INFO - Epoch 15/50: Train Loss=0.3167, Train Acc=0.8895, Val Loss=0.7736, Val Acc=0.7365, LR=0.000065


Epoch 15/50: Train Loss=0.3167, Val Loss=0.7736
EarlyStopping counter: 2 out of 5


TrainerVISKAN - INFO - Epoch 16/50: Train Loss=0.2905, Train Acc=0.9011, Val Loss=0.7592, Val Acc=0.7415, LR=0.000063


Epoch 16/50: Train Loss=0.2905, Val Loss=0.7592
EarlyStopping counter: 3 out of 5


TrainerVISKAN - INFO - Epoch 17/50: Train Loss=0.2783, Train Acc=0.9040, Val Loss=0.7184, Val Acc=0.7576, LR=0.000061


Epoch 17/50: Train Loss=0.2783, Val Loss=0.7184
Saved checkpoint at epoch 16 with val_loss 0.7184175440855153


TrainerVISKAN - INFO - Epoch 18/50: Train Loss=0.2589, Train Acc=0.9107, Val Loss=0.7570, Val Acc=0.7502, LR=0.000060


Epoch 18/50: Train Loss=0.2589, Val Loss=0.7570
EarlyStopping counter: 1 out of 5


TrainerVISKAN - INFO - Epoch 19/50: Train Loss=0.2533, Train Acc=0.9122, Val Loss=0.7024, Val Acc=0.7608, LR=0.000058


Epoch 19/50: Train Loss=0.2533, Val Loss=0.7024
Saved checkpoint at epoch 18 with val_loss 0.7024269443658703


TrainerVISKAN - INFO - Epoch 20/50: Train Loss=0.2374, Train Acc=0.9188, Val Loss=0.7066, Val Acc=0.7553, LR=0.000056


Epoch 20/50: Train Loss=0.2374, Val Loss=0.7066
EarlyStopping counter: 1 out of 5


TrainerVISKAN - INFO - Epoch 21/50: Train Loss=0.2226, Train Acc=0.9246, Val Loss=0.7627, Val Acc=0.7507, LR=0.000054


Epoch 21/50: Train Loss=0.2226, Val Loss=0.7627
EarlyStopping counter: 2 out of 5


TrainerVISKAN - INFO - Epoch 22/50: Train Loss=0.2130, Train Acc=0.9297, Val Loss=0.7545, Val Acc=0.7567, LR=0.000053


Epoch 22/50: Train Loss=0.2130, Val Loss=0.7545
EarlyStopping counter: 3 out of 5


TrainerVISKAN - INFO - Epoch 23/50: Train Loss=0.2025, Train Acc=0.9293, Val Loss=0.7836, Val Acc=0.7507, LR=0.000051


Epoch 23/50: Train Loss=0.2025, Val Loss=0.7836
EarlyStopping counter: 4 out of 5


TrainerVISKAN - INFO - Epoch 24/50: Train Loss=0.1951, Train Acc=0.9323, Val Loss=0.7742, Val Acc=0.7493, LR=0.000050
TrainerVISKAN - INFO - Early stopping triggered


Epoch 24/50: Train Loss=0.1951, Val Loss=0.7742
EarlyStopping counter: 5 out of 5


TrainerVISKAN - INFO - Test results - Loss: 0.7960, Accuracy: 0.7402


Test Loss: 0.7960, Test Acc: 0.7402
