# Training Notebook for a Binary Classification using the Instant Time Approach

### Import Dependencies

In [4]:
import torch
from torch.utils.data import DataLoader
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
import pandas as pd
import pytorch_lightning as pl
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
import os
from utils import *
import json
import random

model_output_path = 'models'
os.makedirs(model_output_path, exist_ok=True)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
#set random seed for reproducibility
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed(42)
pl.seed_everything(42)
torch.backends.cudnn.deterministic = True

Seed set to 42


### Datasets (k = 12 Folds Cross Validation) & Training of Encoder + MLP

In [6]:
folds = 12
exp_names_per_fold = 3
labels_to_include = ['ETHANOL', 'ACETONE']
hidden_dim = 64
MAX_EPOCHS = 25

df = pd.read_csv("original_training_set_filtered.csv")
df = df[df['label'].isin(labels_to_include)]

exp_labels = df.groupby("exp_name")["label"].first()
label_encoder = LabelEncoder()
label_encoder.fit(exp_labels.values)
exp_name_to_label = exp_labels.to_dict()

In [None]:
splits_path = r"C:\Users\miche\OneDrive\Desktop\tesi\repo-tesi\time_windows_approach\models\binaryETHvsACE\fold_splits_exp_names.json"
splits = json.load(open(splits_path, "r"))
splits = {int(k): v for k, v in splits.items()}

for fold in range(1, folds+1):

    # Fold Splitting
    df_train = df[df["exp_name"].isin(splits[fold]["train"])]
    df_val = df[df["exp_name"].isin(splits[fold]["val"])]
    df_test = df[df["exp_name"].isin(splits[fold]["test"])]

    feature_columns = [col for col in df_train.columns if col not in ["label", "exp_name"]]
    scaler = StandardScaler()
    scaler.fit(df_train[feature_columns])
    mean = scaler.mean_
    std = scaler.scale_

    train_dataset = SensorDataset(df = df_train,
                                  label_encoder=label_encoder,
                                  exp_name_to_label=exp_name_to_label,
                                  mean=mean,
                                  std=std)
    
    val_dataset = SensorDataset(df = df_val,
                                label_encoder=label_encoder,
                                exp_name_to_label=exp_name_to_label,
                                mean=mean,
                                std=std)
    
    test_dataset = SensorDataset(df = df_test,
                                 label_encoder=label_encoder,
                                 exp_name_to_label=exp_name_to_label,
                                 mean=mean,
                                 std=std)
    
    print(f"\nFold {fold}/{folds} -> Train: {len(train_dataset)} (exp: {len(df_train['exp_name'].unique())}), ",
        f"Val: {len(val_dataset)} (exp: {len(df_val['exp_name'].unique())}), ",
        f"Test: {len(test_dataset)} (exp: {len(df_test['exp_name'].unique())})")
    
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4, persistent_workers=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4, persistent_workers=True)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4, persistent_workers=True)

    model = MLPClassifier(
        num_features=len(feature_columns),
        hidden_dim=hidden_dim,
        num_classes=2,
        lr=1e-4,
        dropout=0.2,
        weight_decay=1e-5
    )

    early_stop_callback = EarlyStopping(
        monitor="val_loss",
        patience=5,
        verbose=False,
        mode="min"
    )

    trainer = Trainer(
        max_epochs=MAX_EPOCHS,
        callbacks=[early_stop_callback],
        enable_model_summary=False,  # <- Hides the model summary
        accelerator="gpu" if torch.cuda.is_available() else "cpu",
        devices=1
    )

    trainer.fit(model, train_loader, val_loader)
    # Save the model
    output_name = f"binaryETHvsACE/binaryETHvsACE_MLP_h{hidden_dim}_{fold}.ckpt"
    model_path = os.path.join(model_output_path, output_name)
    trainer.save_checkpoint(model_path)

    # Test the model
    trainer.test(model, test_loader)

    _, train_exp_stats = get_stats(model_path, train_loader, len(label_encoder.classes_), device)
    train_performance, _, _ = compute_experiment_performance(train_exp_stats)
    print(f"Train Performance: {train_performance}")

    _, val_exp_stats = get_stats(model_path, val_loader, len(label_encoder.classes_), device)
    val_performance, _, _ = compute_experiment_performance(val_exp_stats)
    print(f"Validation Performance: {val_performance}")

    _, test_exp_stats = get_stats(model_path, test_loader, len(label_encoder.classes_), device)
    test_performance, _, _ = compute_experiment_performance(test_exp_stats)
    print(f"Test Performance: {test_performance}")

You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\miche\OneDrive\Desktop\tesi\repo-tesi\venv\Lib\site-packages\pytorch_lightning\trainer\connectors\logger_connector\logger_connector.py:76: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



Fold 1/12 -> Train: 60854 (exp: 61),  Val: 6751 (exp: 6),  Test: 5966 (exp: 6)
Epoch 24: 100%|██████████| 1902/1902 [01:12<00:00, 26.42it/s, v_num=0, train_loss=0.182, train_acc=1.000, val_loss=0.278, val_acc=0.865]

`Trainer.fit` stopped: `max_epochs=25` reached.


Epoch 24: 100%|██████████| 1902/1902 [01:12<00:00, 26.40it/s, v_num=0, train_loss=0.182, train_acc=1.000, val_loss=0.278, val_acc=0.865]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 187/187 [00:05<00:00, 32.10it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_acc            0.7497485876083374
        test_loss           0.45618465542793274
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


100%|██████████| 1902/1902 [01:41<00:00, 18.72it/s]


Train Performance: 0.9836065573770492


100%|██████████| 211/211 [01:07<00:00,  3.11it/s]


Validation Performance: 0.8333333333333334


100%|██████████| 187/187 [01:07<00:00,  2.76it/s]


Test Performance: 0.8333333333333334

Fold 2/12 -> Train: 62502 (exp: 61),  Val: 5966 (exp: 6),  Test: 5103 (exp: 6)


You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Epoch 19: 100%|██████████| 1954/1954 [01:12<00:00, 27.11it/s, v_num=1, train_loss=0.071, train_acc=1.000, val_loss=0.402, val_acc=0.784] 


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 160/160 [00:03<00:00, 45.06it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_acc            0.8159905672073364
        test_loss           0.39808669686317444
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


100%|██████████| 1954/1954 [01:34<00:00, 20.60it/s]


Train Performance: 1.0


100%|██████████| 187/187 [01:00<00:00,  3.09it/s]


Validation Performance: 0.8333333333333334


100%|██████████| 160/160 [01:02<00:00,  2.57it/s]


Test Performance: 0.8333333333333334

Fold 3/12 -> Train: 62432 (exp: 61),  Val: 5103 (exp: 6),  Test: 6036 (exp: 6)


You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Epoch 17: 100%|██████████| 1951/1951 [01:08<00:00, 28.44it/s, v_num=2, train_loss=0.324, train_acc=0.812, val_loss=0.462, val_acc=0.753]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 189/189 [00:04<00:00, 44.57it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_acc            0.8426110148429871
        test_loss           0.3443835377693176
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


100%|██████████| 1951/1951 [01:34<00:00, 20.70it/s]


Train Performance: 0.9672131147540983


100%|██████████| 160/160 [01:01<00:00,  2.62it/s]


Validation Performance: 0.8333333333333334


100%|██████████| 189/189 [01:03<00:00,  3.00it/s]


Test Performance: 1.0

Fold 4/12 -> Train: 61311 (exp: 61),  Val: 6036 (exp: 6),  Test: 6224 (exp: 6)


You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Epoch 22: 100%|██████████| 1916/1916 [01:04<00:00, 29.69it/s, v_num=3, train_loss=0.234, train_acc=0.935, val_loss=0.340, val_acc=0.779] 


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 195/195 [00:03<00:00, 49.20it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_acc            0.5620179772377014
        test_loss           0.6597840785980225
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


100%|██████████| 1916/1916 [01:27<00:00, 21.85it/s]


Train Performance: 0.9508196721311475


100%|██████████| 189/189 [01:01<00:00,  3.07it/s]


Validation Performance: 0.8333333333333334


100%|██████████| 195/195 [01:02<00:00,  3.12it/s]


Test Performance: 0.5

Fold 5/12 -> Train: 62200 (exp: 61),  Val: 6224 (exp: 6),  Test: 5147 (exp: 6)


You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Epoch 24: 100%|██████████| 1944/1944 [01:00<00:00, 32.12it/s, v_num=4, train_loss=0.215, train_acc=0.917, val_loss=0.647, val_acc=0.589] 

`Trainer.fit` stopped: `max_epochs=25` reached.


Epoch 24: 100%|██████████| 1944/1944 [01:00<00:00, 32.10it/s, v_num=4, train_loss=0.215, train_acc=0.917, val_loss=0.647, val_acc=0.589]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 161/161 [00:03<00:00, 51.27it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_acc             0.812900722026825
        test_loss           0.36282646656036377
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


100%|██████████| 1944/1944 [01:17<00:00, 25.14it/s]


Train Performance: 0.9508196721311475


100%|██████████| 195/195 [00:50<00:00,  3.84it/s]


Validation Performance: 0.5


100%|██████████| 161/161 [00:52<00:00,  3.08it/s]


Test Performance: 0.8333333333333334

Fold 6/12 -> Train: 63151 (exp: 61),  Val: 5147 (exp: 6),  Test: 5273 (exp: 6)


You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Epoch 17: 100%|██████████| 1974/1974 [01:01<00:00, 31.97it/s, v_num=5, train_loss=0.330, train_acc=0.933, val_loss=0.435, val_acc=0.771]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 165/165 [00:03<00:00, 50.00it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_acc            0.7508059740066528
        test_loss           0.5509343147277832
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


100%|██████████| 1974/1974 [01:38<00:00, 19.97it/s]


Train Performance: 0.9672131147540983


100%|██████████| 161/161 [01:06<00:00,  2.42it/s]


Validation Performance: 0.8333333333333334


100%|██████████| 165/165 [00:58<00:00,  2.84it/s]


Test Performance: 0.6666666666666666

Fold 7/12 -> Train: 61754 (exp: 61),  Val: 5273 (exp: 6),  Test: 6544 (exp: 6)


You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Epoch 24: 100%|██████████| 1930/1930 [01:04<00:00, 30.15it/s, v_num=6, train_loss=0.369, train_acc=0.846, val_loss=0.497, val_acc=0.766]

`Trainer.fit` stopped: `max_epochs=25` reached.


Epoch 24: 100%|██████████| 1930/1930 [01:04<00:00, 30.13it/s, v_num=6, train_loss=0.369, train_acc=0.846, val_loss=0.497, val_acc=0.766]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 205/205 [00:04<00:00, 44.57it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_acc            0.8849327564239502
        test_loss           0.24400033056735992
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


100%|██████████| 1930/1930 [01:26<00:00, 22.37it/s]


Train Performance: 0.9672131147540983


100%|██████████| 165/165 [01:03<00:00,  2.61it/s]


Validation Performance: 0.6666666666666666


100%|██████████| 205/205 [01:06<00:00,  3.07it/s]


Test Performance: 1.0

Fold 8/12 -> Train: 61609 (exp: 61),  Val: 6544 (exp: 6),  Test: 5418 (exp: 6)


You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Epoch 24: 100%|██████████| 1926/1926 [01:06<00:00, 28.94it/s, v_num=7, train_loss=0.268, train_acc=0.778, val_loss=0.252, val_acc=0.884] 

`Trainer.fit` stopped: `max_epochs=25` reached.


Epoch 24: 100%|██████████| 1926/1926 [01:06<00:00, 28.93it/s, v_num=7, train_loss=0.268, train_acc=0.778, val_loss=0.252, val_acc=0.884]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 170/170 [00:03<00:00, 46.90it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_acc            0.7927279472351074
        test_loss           0.41747820377349854
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


100%|██████████| 1926/1926 [01:26<00:00, 22.37it/s]


Train Performance: 0.9508196721311475


100%|██████████| 205/205 [01:00<00:00,  3.37it/s]


Validation Performance: 1.0


  0%|          | 0/170 [00:00<?, ?it/s]

In [7]:
#breakpoint
breakpoint_fold = 8

splits_path = r"C:\Users\miche\OneDrive\Desktop\tesi\repo-tesi\time_windows_approach\models\binaryETHvsACE\fold_splits_exp_names.json"
splits = json.load(open(splits_path, "r"))
splits = {int(k): v for k, v in splits.items()}

for fold in range(breakpoint_fold, folds+1):

    # Fold Splitting
    df_train = df[df["exp_name"].isin(splits[fold]["train"])]
    df_val = df[df["exp_name"].isin(splits[fold]["val"])]
    df_test = df[df["exp_name"].isin(splits[fold]["test"])]

    feature_columns = [col for col in df_train.columns if col not in ["label", "exp_name"]]
    scaler = StandardScaler()
    scaler.fit(df_train[feature_columns])
    mean = scaler.mean_
    std = scaler.scale_

    train_dataset = SensorDataset(df = df_train,
                                  label_encoder=label_encoder,
                                  exp_name_to_label=exp_name_to_label,
                                  mean=mean,
                                  std=std)
    
    val_dataset = SensorDataset(df = df_val,
                                label_encoder=label_encoder,
                                exp_name_to_label=exp_name_to_label,
                                mean=mean,
                                std=std)
    
    test_dataset = SensorDataset(df = df_test,
                                 label_encoder=label_encoder,
                                 exp_name_to_label=exp_name_to_label,
                                 mean=mean,
                                 std=std)
    
    print(f"\nFold {fold}/{folds} -> Train: {len(train_dataset)} (exp: {len(df_train['exp_name'].unique())}), ",
        f"Val: {len(val_dataset)} (exp: {len(df_val['exp_name'].unique())}), ",
        f"Test: {len(test_dataset)} (exp: {len(df_test['exp_name'].unique())})")
    
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4, persistent_workers=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4, persistent_workers=True)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4, persistent_workers=True)

    model = MLPClassifier(
        num_features=len(feature_columns),
        hidden_dim=hidden_dim,
        num_classes=2,
        lr=1e-4,
        dropout=0.2,
        weight_decay=1e-5
    )

    early_stop_callback = EarlyStopping(
        monitor="val_loss",
        patience=5,
        verbose=False,
        mode="min"
    )

    trainer = Trainer(
        max_epochs=MAX_EPOCHS,
        callbacks=[early_stop_callback],
        enable_model_summary=False,  # <- Hides the model summary
        accelerator="gpu" if torch.cuda.is_available() else "cpu",
        devices=1
    )

    trainer.fit(model, train_loader, val_loader)
    # Save the model
    output_name = f"binaryETHvsACE/binaryETHvsACE_MLP_h{hidden_dim}_{fold}.ckpt"
    model_path = os.path.join(model_output_path, output_name)
    trainer.save_checkpoint(model_path)

    # Test the model
    trainer.test(model, test_loader)

    _, train_exp_stats = get_stats(model_path, train_loader, len(label_encoder.classes_), device)
    train_performance, _, _ = compute_experiment_performance(train_exp_stats)
    print(f"Train Performance: {train_performance}")

    _, val_exp_stats = get_stats(model_path, val_loader, len(label_encoder.classes_), device)
    val_performance, _, _ = compute_experiment_performance(val_exp_stats)
    print(f"Validation Performance: {val_performance}")

    _, test_exp_stats = get_stats(model_path, test_loader, len(label_encoder.classes_), device)
    test_performance, _, _ = compute_experiment_performance(test_exp_stats)
    print(f"Test Performance: {test_performance}")

You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\miche\OneDrive\Desktop\tesi\repo-tesi\venv\Lib\site-packages\pytorch_lightning\trainer\connectors\logger_connector\logger_connector.py:76: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



Fold 8/12 -> Train: 61609 (exp: 61),  Val: 6544 (exp: 6),  Test: 5418 (exp: 6)
Epoch 24: 100%|██████████| 1926/1926 [00:47<00:00, 40.97it/s, v_num=7, train_loss=0.301, train_acc=0.778, val_loss=0.245, val_acc=0.886] 

`Trainer.fit` stopped: `max_epochs=25` reached.


Epoch 24: 100%|██████████| 1926/1926 [00:47<00:00, 40.96it/s, v_num=7, train_loss=0.301, train_acc=0.778, val_loss=0.245, val_acc=0.886]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 170/170 [00:04<00:00, 41.90it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_acc            0.8045403957366943
        test_loss           0.39853134751319885
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


100%|██████████| 1926/1926 [01:31<00:00, 20.95it/s]


Train Performance: 0.9508196721311475


100%|██████████| 205/205 [00:58<00:00,  3.48it/s]


Validation Performance: 1.0


100%|██████████| 170/170 [00:57<00:00,  2.94it/s]


Test Performance: 0.8333333333333334

Fold 9/12 -> Train: 61380 (exp: 61),  Val: 5418 (exp: 6),  Test: 6773 (exp: 6)


You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Epoch 24: 100%|██████████| 1919/1919 [01:02<00:00, 30.52it/s, v_num=8, train_loss=0.213, train_acc=0.750, val_loss=0.370, val_acc=0.873] 

`Trainer.fit` stopped: `max_epochs=25` reached.


Epoch 24: 100%|██████████| 1919/1919 [01:02<00:00, 30.50it/s, v_num=8, train_loss=0.213, train_acc=0.750, val_loss=0.370, val_acc=0.873]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 212/212 [00:04<00:00, 46.13it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_acc            0.7711501717567444
        test_loss           0.45131757855415344
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


100%|██████████| 1919/1919 [01:24<00:00, 22.67it/s]


Train Performance: 0.9672131147540983


100%|██████████| 170/170 [00:54<00:00,  3.11it/s]


Validation Performance: 1.0


100%|██████████| 212/212 [00:58<00:00,  3.62it/s]


Test Performance: 0.8333333333333334

Fold 10/12 -> Train: 60487 (exp: 61),  Val: 6773 (exp: 6),  Test: 6311 (exp: 6)


You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Epoch 5: 100%|██████████| 1891/1891 [01:14<00:00, 25.28it/s, v_num=9, train_loss=0.310, train_acc=0.857, val_loss=1.180, val_acc=0.675]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 198/198 [00:04<00:00, 46.62it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_acc            0.8399619460105896
        test_loss           0.4312269687652588
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


100%|██████████| 1891/1891 [01:23<00:00, 22.72it/s]


Train Performance: 0.8524590163934426


100%|██████████| 212/212 [00:57<00:00,  3.70it/s]


Validation Performance: 0.8333333333333334


100%|██████████| 198/198 [01:06<00:00,  3.00it/s]


Test Performance: 1.0

Fold 11/12 -> Train: 60223 (exp: 61),  Val: 6311 (exp: 6),  Test: 7037 (exp: 6)


You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Epoch 5: 100%|██████████| 1882/1882 [01:00<00:00, 31.17it/s, v_num=10, train_loss=0.471, train_acc=0.742, val_loss=0.437, val_acc=0.755]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 220/220 [00:04<00:00, 46.40it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_acc             0.603950560092926
        test_loss           0.5233703851699829
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


100%|██████████| 1882/1882 [01:15<00:00, 24.85it/s]


Train Performance: 0.8688524590163934


100%|██████████| 198/198 [00:49<00:00,  3.98it/s]


Validation Performance: 0.8333333333333334


100%|██████████| 220/220 [00:54<00:00,  4.06it/s]


Test Performance: 0.6666666666666666

Fold 12/12 -> Train: 65546 (exp: 66),  Val: 7037 (exp: 6),  Test: 988 (exp: 1)


You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Epoch 5: 100%|██████████| 2049/2049 [01:06<00:00, 30.73it/s, v_num=11, train_loss=0.270, train_acc=0.900, val_loss=0.535, val_acc=0.599]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 31/31 [00:00<00:00, 61.94it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_acc           0.037449393421411514
        test_loss            1.256492257118225
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


100%|██████████| 2049/2049 [01:28<00:00, 23.16it/s]


Train Performance: 0.8787878787878788


100%|██████████| 220/220 [00:52<00:00,  4.21it/s]


Validation Performance: 0.6666666666666666


100%|██████████| 31/31 [00:55<00:00,  1.81s/it]

Test Performance: 0.0



