In [1]:
# requirements: nilearn, nibabel, numpy, pandas
from nilearn.input_data import NiftiSpheresMasker
import numpy as np
import pandas as pd
import os
import re
import nibabel as nib
from nilearn import image
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
from pathlib import Path
import sys
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from statsmodels.tsa.ar_model import AutoReg

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import StratifiedKFold, train_test_split, cross_val_score
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA, FastICA
from sklearn.feature_selection import SelectPercentile, f_classif
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.inspection import permutation_importance

from nilearn import datasets, masking, image
from nilearn.masking import compute_epi_mask, apply_mask, unmask
from nilearn.plotting import plot_stat_map, plot_glass_brain, show
from nilearn.image import resample_img, mean_img
import nibabel as nib
import optuna
import pywt
import pickle
import joblib
import matplotlib.pyplot as plt
import warnings
from sklearn.exceptions import ConvergenceWarning
from torch.utils.data import Dataset
from nilearn.input_data import NiftiLabelsMasker

warnings.filterwarnings("ignore")

optuna.logging.set_verbosity(optuna.logging.INFO)

# -----------------------------
# Device check
# -----------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)


  from nilearn.input_data import NiftiSpheresMasker


Using device: cuda


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
pattern_dir = "Data/beta_images_activation_async"
betas_path = os.path.join(pattern_dir, 'single_trial_sess1/beta_0001.nii')
R_4D = image.load_img(betas_path)
print(f"Shape of betas: {R_4D.shape}")

mask_img1 = image.load_img(os.path.join(pattern_dir, 'single_trial_sess1/mask.nii'))
mask_img2 = image.load_img(os.path.join(pattern_dir, 'single_trial_sess2/mask.nii'))
print(f"Shape of mask1: {mask_img1.shape}, Shape of mask2: {mask_img2.shape}")

Shape of betas: (79, 95, 79)
Shape of mask1: (79, 95, 79), Shape of mask2: (79, 95, 79)


In [3]:
def load_beta_dataset(root):
    root = Path(root)
    base = root / "beta_images_activation_async"

    sess_dirs = [
        base / "single_trial_sess1",
        base / "single_trial_sess2",
    ]

    X, y = [], []

    # all label CSVs
    csv_files = sorted(base.glob("beta_labels_subject*_session*.csv"))

    for csv_file in csv_files:
        df = pd.read_csv(csv_file)

        for _, row in df.iterrows():
            fname = row["file"]
            label = row["class"]

            # look in both session folders
            found_path = None
            for d in sess_dirs:
                candidate = d / fname
                if candidate.exists():
                    found_path = candidate
                    break

            if found_path is None:
                raise FileNotFoundError(f"Could not find {fname} in session folders")

            # load nii â†’ keep 3D shape
            img = nib.load(str(found_path)).get_fdata()

            # FIX NaNs HERE
            img = np.nan_to_num(img, nan=0.0)

            X.append(img)
            y.append(label)

    return np.array(X), np.array(y)


X_all, y_all = load_beta_dataset("Data")


In [4]:
print(f"X shape: {X_all.shape}, y shape: {y_all.shape}")

X shape: (320, 79, 95, 79), y shape: (320,)


In [5]:

# Hold-out test BEFORE any Optuna tuning
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X_all, y_all,
    test_size=0.15,
    stratify=y_all,
    random_state=42
)

print("Train shape:", X_train_val.shape)
print("Test shape:", X_test.shape)

Train shape: (272, 79, 95, 79)
Test shape: (48, 79, 95, 79)


In [6]:
class BimodalDataset(Dataset):
    def __init__(self, fmri, eeg, labels, label_encoder=None):
        self.fmri = fmri
        self.eeg = eeg

        if label_encoder is None:
            self.encoder = LabelEncoder()
            self.labels = self.encoder.fit_transform(labels)
        else:
            self.encoder = label_encoder
            self.labels = self.encoder.transform(labels)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, i):
        return {
            "fmri": torch.tensor(self.fmri[i], dtype=torch.float32),
            "eeg": torch.tensor(self.eeg[i], dtype=torch.float32),
            "label": torch.tensor(self.labels[i], dtype=torch.long)
        }

    def inverse_labels(self, encoded):
        """Return original labels from encoded integers"""
        return self.encoder.inverse_transform(encoded)


train_async = BimodalDataset(X_train_val, None, y_train_val)

In [7]:
test_async = BimodalDataset(X_test, None, y_test)

In [8]:


# Build AAL masker once
_aal = datasets.fetch_atlas_aal()
_aal_img = _aal.maps

# Fit masker once on the atlas only
_masker = NiftiLabelsMasker(labels_img=_aal_img, standardize=True)
_masker.fit()

def _fmri_to_aal_timeseries(fmri_4d):
    # Ensure float32 to save RAM
    fmri_4d = fmri_4d.astype("float32", copy=False)

    # Convert array â†’ NIfTI
    fmri_nii = image.new_img_like(_aal_img, fmri_4d)

    # Only transform (masker already fitted)
    ts = _masker.transform(fmri_nii)   # (T, n_regions)
    return ts.T                        # (n_regions, T)

def convert_dataset_to_aal(train_ds, test_ds, verbose=True):
    def convert_split(split, name):
        out = []
        for i, f in enumerate(split.fmri):
            if verbose:
                print(f"{name}: {i+1}/{len(split.fmri)}", end="\r", flush=True)
            out.append(_fmri_to_aal_timeseries(f))
        return np.stack(out, axis=0)

    train_fmri = convert_split(train_ds, "train")
    test_fmri  = convert_split(test_ds, "test")

    new_train = BimodalDataset(
        fmri=train_fmri,
        eeg=train_ds.eeg,
        labels=train_ds.inverse_labels(train_ds.labels)
    )
    new_test = BimodalDataset(
        fmri=test_fmri,
        eeg=test_ds.eeg,
        labels=test_ds.inverse_labels(test_ds.labels)
    )

    return new_train, new_test





[fetch_atlas_aal] Dataset found in C:\Users\Lova\nilearn_data\aal_SPM12


In [9]:



train_async_aal, test_async_aal = convert_dataset_to_aal(train_async, test_async)


test: 48/48272

In [10]:
print(f"Train shape: {train_async_aal.fmri.shape}")

print(f"Test shape: {test_async_aal.fmri.shape}")



Train shape: (272, 116)
Test shape: (48, 116)


In [11]:
def run_optuna_with_cv(
    X_train_val, y_train_val, X_test, y_test,
    n_trials=30, n_splits=5, label_decoder=None
):
    import warnings
    warnings.filterwarnings("ignore", category=UserWarning)
    warnings.filterwarnings("ignore", category=FutureWarning)

    model_types = ["svc", "linear_svc", "rf"]
    all_results = {}
    N_samples, N_features = X_train_val.shape

    # ===== Adaptive feature selection limits =====
    # ANOVA percentile
    if N_features <= 50:  # ROI regime
        min_percent, max_percent = 1, 100
    else:  # voxel regime
        max_voxels = 30000
        max_percent = int(100 * min(max_voxels, N_features) / N_features)
        max_percent = min(max_percent, 100)
        min_percent = 1
   

    # PCA/ICA component limits
    max_pca = min(200, N_features, N_samples - 1)
    max_ica = min(80, N_features, N_samples // 4)

    for model_type in model_types:
        print(f"\nðŸ”¹ OPTUNA + {n_splits}-Fold CV for: {model_type.upper()}")

        # ===== Objective function =====
        def objective_wrapper(trial):
            # Feature selection
            feat_method = trial.suggest_categorical("feat_method", ["anova", "pca", "ica"])

            if feat_method == "anova":
                percentile = trial.suggest_int("percentile", min_percent, max_percent)
                selector = SelectPercentile(f_classif, percentile=percentile)

            elif feat_method == "pca":
                n_components = trial.suggest_int("pca_n_components", 10, 100)
                selector = PCA(n_components=n_components, random_state=42)

            else:  # ICA
                n_components = trial.suggest_int("ica_n_components", 10, 100)
                selector = FastICA(n_components=n_components, random_state=42, max_iter=1000)

            # Model selection
            if model_type == "svc":
                kernel = trial.suggest_categorical("kernel", ["linear", "poly", "sigmoid"])
                C = trial.suggest_float("C", 1e-2, 1e1, log=True)

                if kernel == "linear":
                    model = SVC(kernel="linear", C=C, random_state=42)
                else:
                    gamma = trial.suggest_float("gamma", 1e-4, 1e-1, log=True)
                    coef0 = trial.suggest_float("coef0", 0.0, 1.0)
                    degree = trial.suggest_int("degree", 2, 6) if kernel == "poly" else 3
                    model = SVC(kernel=kernel, C=C, gamma=gamma, coef0=coef0, degree=degree, random_state=42)

            elif model_type == "linear_svc":
                C = trial.suggest_float("C", 1e-3, 1e2, log=True)
                model = LinearSVC(C=C, max_iter=10000, random_state=42)

            else:  # RandomForest
                n_estimators = trial.suggest_int("n_estimators", 50, 300)
                max_depth = trial.suggest_int("max_depth", 2, 20)
                min_samples_split = trial.suggest_int("min_samples_split", 2, 5)
                min_samples_leaf = trial.suggest_int("min_samples_leaf", 1, 3)
                model = RandomForestClassifier(
                    n_estimators=n_estimators,
                    max_depth=max_depth,
                    min_samples_split=min_samples_split,
                    min_samples_leaf=min_samples_leaf,
                    random_state=42,
                )

            # Pipeline
            pipeline = make_pipeline(StandardScaler(), selector, model)

            # K-Fold CV
            cv = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
            scores = cross_val_score(
                pipeline,
                X_train_val,
                y_train_val,
                cv=cv,
                scoring="f1_macro",
                n_jobs=1,
            )

            return scores.mean()

        # ===== Run Optuna =====
        study = optuna.create_study(direction="maximize")
        study.optimize(objective_wrapper, n_trials=n_trials)
        best_params = study.best_params
        print(f"Best Params for {model_type} = {best_params}")

        # ===== Rebuild final pipeline =====
        feat_method = best_params["feat_method"]
        if feat_method == "anova":
            selector = SelectPercentile(f_classif, percentile=best_params["percentile"])
        elif feat_method == "pca":
            selector = PCA(n_components=best_params["pca_n_components"], random_state=42)
        else:
            selector = FastICA(n_components=best_params["ica_n_components"], random_state=42, max_iter=1000)

        # Model
        if model_type == "svc":
            if best_params["kernel"] == "linear":
                model = SVC(kernel="linear", C=best_params["C"], random_state=42)
            else:
                model = SVC(
                    kernel=best_params["kernel"],
                    C=best_params["C"],
                    gamma=best_params.get("gamma", "scale"),
                    coef0=best_params.get("coef0", 0),
                    degree=best_params.get("degree", 3),
                    random_state=42,
                )
        elif model_type == "linear_svc":
            model = LinearSVC(C=best_params["C"], max_iter=10000, random_state=42)
        else:
            model = RandomForestClassifier(
                n_estimators=best_params["n_estimators"],
                max_depth=best_params["max_depth"],
                min_samples_split=best_params["min_samples_split"],
                min_samples_leaf=best_params["min_samples_leaf"],
                random_state=42,
            )

        final_pipeline = make_pipeline(StandardScaler(), selector, model)
        final_pipeline.fit(X_train_val, y_train_val)

        # Test evaluation
        y_pred = final_pipeline.predict(X_test)
        acc = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average="macro")
        print(f"FINAL RESULTS {model_type.upper()}: acc={acc:.3f}, f1={f1:.3f}")

        cm = confusion_matrix(y_test, y_pred)
        ConfusionMatrixDisplay(cm, display_labels=label_decoder).plot(cmap="Blues")
        plt.show()

        all_results[model_type] = {
            "pipeline": final_pipeline,
            "acc": acc,
            "f1": f1,
            "cm": cm,
            "best_params": best_params,
            "study": study,
        }

    return all_results


In [12]:
def run_optuna_with_bimodal_dataset(
    train_dataset: BimodalDataset,
    test_dataset: BimodalDataset,
    n_trials=30,
    n_splits=10,
    use_temporal_flattener=False
):
    import numpy as np
    from sklearn.model_selection import StratifiedKFold, cross_val_score
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.feature_selection import SelectPercentile, f_classif
    from sklearn.decomposition import PCA, FastICA
    from sklearn.svm import SVC, LinearSVC
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, ConfusionMatrixDisplay
    import optuna
    import matplotlib.pyplot as plt

    # Convert datasets to arrays
    def dataset_to_arrays(ds):
        fmri = np.array([ds[i]['fmri'].numpy() for i in range(len(ds))])
        X = fmri.reshape(len(ds), -1)  # flatten each 3D fMRI into 1D
        y = np.array([ds[i]['label'].item() for i in range(len(ds))])
        return X, y


    X_train, y_train = dataset_to_arrays(train_dataset)
    X_test, y_test   = dataset_to_arrays(test_dataset)

    N_samples, N_features = X_train.shape
    min_percent = 1
    max_voxels = 30000
    max_percent = int(100 * min(max_voxels, N_features) / N_features)
    max_percent = min(max_percent, 100)
    max_pca = min(200, N_features, N_samples-1)
    max_ica = min(80, N_features, N_samples//4)

    model_types = ["svc", "linear_svc", "rf"]
    all_results = {}

    for model_type in model_types:
        print(f"\nðŸ”¹ OPTUNA + {n_splits}-Fold CV for: {model_type.upper()}")

        def objective_wrapper(trial):
            # Feature selection
            feat_method = trial.suggest_categorical("feat_method", ["anova", "pca", "ica"])
            if feat_method == "anova":
                percentile = trial.suggest_int("percentile", min_percent, max_percent)
                selector = SelectPercentile(f_classif, percentile=percentile)
            elif feat_method == "pca":
                n_components = trial.suggest_int("pca_n_components", 10, max_pca)
                selector = PCA(n_components=n_components, random_state=42)
            else:
                n_components = trial.suggest_int("ica_n_components", 10, max_ica)
                selector = FastICA(n_components=n_components, random_state=42, max_iter=1000)

            # Model selection
            if model_type == "svc":
                kernel = trial.suggest_categorical("kernel", ["linear", "poly", "sigmoid"])
                C = trial.suggest_float("C", 1e-2, 1e1, log=True)
                if kernel == "linear":
                    model = SVC(kernel="linear", C=C, random_state=42)
                else:
                    gamma = trial.suggest_float("gamma", 1e-4, 1e-1, log=True)
                    coef0 = trial.suggest_float("coef0", 0.0, 1.0)
                    degree = trial.suggest_int("degree", 2, 6) if kernel == "poly" else 3
                    model = SVC(kernel=kernel, C=C, gamma=gamma, coef0=coef0, degree=degree, random_state=42)
            elif model_type == "linear_svc":
                C = trial.suggest_float("C", 1e-3, 1e2, log=True)
                model = LinearSVC(C=C, max_iter=10000, random_state=42)
            else:
                n_estimators = trial.suggest_int("n_estimators", 50, 300)
                max_depth = trial.suggest_int("max_depth", 2, 20)
                min_samples_split = trial.suggest_int("min_samples_split", 2, 5)
                min_samples_leaf = trial.suggest_int("min_samples_leaf", 1, 3)
                model = RandomForestClassifier(
                    n_estimators=n_estimators,
                    max_depth=max_depth,
                    min_samples_split=min_samples_split,
                    min_samples_leaf=min_samples_leaf,
                    random_state=42,
                )

            pipeline = make_pipeline(StandardScaler(), selector, model)
            cv = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
            scores = cross_val_score(pipeline, X_train, y_train, cv=cv, scoring="f1_macro", n_jobs=1)
            return scores.mean()

        study = optuna.create_study(direction="maximize")
        study.optimize(objective_wrapper, n_trials=n_trials)
        best_params = study.best_params
        print(f"Best Params for {model_type} = {best_params}")

        # Build final pipeline
        feat_method = best_params["feat_method"]
        if feat_method == "anova":
            selector = SelectPercentile(f_classif, percentile=best_params["percentile"])
        elif feat_method == "pca":
            selector = PCA(n_components=best_params["pca_n_components"], random_state=42)
        else:
            selector = FastICA(n_components=best_params["ica_n_components"], random_state=42, max_iter=1000)

        if model_type == "svc":
            if best_params["kernel"] == "linear":
                model = SVC(kernel="linear", C=best_params["C"], random_state=42)
            else:
                model = SVC(
                    kernel=best_params["kernel"],
                    C=best_params["C"],
                    gamma=best_params.get("gamma", "scale"),
                    coef0=best_params.get("coef0", 0),
                    degree=best_params.get("degree", 3),
                    random_state=42,
                )
        elif model_type == "linear_svc":
            model = LinearSVC(C=best_params["C"], max_iter=10000, random_state=42)
        else:
            model = RandomForestClassifier(
                n_estimators=best_params["n_estimators"],
                max_depth=best_params["max_depth"],
                min_samples_split=best_params["min_samples_split"],
                min_samples_leaf=best_params["min_samples_leaf"],
                random_state=42,
            )

        final_pipeline = make_pipeline(StandardScaler(), selector, model)
        final_pipeline.fit(X_train, y_train)
        y_pred = final_pipeline.predict(X_test)

        acc = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average="macro")
        print(f"FINAL RESULTS {model_type.upper()}: acc={acc:.3f}, f1={f1:.3f}")

        cm = confusion_matrix(y_test, y_pred)
        ConfusionMatrixDisplay(cm, display_labels=train_dataset.encoder.classes_).plot(cmap="Blues")
        plt.show()

        all_results[model_type] = {
            "pipeline": final_pipeline,
            "acc": acc,
            "f1": f1,
            "cm": cm,
            "best_params": best_params,
            "study": study,
        }

    return all_results


In [13]:
train_async_aal.eeg = np.zeros_like(train_async_aal.fmri)  # same shape as fmri or at least first dim
test_async_aal.eeg  = np.zeros_like(test_async_aal.fmri)


In [None]:
results = run_optuna_with_bimodal_dataset(
    train_async_aal, 
    test_async_aal, 
    n_trials=500)


[I 2025-12-11 10:58:21,397] A new study created in memory with name: no-name-6536ea35-aa8d-48f7-a531-9d1b02e7e4a2



ðŸ”¹ OPTUNA + 10-Fold CV for: SVC


[I 2025-12-11 10:58:21,578] Trial 0 finished with value: 0.13748015873015873 and parameters: {'feat_method': 'anova', 'percentile': 51, 'kernel': 'linear', 'C': 0.029234663897664038}. Best is trial 0 with value: 0.13748015873015873.
[I 2025-12-11 10:58:21,906] Trial 1 finished with value: 0.030007434774676156 and parameters: {'feat_method': 'anova', 'percentile': 79, 'kernel': 'poly', 'C': 0.06137347207941753, 'gamma': 0.002784914713937487, 'coef0': 0.2811739259665452, 'degree': 6}. Best is trial 0 with value: 0.13748015873015873.
[I 2025-12-11 10:58:22,106] Trial 2 finished with value: 0.08575514763014762 and parameters: {'feat_method': 'anova', 'percentile': 4, 'kernel': 'linear', 'C': 2.0887194490206}. Best is trial 0 with value: 0.13748015873015873.
[I 2025-12-11 10:58:25,192] Trial 3 finished with value: 0.0899226467976468 and parameters: {'feat_method': 'ica', 'ica_n_components': 52, 'kernel': 'sigmoid', 'C': 0.22928844465022913, 'gamma': 0.009890948889046168, 'coef0': 0.37179035

In [14]:
class ROI1DCNN(nn.Module):
    # Input is (B, L) where L=N_regions. We add C=1 channel dim.
    def __init__(self, in_features, num_classes):
        super().__init__()
        self.conv1 = nn.Conv1d(1, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(16, 32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv1d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.AdaptiveMaxPool1d(1)
        self.fc = nn.Linear(64, num_classes)
    
    def forward(self, x):
        # x: (batch, N_regions) -> adds channel dim (batch, 1, N_regions)
        x = x.unsqueeze(1)
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = self.pool(x).squeeze(-1)
        x = self.fc(x)
        return x

class CNN1D(nn.Module):
    def __init__(self, n_regions, num_classes=8, dropout=0.3):
        super().__init__()
        # FIX: Change kernel_size to 1 since input length L is 1
        self.conv1 = nn.Conv1d(n_regions, 64, kernel_size=1) 
        self.conv2 = nn.Conv1d(64, 128, kernel_size=1) # Subsequent convolutions must also be 1
        self.pool = nn.AdaptiveMaxPool1d(1) # This is fine
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(128, num_classes)
        
    def forward(self, x):
        # x: (batch, N_regions) -> reshape to (batch, N_regions, 1) to fit Conv1D structure
        # This treats N_regions as channels and sequence length as 1
        x = x.unsqueeze(-1)
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))
        x = self.pool(x).squeeze(-1)
        x = self.dropout(x)
        return self.fc(x)

class MLP(nn.Module):
    # Input is (B, N_regions). Flattening is trivial.
    def __init__(self, input_size, num_classes=8, dropout=0.3):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_classes)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, x):
        x = x.view(x.size(0), -1)  # (B, N_regions)
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = torch.relu(self.fc2(x))
        x = self.dropout(x)
        return self.fc3(x)

In [15]:
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=1000, LR_DECAY_FACTOR=0.3, LR_PATIENCE=30, MAX_PATIENCE=60):
    """
    Trains the 3D ResNet-50 model with a custom learning rate scheduler 
    and early stopping based on validation loss.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)


    # Scheduler: Monitors validation loss and decreases LR when loss doesn't improve
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, 
        mode='min', 
        factor=LR_DECAY_FACTOR, 
        patience=LR_PATIENCE, 
    )

    best_val_loss = float('inf')
    patience_counter = 0

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0

        for batch in train_loader:
            # Data from BimodalDataset: fMRI, EEG (placeholder), Label
            fmri = batch['fmri'].to(device) # Add Channel dim: (B, 1, D, H, W)
            labels = batch['label'].to(device)
            
            optimizer.zero_grad()
            outputs = model(fmri)
            
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item() * fmri.size(0)
            _, predicted = torch.max(outputs.data, 1)
            total_train += labels.size(0)
            correct_train += (predicted == labels).sum().item()

        train_loss = running_loss / len(train_loader.dataset)
        train_accuracy = correct_train / total_train
        
        # Validation Step
        val_loss, val_accuracy = evaluate_model(model, val_loader, criterion, device)
        
        print(f'Epoch {epoch+1}/{num_epochs} | Train Loss: {train_loss:.4f} | Train Acc: {train_accuracy:.4f} | Val Loss: {val_loss:.4f} | Val Acc: {val_accuracy:.4f}', end = "\r", flush = True)

        # Step LR Scheduler and Check for Early Stopping
        scheduler.step(val_loss)
        
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            # Optionally save the best model weights here
        else:
            patience_counter += 1
            if patience_counter >= MAX_PATIENCE:
                #print(f"Early stopping triggered after {patience_counter} epochs without improvement in validation loss.")
                break
                
    #print("Training finished.")
    return model

def evaluate_model(model, data_loader, criterion, device):
    """
    Evaluates the model's loss and accuracy on a given dataset.
    """
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in data_loader:
            fmri = batch['fmri'].to(device)
            labels = batch['label'].to(device)
            
            outputs = model(fmri)
            loss = criterion(outputs, labels)
            
            running_loss += loss.item() * fmri.size(0)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    avg_loss = running_loss / len(data_loader.dataset)
    accuracy = correct / total
    return avg_loss, accuracy

In [16]:
class ROIDataset(Dataset):
    """
    Final simplified Dataset: Assumes input 'rois' is the fully processed
    (scaled/PCA'd) NumPy array ready for tensor conversion.
    """
    def __init__(self, rois, labels):
        self.labels = labels
        # rois is now the FINAL (N_samples, N_features_final) NumPy array
        self.rois = rois

    def __len__(self):
        return len(self.rois)

    def __getitem__(self, idx):
        # x is the final feature vector, shape (N_features_final,)
        x = self.rois[idx].astype(np.float32)
        
        # Convert to tensor. No more scaling or PCA logic here.
        x = torch.tensor(x, dtype=torch.float32)
        y = torch.tensor(self.labels[idx], dtype=torch.long)
        
        return {"fmri": x, "label": y}

In [17]:

def get_model(model_name, num_classes, in_channels):
    # ... (Model initialization logic)
    if model_name == "ROI1DCNN":
        return ROI1DCNN(in_features = in_channels, num_classes=num_classes)
    elif model_name == "MLP":
        return MLP(input_size=in_channels, num_classes=num_classes)
    elif model_name == "1DCNN":
        return CNN1D(n_regions=in_channels, num_classes=num_classes)
    else:
        raise ValueError(f"Unknown model_name: {model_name}")

def evaluate_model(model, data_loader, criterion, device):
    """
    Evaluates the model's loss and accuracy on a given dataset.
    """
    # ... (Evaluation logic is unchanged and correct)
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in data_loader:
            fmri = batch['fmri'].to(device)
            labels = batch['label'].to(device)
            
            outputs = model(fmri)
            loss = criterion(outputs, labels)
            
            running_loss += loss.item() * fmri.size(0)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    avg_loss = running_loss / len(data_loader.dataset)
    accuracy = correct / total
    return avg_loss, accuracy


def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=1000, LR_DECAY_FACTOR=0.3, LR_PATIENCE=30, MAX_PATIENCE=60):
    """
    Trains the model with gradient clipping and early stopping.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Scheduler: Monitors validation loss and decreases LR
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=LR_DECAY_FACTOR, patience=LR_PATIENCE
    )

    best_val_loss = float('inf')
    patience_counter = 0
    best_val_acc = 0.0

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0

        for batch in train_loader:
            fmri = batch['fmri'].to(device)
            labels = batch['label'].to(device)
            
            optimizer.zero_grad()
            outputs = model(fmri)
            
            loss = criterion(outputs, labels)
            
            # --- GRADIENT CLIPPING FIX FOR NAN LOSS ---
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            # ------------------------------------------
            
            running_loss += loss.item() * fmri.size(0)
            _, predicted = torch.max(outputs.data, 1)
            total_train += labels.size(0)
            correct_train += (predicted == labels).sum().item()

        train_loss = running_loss / len(train_loader.dataset)
        train_accuracy = correct_train / total_train
        
        # Validation Step
        val_loss, val_accuracy = evaluate_model(model, val_loader, criterion, device)
        
        print(f'Epoch {epoch+1}/{num_epochs} | Train Loss: {train_loss:.4f} | Train Acc: {train_accuracy:.4f} | Val Loss: {val_loss:.4f} | Val Acc: {val_accuracy:.4f}', end = "\r", flush = True)

        # Step LR Scheduler and Check for Early Stopping
        scheduler.step(val_loss)
        
        if val_loss < best_val_loss or val_accuracy >= best_val_acc:
            best_val_loss = val_loss
            best_val_acc = val_accuracy
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= MAX_PATIENCE:
                break
                
    return model

class ROI1DCNN(nn.Module):
    def __init__(self, in_features, num_classes, dropout_rate=0.3, n_filters_start=16):
        super().__init__()
        self.conv1 = nn.Conv1d(1, n_filters_start, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(n_filters_start, n_filters_start * 2, kernel_size=3, padding=1)
        self.conv3 = nn.Conv1d(n_filters_start * 2, n_filters_start * 4, kernel_size=3, padding=1)
        self.pool = nn.AdaptiveMaxPool1d(1)
        self.dropout = nn.Dropout(dropout_rate) # New dropout layer
        self.fc = nn.Linear(n_filters_start * 4, num_classes)
    
    def forward(self, x):
        x = x.unsqueeze(1)
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = self.pool(x).squeeze(-1)
        x = self.dropout(x) # Apply dropout after pooling
        x = self.fc(x)
        return x

class CNN1D(nn.Module):
    def __init__(self, n_regions, num_classes=8, dropout_rate=0.3, n_filters_start=64):
        super().__init__()
        # Kernel size 1 fixed for L=1 input
        self.conv1 = nn.Conv1d(n_regions, n_filters_start, kernel_size=1) 
        self.conv2 = nn.Conv1d(n_filters_start, n_filters_start * 2, kernel_size=1) 
        self.pool = nn.AdaptiveMaxPool1d(1)
        self.dropout = nn.Dropout(dropout_rate)
        self.fc = nn.Linear(n_filters_start * 2, num_classes)
        
    def forward(self, x):
        x = x.unsqueeze(-1)
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))
        x = self.pool(x).squeeze(-1)
        x = self.dropout(x)
        return self.fc(x)

class MLP(nn.Module):
    def __init__(self, input_size, num_classes=8, dropout_rate=0.3, n_fc_units=128):
        super().__init__()
        self.fc1 = nn.Linear(input_size, n_fc_units)
        self.fc2 = nn.Linear(n_fc_units, n_fc_units // 2)
        self.fc3 = nn.Linear(n_fc_units // 2, num_classes)
        self.dropout = nn.Dropout(dropout_rate)
        
    def forward(self, x):
        x = x.view(x.size(0), -1) 
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = torch.relu(self.fc2(x))
        x = self.dropout(x)
        return self.fc3(x)

def get_model(model_name, num_classes, in_channels, **kwargs):
    if model_name == "ROI1DCNN":
        return ROI1DCNN(in_features=in_channels, num_classes=num_classes, **kwargs)
    elif model_name == "MLP":
        return MLP(input_size=in_channels, num_classes=num_classes, **kwargs)
    elif model_name == "1DCNN":
        return CNN1D(n_regions=in_channels, num_classes=num_classes, **kwargs)
    else:
        raise ValueError(f"Unknown model_name: {model_name}")


class DummyDatasetContainer:
    def __init__(self, n_samples=100, n_regions=50, n_classes=8):
        self.fmri = np.random.randn(n_samples, n_regions).astype(np.float32)
        self.labels = np.random.randint(0, n_classes, n_samples)
        self.label_map = {i: f"class_{i}" for i in range(n_classes)}
    def inverse_labels(self, labels):
        return [self.label_map[l] for l in labels]



In [18]:
X_data_raw = train_async_aal.fmri 
y_cat = train_async_aal.inverse_labels(train_async_aal.labels)
le = LabelEncoder()
y_int = le.fit_transform(y_cat)
N_CLASSES = len(le.classes_)

# 2. Split ONCE using Stratification (FIXED)
TEST_SIZE_RATIO = 0.15 # Using your original 85/15 split ratio

# Use a fixed random_state (e.g., 42) to ensure the split is reproducible across entire Optuna runs.
# The 'stratify=y_int' argument ensures the class proportions are maintained in both splits.
X_TRAIN_RAW, X_VAL_RAW, Y_TRAIN, Y_VAL = train_test_split(
    X_data_raw, 
    y_int, 
    test_size=TEST_SIZE_RATIO, 
    random_state=42, # Ensure reproducibility
    stratify=y_int   # ENSURES STRATIFICATION
)

# 3. Fit StandardScaler ONCE
# Always fit the scaler only on the training data.
SCALER = StandardScaler().fit(X_TRAIN_RAW)
TRAIN_MEAN = SCALER.mean_
TRAIN_STD = np.sqrt(SCALER.var_)
INPUT_CHANNELS_RAW = X_TRAIN_RAW.shape[1]

print("--- Stratified Data Split Complete ---")
print(f"Total samples: {len(X_data_raw)}")
print(f"Training samples: {len(X_TRAIN_RAW)} ({100 - (TEST_SIZE_RATIO * 100)}%)")
print(f"Validation samples: {len(X_VAL_RAW)} ({TEST_SIZE_RATIO * 100}%)")
print(f"Input dimension (N_regions) fixed at: {INPUT_CHANNELS_RAW}")
print(f"N_CLASSES fixed at: {N_CLASSES}")

--- Stratified Data Split Complete ---
Total samples: 272
Training samples: 231 (85.0%)
Validation samples: 41 (15.0%)
Input dimension (N_regions) fixed at: 116
N_CLASSES fixed at: 8


In [19]:
# --- Analysis of Training Set ---
from collections import Counter
train_counts = Counter(Y_TRAIN)
total_train = len(Y_TRAIN)
train_data = []

for class_id in range(N_CLASSES):
    count = train_counts.get(class_id, 0)
    percentage = (count / total_train) * 100 if total_train > 0 else 0
    train_data.append({
        'Class ID': class_id,
        'Train Count': count,
        'Train Percentage': f'{percentage:.2f}%'
    })

df_train = pd.DataFrame(train_data).set_index('Class ID')

# --- Analysis of Validation Set ---
val_counts = Counter(Y_VAL)
total_val = len(Y_VAL)
val_data = []

for class_id in range(N_CLASSES):
    count = val_counts.get(class_id, 0)
    percentage = (count / total_val) * 100 if total_val > 0 else 0
    val_data.append({
        'Class ID': class_id,
        'Validation Count': count,
        'Validation Percentage': f'{percentage:.2f}%'
    })

df_val = pd.DataFrame(val_data).set_index('Class ID')

# --- Combine and Print Results ---
df_combined = df_train.join(df_val)

print("--- Class Distribution (Train vs. Validation) ---")
print(df_combined)
print("-" * 50)
print(f"Total Samples in Training Set: {total_train}")
print(f"Total Samples in Validation Set: {total_val}")

--- Class Distribution (Train vs. Validation) ---
          Train Count Train Percentage  Validation Count Validation Percentage
Class ID                                                                      
0                  29           12.55%                 5                12.20%
1                  29           12.55%                 5                12.20%
2                  29           12.55%                 5                12.20%
3                  29           12.55%                 5                12.20%
4                  29           12.55%                 5                12.20%
5                  29           12.55%                 5                12.20%
6                  28           12.12%                 6                14.63%
7                  29           12.55%                 5                12.20%
--------------------------------------------------
Total Samples in Training Set: 231
Total Samples in Validation Set: 41


In [20]:
def objective(trial):
    # --- 1. General Hyperparameters ---
    
    batch_size = trial.suggest_categorical("batch_size", [4, 8, 16, 32, 64])
    learning_rate = trial.suggest_loguniform("learning_rate", 1e-6, 1e-3) 
    weight_decay = trial.suggest_loguniform("weight_decay", 1e-7, 1e-3)
    dropout_rate = trial.suggest_uniform("dropout_rate", 0.1, 0.7)
    lr_decay_factor = trial.suggest_uniform("lr_decay_factor", 0.1, 0.9)
    lr_patience = trial.suggest_int("lr_patience", 5, 50)
    max_patience = trial.suggest_int("max_patience", 20, 100)
    model_type = trial.suggest_categorical("model_type", ["ROI1DCNN", "MLP", "1DCNN"])
    
    # --- 2. Model-Specific Hyperparameters ---
    
    model_kwargs = {"dropout_rate": dropout_rate}
    if model_type == "ROI1DCNN":
        model_kwargs["n_filters_start"] = trial.suggest_categorical("roi_n_filters_start", [8, 16, 32, 64, 128])
    elif model_type == "1DCNN":
        model_kwargs["n_filters_start"] = trial.suggest_categorical("cnn_n_filters_start", [16, 32, 64, 128])
    elif model_type == "MLP":
        model_kwargs["n_fc_units"] = trial.suggest_categorical("mlp_n_fc_units", [16, 32, 64, 128, 256])
        
    # --- 3. Feature Engineering (Standardize -> PCA) ---
    
    # Apply Standardization using the globally fitted mean/std
    X_train_scaled = (X_TRAIN_RAW - TRAIN_MEAN) / (TRAIN_STD + 1e-8)
    X_val_scaled = (X_VAL_RAW - TRAIN_MEAN) / (TRAIN_STD + 1e-8)
    
    # Set final data arrays to the scaled data initially
    X_train_final = X_train_scaled
    X_val_final = X_val_scaled
    in_ch = INPUT_CHANNELS_RAW
    pca = None

    use_pca = trial.suggest_categorical("use_pca", [False, True])
    
    if use_pca:
        # PCA Fit (on scaled data)
        # Note: We can suggest up to the number of original features
        n_components = trial.suggest_int("n_components", 2, INPUT_CHANNELS_RAW)
        pca = PCA(n_components=n_components).fit(X_train_scaled)
        
        # Transform
        X_train_final = pca.transform(X_train_scaled)
        X_val_final = pca.transform(X_val_scaled)
        
        # Update Model Input size
        in_ch = pca.n_components_
    
    # --- 4. Data Loaders ---
    
    # ROIDataset now receives the FINAL processed data.
    train_loader = DataLoader(
        ROIDataset(X_train_final, Y_TRAIN),
        batch_size=batch_size, shuffle=True
    )
    val_loader = DataLoader(
        ROIDataset(X_val_final, Y_VAL),
        batch_size=batch_size, shuffle=False
    )
    
    # --- 5. Model Training and Evaluation ---
    
    num_classes = N_CLASSES # Use the globally defined number of classes
    
    model = get_model(model_type, num_classes=num_classes, in_channels=in_ch, **model_kwargs)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Train & evaluate 
    trained_model = train_model(model, train_loader, val_loader, criterion, optimizer,
                                num_epochs=1000, LR_DECAY_FACTOR=lr_decay_factor,
                                LR_PATIENCE=lr_patience, MAX_PATIENCE=max_patience)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    val_loss, val_acc = evaluate_model(trained_model, val_loader, criterion, device)

    return val_loss



In [None]:
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=500)

[I 2025-12-11 21:51:50,202] A new study created in memory with name: no-name-41290562-3cf9-4452-85db-b179a59f6bc3


Epoch 1000/1000 | Train Loss: 2.0760 | Train Acc: 0.1515 | Val Loss: 2.0848 | Val Acc: 0.1463

[I 2025-12-11 21:53:54,348] Trial 0 finished with value: 2.084769539716767 and parameters: {'batch_size': 8, 'learning_rate': 1.3225391290702208e-05, 'weight_decay': 1.3190622108677502e-05, 'dropout_rate': 0.5774054896007377, 'lr_decay_factor': 0.4965300303078065, 'lr_patience': 35, 'max_patience': 88, 'model_type': 'MLP', 'mlp_n_fc_units': 16, 'use_pca': False}. Best is trial 0 with value: 2.084769539716767.


Epoch 80/1000 | Train Loss: 2.0604 | Train Acc: 0.1645 | Val Loss: 2.0911 | Val Acc: 0.1220

[I 2025-12-11 21:54:06,762] Trial 1 finished with value: 2.0911407121797887 and parameters: {'batch_size': 8, 'learning_rate': 1.058926900230194e-06, 'weight_decay': 0.0003833007230473852, 'dropout_rate': 0.27462136722194147, 'lr_decay_factor': 0.5132111420159794, 'lr_patience': 35, 'max_patience': 75, 'model_type': 'ROI1DCNN', 'roi_n_filters_start': 64, 'use_pca': True, 'n_components': 25}. Best is trial 0 with value: 2.084769539716767.


Epoch 89/1000 | Train Loss: 1.9684 | Train Acc: 0.2641 | Val Loss: 2.1425 | Val Acc: 0.2195

[I 2025-12-11 21:54:13,140] Trial 2 finished with value: 2.142548002847811 and parameters: {'batch_size': 16, 'learning_rate': 0.00019222708282375103, 'weight_decay': 4.6830946190038084e-06, 'dropout_rate': 0.5429523527834272, 'lr_decay_factor': 0.10804375695359987, 'lr_patience': 31, 'max_patience': 77, 'model_type': '1DCNN', 'cnn_n_filters_start': 32, 'use_pca': False}. Best is trial 0 with value: 2.084769539716767.


Epoch 1000/1000 | Train Loss: 2.0715 | Train Acc: 0.1385 | Val Loss: 2.0999 | Val Acc: 0.0732

[I 2025-12-11 21:55:07,731] Trial 3 finished with value: 2.0998519629966923 and parameters: {'batch_size': 32, 'learning_rate': 1.0398890564437236e-05, 'weight_decay': 3.554206052867983e-06, 'dropout_rate': 0.4163086267905328, 'lr_decay_factor': 0.22150236638555, 'lr_patience': 43, 'max_patience': 79, 'model_type': 'ROI1DCNN', 'roi_n_filters_start': 32, 'use_pca': True, 'n_components': 22}. Best is trial 0 with value: 2.084769539716767.


Epoch 1000/1000 | Train Loss: 0.5562 | Train Acc: 0.6840 | Val Loss: 5.2247 | Val Acc: 0.1220

[I 2025-12-11 21:57:20,866] Trial 4 finished with value: 5.224745389891834 and parameters: {'batch_size': 8, 'learning_rate': 0.0003641802761074979, 'weight_decay': 2.577564365959445e-07, 'dropout_rate': 0.2246973541493988, 'lr_decay_factor': 0.5297152828941164, 'lr_patience': 36, 'max_patience': 61, 'model_type': '1DCNN', 'cnn_n_filters_start': 64, 'use_pca': True, 'n_components': 63}. Best is trial 0 with value: 2.084769539716767.


Epoch 256/1000 | Train Loss: 1.8880 | Train Acc: 0.3636 | Val Loss: 2.1724 | Val Acc: 0.1220

[I 2025-12-11 21:57:39,472] Trial 5 finished with value: 2.17238039505191 and parameters: {'batch_size': 16, 'learning_rate': 8.501368773702027e-05, 'weight_decay': 0.0002668398036877116, 'dropout_rate': 0.6136015869733055, 'lr_decay_factor': 0.8757635361573199, 'lr_patience': 34, 'max_patience': 78, 'model_type': '1DCNN', 'cnn_n_filters_start': 32, 'use_pca': True, 'n_components': 43}. Best is trial 0 with value: 2.084769539716767.


Epoch 1000/1000 | Train Loss: 2.0842 | Train Acc: 0.1082 | Val Loss: 2.0773 | Val Acc: 0.1707

[I 2025-12-11 22:02:39,663] Trial 6 finished with value: 2.0772791548473077 and parameters: {'batch_size': 4, 'learning_rate': 1.6213091870462766e-06, 'weight_decay': 0.000597856699338558, 'dropout_rate': 0.3658191378815091, 'lr_decay_factor': 0.34847612341704615, 'lr_patience': 45, 'max_patience': 33, 'model_type': 'ROI1DCNN', 'roi_n_filters_start': 16, 'use_pca': False}. Best is trial 6 with value: 2.0772791548473077.


Epoch 1000/1000 | Train Loss: 2.0736 | Train Acc: 0.1429 | Val Loss: 2.0867 | Val Acc: 0.1220

[I 2025-12-11 22:03:18,123] Trial 7 finished with value: 2.086724455763654 and parameters: {'batch_size': 32, 'learning_rate': 1.407639530825737e-05, 'weight_decay': 3.1082313410171903e-06, 'dropout_rate': 0.5297321198917267, 'lr_decay_factor': 0.5195216549869008, 'lr_patience': 12, 'max_patience': 64, 'model_type': '1DCNN', 'cnn_n_filters_start': 16, 'use_pca': False}. Best is trial 6 with value: 2.0772791548473077.


Epoch 1000/1000 | Train Loss: 0.5071 | Train Acc: 0.6623 | Val Loss: 4.6865 | Val Acc: 0.1707

[I 2025-12-11 22:04:21,999] Trial 8 finished with value: 4.686517006013451 and parameters: {'batch_size': 16, 'learning_rate': 0.00030251229777611936, 'weight_decay': 0.0001956750434411149, 'dropout_rate': 0.1606497719106886, 'lr_decay_factor': 0.44551628668209065, 'lr_patience': 49, 'max_patience': 80, 'model_type': '1DCNN', 'cnn_n_filters_start': 128, 'use_pca': False}. Best is trial 6 with value: 2.0772791548473077.


Epoch 1000/1000 | Train Loss: 1.1867 | Train Acc: 0.5758 | Val Loss: 2.9504 | Val Acc: 0.1463

[I 2025-12-11 22:05:32,278] Trial 9 finished with value: 2.9503907459538157 and parameters: {'batch_size': 16, 'learning_rate': 6.047103653877727e-05, 'weight_decay': 8.53229827794501e-05, 'dropout_rate': 0.2905674525942914, 'lr_decay_factor': 0.5583027624824298, 'lr_patience': 39, 'max_patience': 40, 'model_type': 'ROI1DCNN', 'roi_n_filters_start': 128, 'use_pca': True, 'n_components': 72}. Best is trial 6 with value: 2.0772791548473077.


Epoch 1000/1000 | Train Loss: 2.1032 | Train Acc: 0.1472 | Val Loss: 2.0827 | Val Acc: 0.0732

[I 2025-12-11 22:08:54,185] Trial 10 finished with value: 2.082659052639473 and parameters: {'batch_size': 4, 'learning_rate': 1.360634449214989e-06, 'weight_decay': 0.000865812400337987, 'dropout_rate': 0.39392082773237586, 'lr_decay_factor': 0.3180487199375168, 'lr_patience': 17, 'max_patience': 22, 'model_type': 'MLP', 'mlp_n_fc_units': 128, 'use_pca': False}. Best is trial 6 with value: 2.0772791548473077.


Epoch 1000/1000 | Train Loss: 2.0835 | Train Acc: 0.1385 | Val Loss: 2.1112 | Val Acc: 0.1463

[I 2025-12-11 22:12:12,278] Trial 11 finished with value: 2.1111999139553164 and parameters: {'batch_size': 4, 'learning_rate': 1.1823735267093768e-06, 'weight_decay': 0.0009298099058307266, 'dropout_rate': 0.4025437874002135, 'lr_decay_factor': 0.3196302746099355, 'lr_patience': 18, 'max_patience': 21, 'model_type': 'MLP', 'mlp_n_fc_units': 128, 'use_pca': False}. Best is trial 6 with value: 2.0772791548473077.


Epoch 1000/1000 | Train Loss: 2.0768 | Train Acc: 0.1212 | Val Loss: 2.0880 | Val Acc: 0.1951

[I 2025-12-11 22:15:30,788] Trial 12 finished with value: 2.0880125499353177 and parameters: {'batch_size': 4, 'learning_rate': 3.243076640810157e-06, 'weight_decay': 4.094761713053124e-05, 'dropout_rate': 0.3606957035552464, 'lr_decay_factor': 0.3270516497666379, 'lr_patience': 22, 'max_patience': 20, 'model_type': 'MLP', 'mlp_n_fc_units': 128, 'use_pca': False}. Best is trial 6 with value: 2.0772791548473077.


Epoch 1000/1000 | Train Loss: 2.0977 | Train Acc: 0.1039 | Val Loss: 2.0751 | Val Acc: 0.1463

[I 2025-12-11 22:15:52,430] Trial 13 finished with value: 2.0750961303710938 and parameters: {'batch_size': 64, 'learning_rate': 3.852489180727263e-06, 'weight_decay': 0.0008913514217222117, 'dropout_rate': 0.4662255213425802, 'lr_decay_factor': 0.6722840239941166, 'lr_patience': 7, 'max_patience': 36, 'model_type': 'MLP', 'mlp_n_fc_units': 256, 'use_pca': False}. Best is trial 13 with value: 2.0750961303710938.


Epoch 1000/1000 | Train Loss: 2.0818 | Train Acc: 0.1299 | Val Loss: 2.0792 | Val Acc: 0.1707

[I 2025-12-11 22:16:21,237] Trial 14 finished with value: 2.079191207885742 and parameters: {'batch_size': 64, 'learning_rate': 3.995392868898899e-06, 'weight_decay': 5.03600541208108e-05, 'dropout_rate': 0.4758413100373214, 'lr_decay_factor': 0.7089233619392589, 'lr_patience': 5, 'max_patience': 40, 'model_type': 'ROI1DCNN', 'roi_n_filters_start': 16, 'use_pca': False}. Best is trial 13 with value: 2.0750961303710938.


Epoch 1000/1000 | Train Loss: 2.1258 | Train Acc: 0.1126 | Val Loss: 2.0802 | Val Acc: 0.2195

[I 2025-12-11 22:16:42,792] Trial 15 finished with value: 2.0801587104797363 and parameters: {'batch_size': 64, 'learning_rate': 4.171131839450803e-06, 'weight_decay': 4.4473063852874705e-07, 'dropout_rate': 0.6938320828912303, 'lr_decay_factor': 0.6703520199682383, 'lr_patience': 27, 'max_patience': 39, 'model_type': 'MLP', 'mlp_n_fc_units': 256, 'use_pca': False}. Best is trial 13 with value: 2.0750961303710938.


Epoch 127/1000 | Train Loss: 1.4754 | Train Acc: 0.4848 | Val Loss: 2.5744 | Val Acc: 0.0976

[I 2025-12-11 22:16:46,528] Trial 16 finished with value: 2.5744030475616455 and parameters: {'batch_size': 64, 'learning_rate': 0.0009624039605021209, 'weight_decay': 1.695222844636561e-05, 'dropout_rate': 0.3232585913214464, 'lr_decay_factor': 0.7741478321820809, 'lr_patience': 50, 'max_patience': 49, 'model_type': 'ROI1DCNN', 'roi_n_filters_start': 16, 'use_pca': False}. Best is trial 13 with value: 2.0750961303710938.


Epoch 1000/1000 | Train Loss: 2.0819 | Train Acc: 0.1126 | Val Loss: 2.0706 | Val Acc: 0.1707

[I 2025-12-11 22:20:03,028] Trial 17 finished with value: 2.0705924324873015 and parameters: {'batch_size': 4, 'learning_rate': 2.5790876492928337e-06, 'weight_decay': 0.00012872441203657222, 'dropout_rate': 0.47382462212579046, 'lr_decay_factor': 0.41697914851502504, 'lr_patience': 5, 'max_patience': 30, 'model_type': 'MLP', 'mlp_n_fc_units': 256, 'use_pca': False}. Best is trial 17 with value: 2.0705924324873015.


Epoch 57/1000 | Train Loss: 2.0594 | Train Acc: 0.1861 | Val Loss: 2.0809 | Val Acc: 0.0732

[I 2025-12-11 22:20:04,340] Trial 18 finished with value: 2.0808823108673096 and parameters: {'batch_size': 64, 'learning_rate': 2.7511897743259826e-05, 'weight_decay': 0.00012830684222522255, 'dropout_rate': 0.4692756048604667, 'lr_decay_factor': 0.6545431870628201, 'lr_patience': 5, 'max_patience': 54, 'model_type': 'MLP', 'mlp_n_fc_units': 256, 'use_pca': False}. Best is trial 17 with value: 2.0705924324873015.


Epoch 79/1000 | Train Loss: 2.1101 | Train Acc: 0.1212 | Val Loss: 2.0874 | Val Acc: 0.0976

[I 2025-12-11 22:20:19,815] Trial 19 finished with value: 2.0873768620374724 and parameters: {'batch_size': 4, 'learning_rate': 8.178793862022142e-06, 'weight_decay': 3.315647014351398e-05, 'dropout_rate': 0.65133461204248, 'lr_decay_factor': 0.8869132155986137, 'lr_patience': 11, 'max_patience': 29, 'model_type': 'MLP', 'mlp_n_fc_units': 64, 'use_pca': False}. Best is trial 17 with value: 2.0705924324873015.


Epoch 1000/1000 | Train Loss: 2.0978 | Train Acc: 0.1169 | Val Loss: 2.0861 | Val Acc: 0.1220

[I 2025-12-11 22:20:41,137] Trial 20 finished with value: 2.0861129760742188 and parameters: {'batch_size': 64, 'learning_rate': 2.7982482778832464e-06, 'weight_decay': 8.142165542242897e-07, 'dropout_rate': 0.45868645999962165, 'lr_decay_factor': 0.40431120410440113, 'lr_patience': 10, 'max_patience': 48, 'model_type': 'MLP', 'mlp_n_fc_units': 32, 'use_pca': False}. Best is trial 17 with value: 2.0705924324873015.


Epoch 1000/1000 | Train Loss: 2.0744 | Train Acc: 0.1299 | Val Loss: 2.0877 | Val Acc: 0.1463

[I 2025-12-11 22:25:43,768] Trial 21 finished with value: 2.0876598852436716 and parameters: {'batch_size': 4, 'learning_rate': 2.294998341743475e-06, 'weight_decay': 0.0004160814591869095, 'dropout_rate': 0.5112113315432991, 'lr_decay_factor': 0.22352823928911508, 'lr_patience': 26, 'max_patience': 31, 'model_type': 'ROI1DCNN', 'roi_n_filters_start': 8, 'use_pca': False}. Best is trial 17 with value: 2.0705924324873015.


Epoch 94/1000 | Train Loss: 2.0754 | Train Acc: 0.1342 | Val Loss: 2.0542 | Val Acc: 0.1951

[I 2025-12-11 22:26:07,658] Trial 22 finished with value: 2.054228852434856 and parameters: {'batch_size': 4, 'learning_rate': 5.718377265540766e-06, 'weight_decay': 0.000960325743686248, 'dropout_rate': 0.3332401875365453, 'lr_decay_factor': 0.6046018524795926, 'lr_patience': 16, 'max_patience': 31, 'model_type': 'MLP', 'mlp_n_fc_units': 256, 'use_pca': False}. Best is trial 22 with value: 2.054228852434856.


Epoch 1000/1000 | Train Loss: 1.9838 | Train Acc: 0.3766 | Val Loss: 2.1045 | Val Acc: 0.1463

[I 2025-12-11 22:30:26,156] Trial 23 finished with value: 2.1044543138364467 and parameters: {'batch_size': 4, 'learning_rate': 6.574236425093657e-06, 'weight_decay': 0.0001413309061302247, 'dropout_rate': 0.10569036427286305, 'lr_decay_factor': 0.6088959479857214, 'lr_patience': 15, 'max_patience': 29, 'model_type': 'MLP', 'mlp_n_fc_units': 256, 'use_pca': False}. Best is trial 22 with value: 2.054228852434856.


Epoch 1000/1000 | Train Loss: 1.9847 | Train Acc: 0.3377 | Val Loss: 2.0769 | Val Acc: 0.1220

[I 2025-12-11 22:34:51,474] Trial 24 finished with value: 2.0768750935066036 and parameters: {'batch_size': 4, 'learning_rate': 1.8651159089930162e-05, 'weight_decay': 0.00029012865281115893, 'dropout_rate': 0.23237665106374564, 'lr_decay_factor': 0.7158322080412881, 'lr_patience': 8, 'max_patience': 46, 'model_type': 'MLP', 'mlp_n_fc_units': 256, 'use_pca': False}. Best is trial 22 with value: 2.054228852434856.


Epoch 1000/1000 | Train Loss: 2.0576 | Train Acc: 0.1299 | Val Loss: 2.0929 | Val Acc: 0.1463

[I 2025-12-11 22:35:41,898] Trial 25 finished with value: 2.092949431116988 and parameters: {'batch_size': 32, 'learning_rate': 5.555197789938541e-06, 'weight_decay': 0.0009951341849973089, 'dropout_rate': 0.4370496951030009, 'lr_decay_factor': 0.7945100051731104, 'lr_patience': 21, 'max_patience': 36, 'model_type': 'MLP', 'mlp_n_fc_units': 256, 'use_pca': False}. Best is trial 22 with value: 2.054228852434856.


Epoch 1000/1000 | Train Loss: 2.0734 | Train Acc: 0.1299 | Val Loss: 2.0883 | Val Acc: 0.0976

[I 2025-12-11 22:36:08,626] Trial 26 finished with value: 2.0882527828216553 and parameters: {'batch_size': 64, 'learning_rate': 2.1149662924374635e-06, 'weight_decay': 8.071696650733322e-05, 'dropout_rate': 0.33769972562017736, 'lr_decay_factor': 0.6001845517342281, 'lr_patience': 13, 'max_patience': 26, 'model_type': 'MLP', 'mlp_n_fc_units': 256, 'use_pca': True, 'n_components': 113}. Best is trial 22 with value: 2.054228852434856.


Epoch 1000/1000 | Train Loss: 2.0780 | Train Acc: 0.1602 | Val Loss: 2.0991 | Val Acc: 0.1220

[I 2025-12-11 22:43:55,467] Trial 27 finished with value: 2.0990989208221436 and parameters: {'batch_size': 4, 'learning_rate': 2.0713352725648663e-05, 'weight_decay': 0.0004075854657386073, 'dropout_rate': 0.5001681838931002, 'lr_decay_factor': 0.4452751111612899, 'lr_patience': 8, 'max_patience': 68, 'model_type': 'MLP', 'mlp_n_fc_units': 256, 'use_pca': False}. Best is trial 22 with value: 2.054228852434856.


Epoch 131/1000 | Train Loss: 2.0566 | Train Acc: 0.1688 | Val Loss: 2.0849 | Val Acc: 0.1463

[I 2025-12-11 22:44:42,811] Trial 28 finished with value: 2.0848670878061433 and parameters: {'batch_size': 4, 'learning_rate': 5.5075190243655794e-05, 'weight_decay': 0.0002003232640907942, 'dropout_rate': 0.5732523695192809, 'lr_decay_factor': 0.6102301365849061, 'lr_patience': 8, 'max_patience': 100, 'model_type': 'MLP', 'mlp_n_fc_units': 64, 'use_pca': False}. Best is trial 22 with value: 2.054228852434856.


Epoch 1000/1000 | Train Loss: 2.1062 | Train Acc: 0.0996 | Val Loss: 2.0950 | Val Acc: 0.1220

[I 2025-12-11 22:48:02,348] Trial 29 finished with value: 2.094997720020573 and parameters: {'batch_size': 8, 'learning_rate': 5.302412338325724e-06, 'weight_decay': 1.6564454304740534e-05, 'dropout_rate': 0.5525418643311314, 'lr_decay_factor': 0.7839308672514573, 'lr_patience': 20, 'max_patience': 45, 'model_type': 'MLP', 'mlp_n_fc_units': 16, 'use_pca': False}. Best is trial 22 with value: 2.054228852434856.


Epoch 1000/1000 | Train Loss: 2.1057 | Train Acc: 0.1039 | Val Loss: 2.0963 | Val Acc: 0.1220

[I 2025-12-11 22:48:43,894] Trial 30 finished with value: 2.096273183822632 and parameters: {'batch_size': 64, 'learning_rate': 1.0140122963866422e-05, 'weight_decay': 0.0005743348573924996, 'dropout_rate': 0.6142816196446085, 'lr_decay_factor': 0.44514609845559366, 'lr_patience': 15, 'max_patience': 26, 'model_type': 'MLP', 'mlp_n_fc_units': 32, 'use_pca': False}. Best is trial 22 with value: 2.054228852434856.


Epoch 69/1000 | Train Loss: 1.9971 | Train Acc: 0.3247 | Val Loss: 2.1026 | Val Acc: 0.1220

[I 2025-12-11 22:49:16,306] Trial 31 finished with value: 2.1025942302331693 and parameters: {'batch_size': 4, 'learning_rate': 1.915247580616157e-05, 'weight_decay': 0.0002926789135283355, 'dropout_rate': 0.24239681482970826, 'lr_decay_factor': 0.7277608822640884, 'lr_patience': 8, 'max_patience': 53, 'model_type': 'MLP', 'mlp_n_fc_units': 256, 'use_pca': False}. Best is trial 22 with value: 2.054228852434856.


Epoch 1000/1000 | Train Loss: 2.0230 | Train Acc: 0.2554 | Val Loss: 2.0899 | Val Acc: 0.2195

[I 2025-12-11 22:57:11,011] Trial 32 finished with value: 2.089931709010427 and parameters: {'batch_size': 4, 'learning_rate': 1.4621915393569575e-05, 'weight_decay': 0.0005289991368067277, 'dropout_rate': 0.20622413948635915, 'lr_decay_factor': 0.6721706725522791, 'lr_patience': 5, 'max_patience': 42, 'model_type': 'MLP', 'mlp_n_fc_units': 256, 'use_pca': False}. Best is trial 22 with value: 2.054228852434856.


Epoch 1000/1000 | Train Loss: 1.8686 | Train Acc: 0.4892 | Val Loss: 2.1097 | Val Acc: 0.1951

[I 2025-12-11 23:06:23,298] Trial 33 finished with value: 2.109693213206966 and parameters: {'batch_size': 4, 'learning_rate': 3.451542875453364e-05, 'weight_decay': 9.640391532376866e-05, 'dropout_rate': 0.28159300853259056, 'lr_decay_factor': 0.7310693962289193, 'lr_patience': 9, 'max_patience': 35, 'model_type': 'MLP', 'mlp_n_fc_units': 256, 'use_pca': False}. Best is trial 22 with value: 2.054228852434856.


Epoch 82/1000 | Train Loss: 2.0349 | Train Acc: 0.2251 | Val Loss: 2.1025 | Val Acc: 0.0488

[I 2025-12-11 23:06:48,364] Trial 34 finished with value: 2.1025444472708354 and parameters: {'batch_size': 8, 'learning_rate': 7.773130376512952e-06, 'weight_decay': 6.918149956423188e-06, 'dropout_rate': 0.1839109526984177, 'lr_decay_factor': 0.8305967978175591, 'lr_patience': 14, 'max_patience': 45, 'model_type': 'MLP', 'mlp_n_fc_units': 256, 'use_pca': True, 'n_components': 114}. Best is trial 22 with value: 2.054228852434856.


Epoch 1000/1000 | Train Loss: 2.0952 | Train Acc: 0.1126 | Val Loss: 2.0909 | Val Acc: 0.1220

[I 2025-12-11 23:08:34,104] Trial 35 finished with value: 2.0909238908349015 and parameters: {'batch_size': 32, 'learning_rate': 1.8255176430650632e-06, 'weight_decay': 0.00024368552223963825, 'dropout_rate': 0.3069107116595323, 'lr_decay_factor': 0.5569793610933377, 'lr_patience': 6, 'max_patience': 57, 'model_type': 'MLP', 'mlp_n_fc_units': 256, 'use_pca': False}. Best is trial 22 with value: 2.054228852434856.


Epoch 1000/1000 | Train Loss: 2.1584 | Train Acc: 0.1429 | Val Loss: 2.0738 | Val Acc: 0.1707

[I 2025-12-11 23:17:04,692] Trial 36 finished with value: 2.073775785725291 and parameters: {'batch_size': 4, 'learning_rate': 1.0125264306352175e-06, 'weight_decay': 1.1405410552790383e-07, 'dropout_rate': 0.2549777466069584, 'lr_decay_factor': 0.48739373147356996, 'lr_patience': 24, 'max_patience': 36, 'model_type': 'MLP', 'mlp_n_fc_units': 256, 'use_pca': True, 'n_components': 2}. Best is trial 22 with value: 2.054228852434856.


Epoch 1000/1000 | Train Loss: 2.0776 | Train Acc: 0.1645 | Val Loss: 2.0984 | Val Acc: 0.0976

[I 2025-12-11 23:21:27,738] Trial 37 finished with value: 2.098425946584562 and parameters: {'batch_size': 8, 'learning_rate': 1.1305572010089856e-06, 'weight_decay': 1.4735673693596533e-07, 'dropout_rate': 0.371410949671939, 'lr_decay_factor': 0.4739747481883791, 'lr_patience': 23, 'max_patience': 26, 'model_type': '1DCNN', 'cnn_n_filters_start': 64, 'use_pca': True, 'n_components': 3}. Best is trial 22 with value: 2.054228852434856.


Epoch 1000/1000 | Train Loss: 2.0650 | Train Acc: 0.1385 | Val Loss: 2.0670 | Val Acc: 0.2195

[I 2025-12-11 23:28:48,798] Trial 38 finished with value: 2.067019538181584 and parameters: {'batch_size': 4, 'learning_rate': 2.439627445686571e-06, 'weight_decay': 1.4401423073502215e-06, 'dropout_rate': 0.24907714420004637, 'lr_decay_factor': 0.37077819413303426, 'lr_patience': 25, 'max_patience': 35, 'model_type': 'MLP', 'mlp_n_fc_units': 256, 'use_pca': True, 'n_components': 82}. Best is trial 22 with value: 2.054228852434856.


Epoch 1000/1000 | Train Loss: 2.0957 | Train Acc: 0.1429 | Val Loss: 2.0898 | Val Acc: 0.1220

[I 2025-12-11 23:37:26,432] Trial 39 finished with value: 2.0897786210222944 and parameters: {'batch_size': 4, 'learning_rate': 1.0065375087817158e-06, 'weight_decay': 1.1618809484005799e-06, 'dropout_rate': 0.25570555248279814, 'lr_decay_factor': 0.3727701776091892, 'lr_patience': 30, 'max_patience': 34, 'model_type': '1DCNN', 'cnn_n_filters_start': 16, 'use_pca': True, 'n_components': 88}. Best is trial 22 with value: 2.054228852434856.


Epoch 1000/1000 | Train Loss: 2.0894 | Train Acc: 0.1385 | Val Loss: 2.0807 | Val Acc: 0.1707

[I 2025-12-11 23:45:05,664] Trial 40 finished with value: 2.0806728456078507 and parameters: {'batch_size': 4, 'learning_rate': 1.7801288813315735e-06, 'weight_decay': 1.3596795138431986e-06, 'dropout_rate': 0.14596641873011557, 'lr_decay_factor': 0.2629563140953837, 'lr_patience': 25, 'max_patience': 30, 'model_type': 'MLP', 'mlp_n_fc_units': 16, 'use_pca': True, 'n_components': 86}. Best is trial 22 with value: 2.054228852434856.


Epoch 1000/1000 | Train Loss: 2.0709 | Train Acc: 0.1255 | Val Loss: 2.1132 | Val Acc: 0.1463

[I 2025-12-11 23:47:37,985] Trial 41 finished with value: 2.113229850443398 and parameters: {'batch_size': 16, 'learning_rate': 3.0420576221520736e-06, 'weight_decay': 1.1873532111405244e-07, 'dropout_rate': 0.4322616970878578, 'lr_decay_factor': 0.49458404188733407, 'lr_patience': 30, 'max_patience': 37, 'model_type': 'MLP', 'mlp_n_fc_units': 256, 'use_pca': True, 'n_components': 40}. Best is trial 22 with value: 2.054228852434856.


Epoch 1000/1000 | Train Loss: 2.0472 | Train Acc: 0.1688 | Val Loss: 2.0637 | Val Acc: 0.2439

[I 2025-12-11 23:56:10,024] Trial 42 finished with value: 2.063654597212629 and parameters: {'batch_size': 4, 'learning_rate': 4.252541312931439e-06, 'weight_decay': 2.4315049235248315e-06, 'dropout_rate': 0.33958233099222346, 'lr_decay_factor': 0.39808120888491916, 'lr_patience': 19, 'max_patience': 25, 'model_type': 'MLP', 'mlp_n_fc_units': 256, 'use_pca': True, 'n_components': 85}. Best is trial 22 with value: 2.054228852434856.


Epoch 1000/1000 | Train Loss: 2.0523 | Train Acc: 0.1948 | Val Loss: 2.1035 | Val Acc: 0.1220

[I 2025-12-12 00:04:15,468] Trial 43 finished with value: 2.103532174738442 and parameters: {'batch_size': 4, 'learning_rate': 2.414901322902848e-06, 'weight_decay': 2.449409915668302e-06, 'dropout_rate': 0.33740512107293513, 'lr_decay_factor': 0.13236011169363482, 'lr_patience': 18, 'max_patience': 25, 'model_type': 'MLP', 'mlp_n_fc_units': 256, 'use_pca': True, 'n_components': 89}. Best is trial 22 with value: 2.054228852434856.


Epoch 1000/1000 | Train Loss: 2.0761 | Train Acc: 0.1602 | Val Loss: 2.0674 | Val Acc: 0.2195

[I 2025-12-12 00:12:47,810] Trial 44 finished with value: 2.067440253932302 and parameters: {'batch_size': 4, 'learning_rate': 1.4876126128310077e-06, 'weight_decay': 3.837804014745002e-07, 'dropout_rate': 0.2766230775691823, 'lr_decay_factor': 0.38698755707665133, 'lr_patience': 24, 'max_patience': 24, 'model_type': 'MLP', 'mlp_n_fc_units': 256, 'use_pca': True, 'n_components': 75}. Best is trial 22 with value: 2.054228852434856.


Epoch 1000/1000 | Train Loss: 2.0442 | Train Acc: 0.1688 | Val Loss: 2.1104 | Val Acc: 0.0732

[I 2025-12-12 00:23:32,445] Trial 45 finished with value: 2.110424800616939 and parameters: {'batch_size': 4, 'learning_rate': 1.5607620250128261e-06, 'weight_decay': 4.701276223202516e-07, 'dropout_rate': 0.29904016496699776, 'lr_decay_factor': 0.3922864884259078, 'lr_patience': 33, 'max_patience': 20, 'model_type': '1DCNN', 'cnn_n_filters_start': 128, 'use_pca': True, 'n_components': 76}. Best is trial 22 with value: 2.054228852434856.


Epoch 1000/1000 | Train Loss: 2.0896 | Train Acc: 0.1429 | Val Loss: 2.0913 | Val Acc: 0.1463

[I 2025-12-12 00:32:56,936] Trial 46 finished with value: 2.0912568859937712 and parameters: {'batch_size': 4, 'learning_rate': 5.350140730046624e-06, 'weight_decay': 2.2995080313532354e-06, 'dropout_rate': 0.37656508795800325, 'lr_decay_factor': 0.2643274101811674, 'lr_patience': 28, 'max_patience': 23, 'model_type': 'MLP', 'mlp_n_fc_units': 64, 'use_pca': True, 'n_components': 101}. Best is trial 22 with value: 2.054228852434856.


Epoch 111/1000 | Train Loss: 2.0306 | Train Acc: 0.2165 | Val Loss: 2.1065 | Val Acc: 0.1220

[I 2025-12-12 00:33:23,417] Trial 47 finished with value: 2.106507435077574 and parameters: {'batch_size': 16, 'learning_rate': 4.247388203513255e-06, 'weight_decay': 5.288005285151915e-06, 'dropout_rate': 0.2723740100005403, 'lr_decay_factor': 0.404051291972446, 'lr_patience': 19, 'max_patience': 32, 'model_type': 'ROI1DCNN', 'roi_n_filters_start': 64, 'use_pca': True, 'n_components': 55}. Best is trial 22 with value: 2.054228852434856.


Epoch 1000/1000 | Train Loss: 2.1061 | Train Acc: 0.1255 | Val Loss: 2.0811 | Val Acc: 0.0976

[I 2025-12-12 00:44:01,179] Trial 48 finished with value: 2.0810722432485442 and parameters: {'batch_size': 4, 'learning_rate': 1.4435529870056305e-06, 'weight_decay': 2.3591174583206712e-07, 'dropout_rate': 0.4068394466759821, 'lr_decay_factor': 0.3565155345170096, 'lr_patience': 38, 'max_patience': 24, 'model_type': 'MLP', 'mlp_n_fc_units': 32, 'use_pca': True, 'n_components': 73}. Best is trial 22 with value: 2.054228852434856.


Epoch 1000/1000 | Train Loss: 2.0986 | Train Acc: 0.1169 | Val Loss: 2.0939 | Val Acc: 0.1220

[I 2025-12-12 00:46:08,037] Trial 49 finished with value: 2.093854752982535 and parameters: {'batch_size': 32, 'learning_rate': 3.142621127797967e-06, 'weight_decay': 5.667127119559954e-07, 'dropout_rate': 0.21595754559485494, 'lr_decay_factor': 0.27017407792912357, 'lr_patience': 16, 'max_patience': 86, 'model_type': 'MLP', 'mlp_n_fc_units': 16, 'use_pca': True, 'n_components': 101}. Best is trial 22 with value: 2.054228852434856.


Epoch 51/1000 | Train Loss: 2.0674 | Train Acc: 0.1775 | Val Loss: 2.0736 | Val Acc: 0.0976

[I 2025-12-12 00:46:44,989] Trial 50 finished with value: 2.0736255064243223 and parameters: {'batch_size': 4, 'learning_rate': 9.852913603097554e-06, 'weight_decay': 1.006102052002967e-05, 'dropout_rate': 0.34202556877345636, 'lr_decay_factor': 0.542605821046386, 'lr_patience': 32, 'max_patience': 28, 'model_type': '1DCNN', 'cnn_n_filters_start': 32, 'use_pca': True, 'n_components': 58}. Best is trial 22 with value: 2.054228852434856.


Epoch 53/1000 | Train Loss: 2.0738 | Train Acc: 0.1602 | Val Loss: 2.0720 | Val Acc: 0.0732

[I 2025-12-12 00:47:39,697] Trial 51 finished with value: 2.0719504356384277 and parameters: {'batch_size': 4, 'learning_rate': 9.182844339261581e-06, 'weight_decay': 1.6413132297979068e-06, 'dropout_rate': 0.3505818465041091, 'lr_decay_factor': 0.5531184760404567, 'lr_patience': 28, 'max_patience': 29, 'model_type': '1DCNN', 'cnn_n_filters_start': 32, 'use_pca': True, 'n_components': 58}. Best is trial 22 with value: 2.054228852434856.


Epoch 1000/1000 | Train Loss: 2.0563 | Train Acc: 0.1688 | Val Loss: 2.0996 | Val Acc: 0.0732

[I 2025-12-12 00:57:43,595] Trial 52 finished with value: 2.0996144341259466 and parameters: {'batch_size': 4, 'learning_rate': 4.061897870119359e-06, 'weight_decay': 1.569273498264121e-06, 'dropout_rate': 0.3132511555421764, 'lr_decay_factor': 0.43266436037441136, 'lr_patience': 28, 'max_patience': 32, 'model_type': '1DCNN', 'cnn_n_filters_start': 32, 'use_pca': True, 'n_components': 80}. Best is trial 22 with value: 2.054228852434856.


Epoch 1000/1000 | Train Loss: 2.0691 | Train Acc: 0.1602 | Val Loss: 2.0854 | Val Acc: 0.1707

[I 2025-12-12 01:04:29,178] Trial 53 finished with value: 2.0854337448027076 and parameters: {'batch_size': 4, 'learning_rate': 2.401060419278497e-06, 'weight_decay': 2.9195879561170533e-07, 'dropout_rate': 0.35822274778198326, 'lr_decay_factor': 0.3006619640898425, 'lr_patience': 22, 'max_patience': 42, 'model_type': '1DCNN', 'cnn_n_filters_start': 32, 'use_pca': True, 'n_components': 64}. Best is trial 22 with value: 2.054228852434856.


Epoch 138/1000 | Train Loss: 2.0453 | Train Acc: 0.2121 | Val Loss: 2.0777 | Val Acc: 0.0976

[I 2025-12-12 01:05:24,881] Trial 54 finished with value: 2.077720665350193 and parameters: {'batch_size': 4, 'learning_rate': 7.68696204385526e-06, 'weight_decay': 3.324236239379185e-06, 'dropout_rate': 0.3890743309033783, 'lr_decay_factor': 0.5839001965883911, 'lr_patience': 35, 'max_patience': 20, 'model_type': '1DCNN', 'cnn_n_filters_start': 64, 'use_pca': True, 'n_components': 99}. Best is trial 22 with value: 2.054228852434856.


Epoch 1000/1000 | Train Loss: 1.5534 | Train Acc: 0.4935 | Val Loss: 2.4620 | Val Acc: 0.1707

[I 2025-12-12 01:12:46,558] Trial 55 finished with value: 2.4619641536619605 and parameters: {'batch_size': 4, 'learning_rate': 0.00016153194771342436, 'weight_decay': 7.251232520717534e-07, 'dropout_rate': 0.29217365307270865, 'lr_decay_factor': 0.4711693716629541, 'lr_patience': 25, 'max_patience': 29, 'model_type': 'ROI1DCNN', 'roi_n_filters_start': 32, 'use_pca': True, 'n_components': 48}. Best is trial 22 with value: 2.054228852434856.


Epoch 1000/1000 | Train Loss: 2.0630 | Train Acc: 0.1688 | Val Loss: 2.1048 | Val Acc: 0.1951

[I 2025-12-12 01:22:00,168] Trial 56 finished with value: 2.1048125813647016 and parameters: {'batch_size': 4, 'learning_rate': 5.764162258279614e-06, 'weight_decay': 2.6116778133764527e-05, 'dropout_rate': 0.43000127411901135, 'lr_decay_factor': 0.5158373698964366, 'lr_patience': 21, 'max_patience': 23, 'model_type': '1DCNN', 'cnn_n_filters_start': 128, 'use_pca': True, 'n_components': 70}. Best is trial 22 with value: 2.054228852434856.


Epoch 1000/1000 | Train Loss: 2.0631 | Train Acc: 0.1732 | Val Loss: 2.1082 | Val Acc: 0.1463

[I 2025-12-12 01:31:19,933] Trial 57 finished with value: 2.108238604010605 and parameters: {'batch_size': 4, 'learning_rate': 3.488793519671016e-06, 'weight_decay': 9.86414095073367e-07, 'dropout_rate': 0.27052112603510425, 'lr_decay_factor': 0.35431880046029646, 'lr_patience': 27, 'max_patience': 39, 'model_type': 'MLP', 'mlp_n_fc_units': 128, 'use_pca': True, 'n_components': 81}. Best is trial 22 with value: 2.054228852434856.


Epoch 54/1000 | Train Loss: 2.0553 | Train Acc: 0.1775 | Val Loss: 2.0928 | Val Acc: 0.1220

[I 2025-12-12 01:31:31,156] Trial 58 finished with value: 2.092757416934502 and parameters: {'batch_size': 16, 'learning_rate': 1.3885466969781447e-05, 'weight_decay': 2.353632658085107e-06, 'dropout_rate': 0.3229273760610432, 'lr_decay_factor': 0.4238334968637663, 'lr_patience': 12, 'max_patience': 32, 'model_type': 'MLP', 'mlp_n_fc_units': 256, 'use_pca': True, 'n_components': 92}. Best is trial 22 with value: 2.054228852434856.


Epoch 755/1000 | Train Loss: 2.0451 | Train Acc: 0.1861 | Val Loss: 2.1141 | Val Acc: 0.1951