In [155]:
import torch
import random
import pandas as pd
import numpy as np
import joblib
from pathlib import Path
import os
from torch import nn

from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
import cv2

import json
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import DataLoader, TensorDataset

from sklearn.utils.class_weight import compute_class_weight

from sklearn.preprocessing import LabelEncoder, MinMaxScaler, OneHotEncoder, StandardScaler

# Local application/library imports
from utils import load_search_space

import optuna

from sklearn.metrics import (
    RocCurveDisplay, PrecisionRecallDisplay,
    ConfusionMatrixDisplay, roc_auc_score, average_precision_score
)

## DATASET

In [156]:
SEED = 64

# Set random seeds
torch.manual_seed(SEED)
random.seed(SEED)
np.random.seed(SEED)

In [157]:
# Dataset Info
# adult_income_cleaned, framingham_cleaned, preprocessed_heloc, diabetes
dataset_name = 'boston'        
dataset_subpath = 'Regression/boston'       
task_type = 'Regression'

In [158]:
df = pd.read_csv(f"./data/{dataset_subpath}/{dataset_name}.csv")

In [159]:
df.shape

(506, 14)

In [160]:
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33,36.2


## LOAD AND PREPROCESS

In [161]:
def prepare_target_tensor(y, task):
    task = task.lower()
    if isinstance(y, pd.Series):
        y = y.to_numpy()
    elif isinstance(y, list):
        y = np.array(y)
        
    if task == "regression" or task == "binary":
        return torch.as_tensor(y, dtype=torch.float32).reshape(-1, 1)
    elif task == "multiclass":
        return torch.as_tensor(y, dtype=torch.long)
    else:
        raise ValueError(f"Unsupported task type: {task}")

In [162]:
def load_and_preprocess_data(df, dataset_name, images_folder, 
                             problem_type, task_type, seed=42, batch_size=32, device='cpu'):
    task_type = task_type.lower()

    # Load config
    with open(f"./configs/preprocess/{dataset_name}.json") as f:
        config = json.load(f)

    categorical_cols = config["categorical_cols"]
    numerical_cols = config["numerical_cols"]
    encoding = config["encoding"]

    # Extract features and target
    X = df[numerical_cols + categorical_cols].copy()
    y = df.iloc[:, -1].copy()

    # Encode target if needed
    le = None
    if encoding.get("target") == "label":
        le = LabelEncoder()
        y = le.fit_transform(y)
        label_mapping = dict(zip(le.classes_, le.transform(le.classes_)))
    else:
        label_mapping = None

    # Split raw data before transformation
    if task_type == "regression":
        # For regression, we can use a simple split
        X_train_raw, X_temp_raw, y_train, y_temp = train_test_split(
            X, y, test_size=0.3, random_state=seed
        )
        X_val_raw, X_test_raw, y_val, y_test = train_test_split(
            X_temp_raw, y_temp, test_size=0.5, random_state=seed
        )
    else:
        # For classification, we need stratified splits
        X_train_raw, X_temp_raw, y_train, y_temp = train_test_split(
            X, y, test_size=0.3, random_state=seed, stratify=y
        )
        X_val_raw, X_test_raw, y_val, y_test = train_test_split(
            X_temp_raw, y_temp, test_size=0.5, random_state=seed, stratify=y_temp
        )

    # Compute class weights for classification
    class_weight = None
    if task_type in ["binary", "multiclass"]:
        # Compute raw weights
        class_weight_values = compute_class_weight(class_weight="balanced", classes=np.unique(y_train), y=y_train)
        classes_sorted = np.sort(np.unique(y_train))
        
        if task_type == "binary":
            # Compute pos_weight = weight for class 1 / weight for class 0
            weight_dict = dict(zip(classes_sorted, class_weight_values))
            pos_weight = weight_dict[1] / weight_dict[0]
            class_weight = torch.tensor(pos_weight, dtype=torch.float32).to(device)
            print(f"Binary pos_weight (for BCEWithLogitsLoss): {class_weight.item()}")

        elif task_type == "multiclass":
            class_weight = torch.tensor(class_weight_values, dtype=torch.float32).to(device)
            print(f"Multiclass class weights (for CrossEntropyLoss): {class_weight.tolist()}")

    # Transform numerical and categorical features
    transformers = []

    if encoding["numerical_features"] == "minmax":
        transformers.append(("num", MinMaxScaler(), numerical_cols))
    elif encoding["numerical_features"] == "standard":
        transformers.append(("num", StandardScaler(), numerical_cols))

    if categorical_cols and encoding["categorical_features"] == "onehot":
        transformers.append(("cat", OneHotEncoder(sparse_output=False, handle_unknown="ignore"), categorical_cols))

    if transformers:
        preprocessor = ColumnTransformer(transformers=transformers)
        X_train = preprocessor.fit_transform(X_train_raw)
        X_val = preprocessor.transform(X_val_raw)
        X_test = preprocessor.transform(X_test_raw)

        # Recover transformed column names
        if "cat" in preprocessor.named_transformers_:
            cat_feature_names = preprocessor.named_transformers_["cat"].get_feature_names_out(categorical_cols)
            all_feature_names = numerical_cols + list(cat_feature_names)
        else:
            all_feature_names = numerical_cols + categorical_cols

        X_train_num = pd.DataFrame(X_train, columns=all_feature_names, index=X_train_raw.index)
        X_val_num = pd.DataFrame(X_val, columns=all_feature_names, index=X_val_raw.index)
        X_test_num = pd.DataFrame(X_test, columns=all_feature_names, index=X_test_raw.index)
    else:
        all_feature_names = numerical_cols + categorical_cols  # or keep original order
        X_train_num = pd.DataFrame(X_train_raw, columns=all_feature_names, index=X_train_raw.index)
        X_val_num = pd.DataFrame(X_val_raw, columns=all_feature_names, index=X_val_raw.index)
        X_test_num = pd.DataFrame(X_test_raw, columns=all_feature_names, index=X_test_raw.index)


    print(f"Shapes — Train: {X_train_num.shape}, Val: {X_val_num.shape}, Test: {X_test_num.shape}")
    print(f"Numerical features: {len(numerical_cols)} — {numerical_cols}")
    print(f"Categorical features: {len(categorical_cols)} — {categorical_cols}")
    print(f"Total features: {X_train_num.shape[1]}")
    if label_mapping:
        print(f"Target label mapping: {label_mapping}")

    # -------------------------------
    # Image generation
    # -------------------------------
    ### X_train
    img_paths = os.path.join(f'{images_folder}/train',problem_type+".csv")

    print(img_paths)
    
    imgs = pd.read_csv(img_paths)
    
    # Update image paths
    imgs["images"] = images_folder + "/train/" + imgs["images"]

    # Image data
    X_train_img = np.array([cv2.cvtColor(cv2.imread(img), cv2.COLOR_BGR2RGB) for img in imgs["images"]])
        
    ### X_val
    img_paths = os.path.join(f'{images_folder}/val',problem_type+".csv")

    print(img_paths)
    
    imgs = pd.read_csv(img_paths)

    # Update image paths
    imgs["images"] = images_folder + "/val/" + imgs["images"]

    # Image data
    X_val_img = np.array([cv2.cvtColor(cv2.imread(img), cv2.COLOR_BGR2RGB) for img in imgs["images"]])

    ### test

    img_paths = os.path.join(f'{images_folder}/test',problem_type+".csv")

    print(img_paths)
    
    imgs = pd.read_csv(img_paths)

    # Update image paths
    imgs["images"] = images_folder + "/test/" + imgs["images"]

    # Image data
    X_test_img = np.array([cv2.cvtColor(cv2.imread(img), cv2.COLOR_BGR2RGB) for img in imgs["images"]])
    

    attributes = len(X_train_num.columns)
    height, width, channels = X_train_img[0].shape
    imgs_shape = (channels, height, width)

    print("Images shape: ", imgs_shape)
    print("Attributes: ", attributes)
    # Convert data to PyTorch tensors
    X_train_num_tensor = torch.as_tensor(X_train_num.values, dtype=torch.float32)
    X_val_num_tensor = torch.as_tensor(X_val_num.values, dtype=torch.float32)
    X_test_num_tensor = torch.as_tensor(X_test_num.values, dtype=torch.float32)
    X_train_img_tensor = torch.as_tensor(X_train_img, dtype=torch.float32).permute(0, 3, 1, 2)
    X_val_img_tensor = torch.as_tensor(X_val_img, dtype=torch.float32).permute(0, 3, 1, 2)
    X_test_img_tensor = torch.as_tensor(X_test_img, dtype=torch.float32).permute(0, 3, 1, 2)
    y_train_tensor = prepare_target_tensor(y_train, task_type)
    y_val_tensor = prepare_target_tensor(y_val, task_type)
    y_test_tensor = prepare_target_tensor(y_test, task_type)

    # Normalize to [0, 1]
    #X_train_img_tensor = X_train_img_tensor / 255.0
    #X_val_img_tensor = X_val_img_tensor / 255.0
    #X_test_img_tensor = X_test_img_tensor / 255.0

    # Create DataLoaders
    train_dataset = TensorDataset(X_train_num_tensor, X_train_img_tensor, y_train_tensor)
    val_dataset = TensorDataset(X_val_num_tensor, X_val_img_tensor, y_val_tensor)
    test_dataset = TensorDataset(X_test_num_tensor, X_test_img_tensor, y_test_tensor)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)

    return train_loader, val_loader, test_loader, attributes, imgs_shape, le, class_weight

## MODEL ARCHITECTURES

In [163]:
def find_divisors(n):
    divisors = []
    for i in range(1, int(n**0.5) + 1):
        if n % i == 0:
            divisors.append(i)
            if i != n // i:  # Check to include both divisors if they are not the same
                divisors.append(n // i)
    divisors.sort()
    return divisors

### Vision Transformer

In [164]:
from models.vit_pytorch.simple_vit_with_register_tokens import ViT

In [165]:
class ViTMLP(nn.Module):
    def __init__(self, imgs_shape, num_input_dim, params, task, num_classes=None):
        super(ViTMLP, self).__init__()

        # Vision Transformer branch
        self.vit = ViT(
            image_size=imgs_shape,
            patch_size=params["patch_size"],
            dim=params["dim"],
            depth=params["depth"],
            heads=params["heads"],
            mlp_dim=params["mlp_dim"]
        )

        # Tabular MLP branch
        mlp_layers = []
        input_dim = num_input_dim
        for hidden_dim in params["mlp_hidden_dims"]:
            mlp_layers.append(nn.Linear(input_dim, hidden_dim))
            mlp_layers.append(nn.ReLU())
            input_dim = hidden_dim
        self.tabular_mlp = nn.Sequential(*mlp_layers)

        # Fusion MLP (Final classifier head)
        fusion_input_dim = params["dim"] + input_dim
        fusion_layers = []
        for hidden_dim in params["fusion_hidden_dims"]:
            fusion_layers.append(nn.Linear(fusion_input_dim, hidden_dim))
            fusion_layers.append(nn.ReLU())
            fusion_input_dim = hidden_dim

        output_dim = 1 if task in ['regression', 'binary'] else num_classes
        fusion_layers.append(nn.Linear(fusion_input_dim, output_dim))
        self.fusion_mlp = nn.Sequential(*fusion_layers)

        # Change identity to something else if needed
        self.activation = nn.Identity()

    def forward(self, num_input, vit_input):
        vit_feat = self.vit(vit_input)
        tabular_feat = self.tabular_mlp(num_input)
        fusion = torch.cat([vit_feat, tabular_feat], dim=1)
        output = self.fusion_mlp(fusion)
        return self.activation(output)


### CNN

In [166]:
class CNNMLP(nn.Module):
    def __init__(self, imgs_shap, num_input_dim, params, task, num_classes=None):
        super(CNNMLP, self).__init__()
        
        # CNN branch
        if params["model"] == "model1":
            self.cnn_branch = nn.Sequential(
                nn.Conv2d(imgs_shape[0], 16, kernel_size=2),
                nn.ReLU(),
                nn.Flatten()
            )
        else:  # "model2"
            self.cnn_branch = nn.Sequential(
                nn.Conv2d(imgs_shape[0], out_channels=16, kernel_size=3),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.Conv2d(32, 64, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.Flatten()
            )

        # Calculate the size of the flattened output
        self.flat_size = self._get_flat_size(imgs_shape)
        
        # Tabular MLP branch
        mlp_layers = []
        input_dim = num_input_dim
        for hidden_dim in params["mlp_hidden_dims"]:
            mlp_layers.append(nn.Linear(input_dim, hidden_dim))
            mlp_layers.append(nn.ReLU())
            input_dim = hidden_dim
        self.tabular_mlp = nn.Sequential(*mlp_layers)

        # Fusion MLP (Final classifier head)
        fusion_input_dim = self.flat_size + input_dim
        fusion_layers = []
        for hidden_dim in params["fusion_hidden_dims"]:
            fusion_layers.append(nn.Linear(fusion_input_dim, hidden_dim))
            fusion_layers.append(nn.ReLU())
            fusion_input_dim = hidden_dim

        # Determine output layer
        output_dim = 1 if task in ['regression', 'binary'] else num_classes
        fusion_layers.append(nn.Linear(fusion_input_dim, output_dim))
        self.fusion_mlp = nn.Sequential(*fusion_layers)

        # Change identity to something else if needed
        self.activation = nn.Identity()
        
    def _get_flat_size(self, imgs_shape):
        # Forward pass with dummy input to calculate flat size
        dummy_input = torch.zeros(4, *imgs_shape)
        x = self.cnn_branch(dummy_input)
        return x.size(1)

    def forward(self, num_input, cnn_input):
        cnn_feat = self.cnn_branch(cnn_input)
        tabular_feat = self.tabular_mlp(num_input)
        fusion = torch.cat([cnn_feat, tabular_feat], dim=1)
        output = self.fusion_mlp(fusion)
        return self.activation(output)

### Resnet50

In [167]:
import torchvision.models as models

In [168]:
class ResNetMLP(nn.Module):
    def __init__(self, imgs_shape, num_input_dim, params, task_type, num_classes=None):
        super(ResNetMLP, self).__init__()

        # Load a ResNet50 with or without pretrained weights
        base_resnet = models.resnet50(weights=None)

        self.resnet_backbone = nn.Sequential(*list(base_resnet.children())[:-1])  # (B, 2048, 1, 1)
        self.flatten = nn.Flatten()  # Converts (B, 2048, 1, 1) → (B, 2048)

        # Tabular MLP branch
        tabular_layers = []
        input_dim = num_input_dim
        for hidden_dim in params["mlp_hidden_dims"]:
            tabular_layers.append(nn.Linear(input_dim, hidden_dim))
            tabular_layers.append(nn.ReLU())
            input_dim = hidden_dim
        self.tabular_mlp = nn.Sequential(*tabular_layers)

        # Create a dummy image based on the input image shape to calculate the output size
        dummy_img = torch.randn(4, *imgs_shape)  # (B, 3, H, W)
        with torch.no_grad():
            # Pass the dummy image through ResNet to get feature map
            img_feat = self.resnet_backbone(dummy_img)  
            resnet_output_dim = self.flatten(img_feat)

        # Fusion MLP head (ResNet features + Tabular MLP)
        fusion_input_dim = resnet_output_dim.shape[1] + input_dim
        fusion_layers = []
        for hidden_dim in params.get("fusion_hidden_dims", [128]):
            fusion_layers.append(nn.Linear(fusion_input_dim, hidden_dim))
            fusion_layers.append(nn.ReLU())
            fusion_input_dim = hidden_dim

        output_dim = 1 if task_type in ["regression", "binary"] else num_classes
        fusion_layers.append(nn.Linear(fusion_input_dim, output_dim))
        self.fusion_mlp = nn.Sequential(*fusion_layers)

        # Output activation
        self.activation = nn.Identity()

    def forward(self, num_input, img_input):
        # ResNet feature extraction
        img_feat = self.resnet_backbone(img_input)  # (B, 2048, 1, 1)
        img_feat = self.flatten(img_feat)           # (B, 2048)

        # Tabular feature extraction
        tab_feat = self.tabular_mlp(num_input)      # (B, D_tabular)

        # Fusion and classification
        fusion = torch.cat([img_feat, tab_feat], dim=1)

        output = self.fusion_mlp(fusion)

        return self.activation(output)

## COMPILE AND FIT

In [169]:
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
import joblib
import os
import gc
import copy

from models.utils import get_loss_fn, calculate_metrics, calculate_metrics_hybrid, calculate_metrics_hybrid_manuel, calculate_metrics_from_numpy, get_class_weighted_loss_fn

In [170]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from torch.optim.lr_scheduler import OneCycleLR
import matplotlib.pyplot as plt
import time
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import os

def compile_and_fit(model, train_loader, val_loader, test_loader, dataset_name, 
                    model_name, image_name, trial_name=None, task='regression', epochs=200, max_lr=1, 
                    div_factor=10, final_div_factor=1, device='cuda', weight_decay=1e-2, save_model=False, class_weights=None, save_dir=None, study=None, patch=None, verbose=False):
    model = model.to(device)
    
    if class_weights != None:
        loss_fn = get_class_weighted_loss_fn(task, class_weights)
    else:
        loss_fn = get_loss_fn(task)

    # Compute min_lr from max_lr and div_factor
    min_lr = max_lr / div_factor

    optimizer = optim.AdamW(model.parameters(), lr=min_lr, weight_decay=weight_decay)
    
    total_steps = epochs * len(train_loader)
    scheduler = OneCycleLR(optimizer, max_lr=max_lr, div_factor=div_factor, final_div_factor=final_div_factor, total_steps=total_steps, pct_start=0.3, anneal_strategy="cos")
    
    best_val_loss = float('inf')
    best_model = None
    best_epoch = 0
    #early_stopping_counter = 0
    #patience = 10  # Early stopping patience

    history = {'train_loss': [], 'val_loss': [], 'learning_rate': [], 'epoch_time': []}

    if task == 'regression':
        history.update({'train_mse': [],  'val_mse': [], 'train_mae': [],  'val_mae': [], 'train_rmse': [], 'val_rmse': [], 'train_r2': [], 'val_r2': []})
    elif task in ['binary', 'multiclass']:
        history.update({'train_accuracy': [], 'val_accuracy': [], 'train_precision': [], 'val_precision': [], 'train_recall': [], 'val_recall': [], 'train_f1': [], 'val_f1': []})

    start_time = time.time()
    
    for epoch in range(epochs):
        epoch_start_time = time.time()

        model.train()
        train_loss = 0.0
        train_preds = []
        train_targets = []

        for num_data, img_data, targets in train_loader:
            num_data, img_data, targets = num_data.to(device, non_blocking=True), img_data.to(device, non_blocking=True), targets.to(device, non_blocking=True)
            
            optimizer.zero_grad()
            outputs = model(num_data, img_data)
            loss = loss_fn(outputs, targets)
            loss.backward()
            optimizer.step()
            scheduler.step()
            
            train_loss += loss.item()
            train_preds.extend(outputs.cpu().detach().numpy())
            train_targets.extend(targets.cpu().numpy())


        train_loss /= len(train_loader)
        if task == 'multiclass':
            y_train_pred = np.vstack(train_preds)
            y_train_true = train_targets
        else:
            y_train_pred = np.concatenate(train_preds)
            y_train_true = np.concatenate(train_targets)
        train_metrics = calculate_metrics_from_numpy(y_train_true, y_train_pred, task)

        model.eval()
        val_loss = 0.0
        val_preds = []
        val_targets = []
        with torch.no_grad():
            for num_data, img_data, targets in val_loader:
                num_data, img_data, targets = num_data.to(device, non_blocking=True), img_data.to(device, non_blocking=True), targets.to(device, non_blocking=True)
                outputs = model(num_data, img_data)
                loss = loss_fn(outputs, targets)
                
                val_loss += loss.item()
                val_preds.extend(outputs.cpu().numpy())
                val_targets.extend(targets.cpu().numpy())

        val_loss /= len(val_loader)
        if task == 'multiclass':
            y_val_pred = np.vstack(val_preds)
            y_val_true = val_targets
        else:
            y_val_pred = np.concatenate(val_preds)
            y_val_true = np.concatenate(val_targets)
        val_metrics = calculate_metrics_from_numpy(y_val_true, y_val_pred, task)
        
        # Get the current learning rate
        current_lr = scheduler.get_last_lr()

        epoch_time = time.time() - epoch_start_time

        history['train_loss'].append(train_loss)
        history['val_loss'].append(val_loss)
        history['learning_rate'].append(current_lr)
        history['epoch_time'].append(epoch_time)

        for k, v in train_metrics.items():
            history[f'train_{k}'].append(v)
        for k, v in val_metrics.items():
            history[f'val_{k}'].append(v)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model = copy.deepcopy(model.state_dict())
            best_epoch = epoch + 1
            #early_stopping_counter = 0
        #else:
        #    early_stopping_counter += 1
        #    if early_stopping_counter >= patience:
        #        print(f"Early stopping at epoch {epoch + 1}")
        #        break

    total_time = time.time() - start_time
    model.load_state_dict(best_model)

    # Recompute metrics using the best model
    train_metrics, y_true_train, y_pred_train, y_prob_train = calculate_metrics_hybrid(model, train_loader, device, class_weights, task)
    val_metrics, y_true_val, y_pred_val, y_prob_val  = calculate_metrics_hybrid(model, val_loader, device, class_weights, task)
    test_metrics, y_true_test, y_pred_test, y_prob_test = calculate_metrics_hybrid(model, test_loader, device, class_weights, task)

    # Store recomputed metrics
    metrics = {
        'train_loss': train_metrics['loss'],
        'val_loss': val_metrics['loss'],
        'test_loss': test_metrics['loss'],
        'min_lr': min_lr,
        'max_lr': max_lr,
        'total_time': total_time,
        'average_epoch_time': sum(history['epoch_time']) / len(history['epoch_time'])
    }

    # Add task-specific metrics
    for k in train_metrics:
        if k != 'loss':
            metrics[f'train_{k}'] = train_metrics[k]
    for k in val_metrics:
        if k != 'loss':
            metrics[f'val_{k}'] = val_metrics[k]
    for k in test_metrics:
        if k != 'loss':
            metrics[f'test_{k}'] = test_metrics[k]
            
    if verbose:     
        print(f"\nTraining completed in {total_time:.2f} seconds")
        print(f"Best model found at epoch {best_epoch}/{epochs}")
        print(f"Best Train Loss: {metrics['train_loss']:.4f}, Best Val Loss: {metrics['val_loss']:.4f}")
        print(metrics)
    
    if save_model:
        if model_name == "CNN_hybrid":
            save_path = os.path.join(save_dir, f"{model_name}/{image_name}/best_model/{trial_name}")
        else:
            save_path = os.path.join(save_dir, f"{model_name}/{image_name}/best_model/{trial_name}{patch}")
        os.makedirs(save_path, exist_ok=True)

        plot_metric(history['train_loss'], history['val_loss'], 'Loss', save_path)
        if task == 'regression':
            plot_metric(history['train_mse'], history['val_mse'], 'MSE', save_path)
            plot_metric(history['train_rmse'], history['val_rmse'], 'RMSE', save_path)
        else:
            plot_metric(history['train_accuracy'], history['val_accuracy'], 'Accuracy', save_path)
            plot_metric(history['train_f1'], history['val_f1'], 'F1', save_path)

        plot_learning_rate(history['learning_rate'], save_path)

        # Save metrics
        os.makedirs(save_path, exist_ok=True)
        with open(f'{save_path}/best_model_metrics.txt', 'w') as f:
            for key, value in metrics.items():
                f.write(f'{key}: {value}\n')

        # Save model
        torch.save(best_model, f"{save_path}/best_model.pth")
        print(f"Best model saved to {save_path}/best_model.pth")

        # Additional plots for classification
        if task in ["binary"]:
            plot_extra("Train", y_true_train, y_pred_train, y_prob_train, save_path)
            plot_extra("Validation", y_true_val, y_pred_val, y_prob_val, save_path)
            plot_extra("Test", y_true_test, y_pred_test, y_prob_test, save_path)

    del model
    torch.cuda.empty_cache()
    gc.collect()

    return metrics


def plot_extra(split_name, y_true, y_pred, y_prob, save_path):
    y_true = y_true.ravel()
    y_pred = y_pred.ravel()

    # ROC Curve
    RocCurveDisplay.from_predictions(y_true, y_prob)
    auc_score = roc_auc_score(y_true, y_prob)
    plt.plot([0, 1], [0, 1], linestyle='--', color='gray', label='Random')
    plt.title(f"{split_name} ROC Curve (AUC = {auc_score:.2f})")
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.legend()
    plt.grid(True)
    plt.savefig(os.path.join(save_path, f"{split_name.lower()}_roc_curve.png"))
    plt.close("all")

    # Precision-Recall Curve
    PrecisionRecallDisplay.from_predictions(y_true, y_prob)
    avg_prec = average_precision_score(y_true, y_prob)
    plt.title(f"{split_name} PR Curve (AP = {avg_prec:.2f})")
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.grid(True)
    plt.savefig(os.path.join(save_path, f"{split_name.lower()}_pr_curve.png"))
    plt.close("all")

    # Normalized confusion matrix
    ConfusionMatrixDisplay.from_predictions(y_true, y_pred, normalize='true').plot(cmap='Blues')
    plt.title(f"{split_name} Confusion Matrix (Normalized)")
    plt.grid(False)
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.savefig(os.path.join(save_path, f"{split_name.lower()}_confusion_matrix_normalized.png"))
    plt.close("all")

    # Raw confusion matrix
    ConfusionMatrixDisplay.from_predictions(y_true, y_pred, normalize=None).plot(cmap='Blues')
    plt.title(f"{split_name} Confusion Matrix (Counts)")
    plt.grid(False)
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.savefig(os.path.join(save_path, f"{split_name.lower()}_confusion_matrix_counts.png"))
    plt.close("all")


def plot_metric(train_metric, val_metric, metric_name, save_path):
    plt.figure()
    plt.plot(train_metric, label=f'Train {metric_name}')
    plt.plot(val_metric, label=f'Validation {metric_name}')
    plt.xlabel('Epoch')
    plt.ylabel(metric_name)
    plt.legend()
    plt.title(f'{metric_name} vs. Epoch')
    save_path = f"{save_path}/{metric_name.lower()}_plot.png"
    plt.savefig(save_path)
    plt.close("all")

def plot_learning_rate(learning_rates, save_path):
    plt.figure()
    plt.plot(learning_rates)
    plt.xlabel('Epoch')
    plt.ylabel('Learning Rate')
    plt.title('Learning Rate vs. Epoch')
    save_path = f"{save_path}/learning_rate_plot.png"
    plt.savefig(save_path)
    plt.close("all")

# EXPERIMENTS

## HyViT

In [171]:
save_dir =  os.path.join("logs", task_type, dataset_name)
model_name = "ViT_hybrid"

# Load config
with open(f"./configs/preprocess/{dataset_name}.json") as f:
    config = json.load(f)
batch_size = config["batch_size"]

epochs = [100,200]
n_trials = 100

if task_type.lower() == 'multiclass':
    num_classes = df.iloc[:,-1].nunique()
else:
    num_classes = 1

device='cuda:0' if torch.cuda.is_available() else 'cpu'

In [181]:
save_dir =  os.path.join("logs", task_type, dataset_name)
model_name = "CNN_hybrid"

# Load config
with open(f"./configs/preprocess/{dataset_name}.json") as f:
    config = json.load(f)
batch_size = config["batch_size"]

epochs = [100,200]
n_trials = 50

if task_type.lower() == 'multiclass':
    num_classes = df.iloc[:,-1].nunique()
else:
    num_classes = 1

device='cuda:0' if torch.cuda.is_available() else 'cpu'

In [172]:
def objective(trial, model_name, image_name, task_type, 
              train_loader, val_loader, test_loader,
              divisors, attributes, imgs_shape, num_classes=None,
              device='cuda', save_dir=None, class_weight=None, epochs=100):
    
    if model_name == "ViT_hybrid":
        task = task_type.lower()

        params = load_search_space(model_name, trial)

        params["patch_size"] = trial.suggest_categorical("patch_size", divisors)

        params["mlp_hidden_dims"] = json.loads(params["mlp_hidden_dims"])

        params["fusion_hidden_dims"] = json.loads(params["fusion_hidden_dims"])

        if params["dim"] % params["heads"] != 0:
            raise optuna.exceptions.TrialPruned()

        with open(f"configs/optuna_search/{model_name}.json", "r") as f:
            full_config = json.load(f)

        config = full_config[model_name]["fit"]  # Access the model key

        # Initialize model
        model = ViTMLP(imgs_shape[1], attributes, params, task, num_classes)
    else:
        task = task_type.lower()

        params = load_search_space(model_name, trial)
        
        params["mlp_hidden_dims"] = json.loads(params["mlp_hidden_dims"])
        
        params["fusion_hidden_dims"] = json.loads(params["fusion_hidden_dims"])
        
        with open(f"configs/optuna_search/{model_name}.json", "r") as f:
            full_config = json.load(f)
            
        config = full_config[model_name]["fit"]  # Access the model key
        
        # Build and train model
        model = CNNMLP(imgs_shape, attributes, params, task, num_classes)
        
    metrics = compile_and_fit(
        model,
        train_loader, val_loader, test_loader,
        dataset_name=dataset_name,
        model_name=f"trial_{trial.number}",
        image_name=image_name,
        task=task,  # assumed to be defined externally
        max_lr=trial.suggest_float("max_lr", config["max_lr"][1], config["max_lr"][2], log=True),
        div_factor=trial.suggest_int("div_factor", config["div_factor"][1], config["div_factor"][2]),
        final_div_factor=trial.suggest_int("final_div_factor", config["final_div_factor"][1], config["final_div_factor"][2]),
        weight_decay=trial.suggest_float("weight_decay", config["weight_decay"][1], config["weight_decay"][2], log=True),
        epochs=trial.suggest_categorical("epochs", [100, 200]),
        save_model=False,
        class_weights=class_weight
    )

    save_dir = os.path.join(save_dir, model_name, image_name, "optuna")
    os.makedirs(save_dir, exist_ok=True)

    if task == 'regression':
        score = metrics["val_rmse"]
        with open(f"{save_dir}/optuna_trials_log.txt", "a") as f:
            f.write(f"Trial {trial.number} - VAL-RMSE: {score:.4f}, Params: {params}\n")
            f.write("=" * 60 + "\n")
    
    elif task == 'binary':
        score = metrics["val_roc_auc"]
        with open(f"{save_dir}/optuna_trials_log.txt", "a") as f:
            f.write(f"Trial {trial.number} - VAL-AUC: {score:.4f}, Params: {params}\n")
            f.write("=" * 60 + "\n")

    elif task == 'multiclass':
        score = metrics["val_accuracy"]
        with open(f"{save_dir}/optuna_trials_log.txt", "a") as f:
            f.write(f"Trial {trial.number} - VAL-Accuracy: {score:.4f}, Params: {params}\n")
            f.write("=" * 60 + "\n")
    else:
        raise ValueError(f"Unsupported task type: {task_type}")
    
    return score


In [173]:
def evaluate_best_model(best_trial, train_loader, val_loader, test_loader, 
                        dataset_name, image_name, task_type, save_dir, attributes, imgs_shape, trial_name, 
                        class_weight=None, num_classes=None, epochs=10):

    task = task_type.lower()
    best_params = best_trial.params

    print(f"\nBest Trial: {best_trial.number}")
    print(f"  Best Score: {best_trial.value:.4f}")
    print("  Best Hyperparameters:")
    for k, v in best_params.items():
        print(f"    {k}: {v}")
    
    if model_name == "ViT_hybrid":
        # Extract architecture-related parameters
        architecture_params = {
            k: v for k, v in best_params.items()
            if k in ["patch_size", "dim", "depth", "heads", "mlp_dim", "mlp_hidden_dims", "fusion_hidden_dims"]
        }

        # Convert JSON string to list if necessary
        if isinstance(architecture_params.get("mlp_hidden_dims"), str):
            architecture_params["mlp_hidden_dims"] = json.loads(architecture_params["mlp_hidden_dims"])

        if isinstance(architecture_params.get("fusion_hidden_dims"), str):
            architecture_params["fusion_hidden_dims"] = json.loads(architecture_params["fusion_hidden_dims"])
        
        patch = f"_patch{architecture_params['patch_size']}"
        # Initialize model
        model = ViTMLP(imgs_shape[1], attributes, architecture_params, task, num_classes)
    else:
        # Extract architecture-related parameters
        architecture_params = {
            k: v for k, v in best_params.items()
            if k in ["mlp_hidden_dims", "fusion_hidden_dims", "model"]
        }
        # Convert JSON string to list if necessary
        if isinstance(architecture_params.get("mlp_hidden_dims"), str):
            architecture_params["mlp_hidden_dims"] = json.loads(architecture_params["mlp_hidden_dims"])
            
        if isinstance(architecture_params.get("fusion_hidden_dims"), str):
            architecture_params["fusion_hidden_dims"] = json.loads(architecture_params["fusion_hidden_dims"])
        
        patch = ""
        
        model = CNNMLP(imgs_shape, attributes, architecture_params, task, num_classes)

    # Train and evaluate
    metrics = compile_and_fit(
        model,
        train_loader, val_loader, test_loader,
        dataset_name=dataset_name,
        image_name=image_name,
        model_name=model_name,
        trial_name=f"trial_{best_trial.number}",
        task=task,
        max_lr=best_params["max_lr"],
        div_factor=best_params["div_factor"],
        final_div_factor=best_params["final_div_factor"],
        weight_decay=best_params["weight_decay"],
        epochs=best_params["epochs"],
        save_model=True,
        class_weights=class_weight,
        save_dir=save_dir,
        patch=patch
    )

    # Save best hyperparameters
    params_file = os.path.join(save_dir, f"{model_name}/{image_name}/best_model/trial_{best_trial.number}{patch}", "best_params.json")
    os.makedirs(os.path.dirname(params_file), exist_ok=True)

    with open(params_file, "w") as f:
        json.dump(best_params, f, indent=4)

    return metrics

In [174]:
import random
import numpy as np
import torch

def set_model_seed(seed: int):
    # Python built-in RNG
    random.seed(seed)
    # NumPy RNG
    np.random.seed(seed)
    # Torch RNG
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if you use multi-GPU
    
    # For reproducibility
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


### EXPERIMENT: TINTO

In [98]:
#Select the model and the parameters
if task_type.lower() == "regression":
    problem_type = "regression"
else:
    problem_type = "supervised"
name = f"TINTO_blur"

#Define the dataset path and the folder where the images will be saved
images_folder = f"SyntheticImages/{task_type}/{dataset_name}/{name}"

In [99]:
train_loader, val_loader, test_loader, attributes, imgs_shape, label_encoder, class_weight  = load_and_preprocess_data(df, dataset_name, images_folder, problem_type, task_type, seed=SEED, batch_size=batch_size, device=device)

Shapes — Train: (354, 22), Val: (76, 22), Test: (76, 22)
Numerical features: 11 — ['CRIM', 'ZN', 'INDUS', 'NOX', 'RM', 'AGE', 'DIS', 'TAX', 'PTRATIO', 'B', 'LSTAT']
Categorical features: 2 — ['CHAS', 'RAD']
Total features: 22
SyntheticImages/Regression/boston/TINTO_blur/train/regression.csv
SyntheticImages/Regression/boston/TINTO_blur/val/regression.csv
SyntheticImages/Regression/boston/TINTO_blur/test/regression.csv
Images shape:  (3, 20, 20)
Attributes:  22


In [100]:
# Determine possible patch sizes for the Vision Transformer by finding divisors of the image width
divisors = find_divisors(imgs_shape[1])
divisors

[1, 2, 4, 5, 10, 20]

In [101]:
divisors = [5, 10, 20]

In [102]:
import optuna
study = optuna.create_study(direction="minimize" if task_type.lower() == "regression" else "maximize")
study.optimize(lambda trial: objective(
    trial=trial,
    model_name=model_name,
    image_name=name,
    task_type=task_type,
    num_classes=num_classes,
    train_loader=train_loader,
    val_loader=val_loader,
    test_loader=test_loader,
    divisors=divisors,
    attributes=attributes,
    imgs_shape=imgs_shape,
    device=device,
    save_dir=save_dir,
    class_weight=class_weight,
    epochs=epochs
), n_trials=n_trials)

[I 2025-09-14 10:27:46,440] A new study created in memory with name: no-name-8c3decf7-cb00-4a2e-91c6-b7bde9a5c0b1
[I 2025-09-14 10:28:03,236] Trial 0 finished with value: 3.730832258958103 and parameters: {'model': 'model2', 'mlp_hidden_dims': '[256, 128, 64]', 'fusion_hidden_dims': '[32, 16]', 'max_lr': 0.0002293340002060818, 'div_factor': 34, 'final_div_factor': 839, 'weight_decay': 5.66369279943869e-06, 'epochs': 200}. Best is trial 0 with value: 3.730832258958103.
[I 2025-09-14 10:28:11,093] Trial 1 finished with value: 3.577315726236053 and parameters: {'model': 'model1', 'mlp_hidden_dims': '[128, 64, 32]', 'fusion_hidden_dims': '[256, 128, 64]', 'max_lr': 0.019579167092847825, 'div_factor': 80, 'final_div_factor': 369, 'weight_decay': 0.00010276907301605183, 'epochs': 100}. Best is trial 1 with value: 3.577315726236053.
[I 2025-09-14 10:28:27,551] Trial 2 finished with value: 3.528627079800066 and parameters: {'model': 'model2', 'mlp_hidden_dims': '[256, 128, 64]', 'fusion_hidden

In [103]:
from numbers import Number

# --- Configure which seeds to use for stability reporting ---
model_seeds = [0, 1, 2, 3, 4]   # change as needed
numeric_keys = None  # we’ll infer from first run

# Determine study direction safely (minimize by default if unknown)
def is_minimize_study(study):
    try:
        return study.direction == optuna.study.StudyDirection.MINIMIZE
    except Exception:
        try:
            return study.directions[0] == optuna.study.StudyDirection.MINIMIZE
        except Exception:
            return True  # fallback

minimize = is_minimize_study(study)

# --- Pick the single best completed trial across ALL patch sizes ---
completed = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]
if not completed:
    raise RuntimeError("No completed trials in the study.")

best_trial = (min if minimize else max)(completed, key=lambda t: t.value)

if model_name == "ViT_hybrid":
    # Extract its patch_size (may be None if not used in search)
    best_patch = best_trial.params.get("patch_size", None)
    trial_name = f"trial_{best_trial.number}_patch{best_patch}"
    print(f"\nEvaluating overall best trial "
      f"(Trial {best_trial.number}, ValObjective: {best_trial.value:.4f}, "
      f"patch_size={best_patch})")
else:
    trial_name = f"trial_{best_trial.number}"
    print(f"\nEvaluating overall best trial "
      f"(Trial {best_trial.number}, ValObjective: {best_trial.value:.4f}")
    
save_path = os.path.join(save_dir, f"{model_name}/{name}/best_model/{trial_name}")
os.makedirs(save_path, exist_ok=True)

per_seed_metrics = []

for s in model_seeds:
    set_model_seed(s)
    metrics = evaluate_best_model(
        best_trial,
        train_loader, val_loader, test_loader,
        dataset_name=dataset_name,
        image_name=name,          # keep your original arg
        task_type=task_type,
        save_dir=save_dir,
        attributes=attributes,
        imgs_shape=imgs_shape,
        class_weight=class_weight,
        num_classes=num_classes,
        epochs=epochs,
        trial_name=trial_name
        # If evaluate_best_model accepts a seed arg, pass model_seed=s
    )
    if not isinstance(metrics, dict):
        raise TypeError(f"evaluate_best_model must return dict, got: {type(metrics)}")

    # infer numeric keys once (ints, floats, numpy scalars)
    if numeric_keys is None:
        numeric_keys = [k for k, v in metrics.items()
                        if isinstance(v, (Number, np.floating, np.integer))]
    per_seed_metrics.append(metrics)

    # brief per-seed printout
    log_bits = []
    for k in ["test_loss", "val_loss", "train_loss"]:
        if k in metrics and isinstance(metrics[k], (Number, np.floating, np.integer)):
            log_bits.append(f"{k}={float(metrics[k]):.6f}")
    print(f"  Seed {s}: " + (", ".join(log_bits) if log_bits else str(metrics)))

# Aggregate mean/std per numeric key
aggregates = {}
for k in numeric_keys:
    vals = [float(m[k]) for m in per_seed_metrics]
    mean_k = float(np.mean(vals))
    std_k = float(np.std(vals, ddof=1)) if len(vals) > 1 else 0.0
    aggregates[k] = {"mean": mean_k, "std": std_k}

# Save YAML-like txt
out_file = os.path.join(save_path, "best_results_mean.txt")
with open(out_file, "w", encoding="utf-8") as f:
    f.write("# Overall best trial re-evaluation across model seeds\n")
    f.write(f"trial_number: {best_trial.number}\n")
    if model_name == "ViT_with_register_tokens":
        f.write(f"patch_size: {best_patch}\n")
    f.write(f"val_objective_best: {best_trial.value:.6f}\n")
    f.write(f"direction: {'minimize' if minimize else 'maximize'}\n")
    f.write(f"seeds: {model_seeds}\n")
    f.write("per_seed_metrics:\n")
    for s, m in zip(model_seeds, per_seed_metrics):
        f.write(f"  - seed: {s}\n")
        for k in numeric_keys:
            f.write(f"      {k}: {float(m[k]):.6f}\n")
    f.write("aggregates:\n")
    for k, mm in aggregates.items():
        f.write(f"  {k}:\n")
        f.write(f"    mean: {mm['mean']:.6f}\n")
        f.write(f"    std: {mm['std']:.6f}\n")

# Console summary
if "test_loss" in aggregates:
    print("  → test_loss Mean ± Std: "
          f"{aggregates['test_loss']['mean']:.6f} ± {aggregates['test_loss']['std']:.6f}")
elif "val_loss" in aggregates:
    print("  → val_loss Mean ± Std: "
          f"{aggregates['val_loss']['mean']:.6f} ± {aggregates['val_loss']['std']:.6f}")

print(f"Saved to: {out_file}")


Evaluating overall best trial (Trial 47, ValObjective: 3.3384

Best Trial: 47
  Best Score: 3.3384
  Best Hyperparameters:
    model: model2
    mlp_hidden_dims: [32, 16]
    fusion_hidden_dims: [256, 128, 64]
    max_lr: 0.0029687835319354306
    div_factor: 67
    final_div_factor: 631
    weight_decay: 6.9146029953649685e-06
    epochs: 200
Best model saved to logs/Regression/boston/CNN_hybrid/TINTO_blur/best_model/trial_47/best_model.pth
  Seed 0: test_loss=7.140422, val_loss=11.210828, train_loss=2.538118

Best Trial: 47
  Best Score: 3.3384
  Best Hyperparameters:
    model: model2
    mlp_hidden_dims: [32, 16]
    fusion_hidden_dims: [256, 128, 64]
    max_lr: 0.0029687835319354306
    div_factor: 67
    final_div_factor: 631
    weight_decay: 6.9146029953649685e-06
    epochs: 200
Best model saved to logs/Regression/boston/CNN_hybrid/TINTO_blur/best_model/trial_47/best_model.pth
  Seed 1: test_loss=15.777128, val_loss=13.903922, train_loss=9.168005

Best Trial: 47
  Best Score

### EXPERIMENT: IGTD

In [104]:
#Select the model and the parameters
if task_type.lower() == "regression":
    problem_type = "regression"
else:
    problem_type = "supervised"
name = f"IGTD"

#Define the dataset path and the folder where the images will be saved
images_folder = f"SyntheticImages/{task_type}/{dataset_name}/{name}"

In [105]:
train_loader, val_loader, test_loader, attributes, imgs_shape, label_encoder, class_weight  = load_and_preprocess_data(df, dataset_name, images_folder, problem_type, task_type, seed=SEED, batch_size=batch_size, device=device)

Shapes — Train: (354, 22), Val: (76, 22), Test: (76, 22)
Numerical features: 11 — ['CRIM', 'ZN', 'INDUS', 'NOX', 'RM', 'AGE', 'DIS', 'TAX', 'PTRATIO', 'B', 'LSTAT']
Categorical features: 2 — ['CHAS', 'RAD']
Total features: 22
SyntheticImages/Regression/boston/IGTD/train/regression.csv
SyntheticImages/Regression/boston/IGTD/val/regression.csv
SyntheticImages/Regression/boston/IGTD/test/regression.csv
Images shape:  (3, 5, 5)
Attributes:  22


In [106]:
# Determine possible patch sizes for the Vision Transformer by finding divisors of the image width
divisors = find_divisors(imgs_shape[1])
divisors

[1, 5]

In [107]:
divisors = [1, 5]

In [108]:
import optuna
study = optuna.create_study(direction="minimize" if task_type.lower() == "regression" else "maximize")
study.optimize(lambda trial: objective(
    trial=trial,
    model_name=model_name,
    image_name=name,
    task_type=task_type,
    num_classes=num_classes,
    train_loader=train_loader,
    val_loader=val_loader,
    test_loader=test_loader,
    divisors=divisors,
    attributes=attributes,
    imgs_shape=imgs_shape,
    device=device,
    save_dir=save_dir,
    class_weight=class_weight,
    epochs=epochs
), n_trials=n_trials)

[I 2025-09-14 10:41:43,845] A new study created in memory with name: no-name-5411d37d-257e-4fdb-9067-fdb241cb450d
[I 2025-09-14 10:41:58,150] Trial 0 finished with value: 4.358333777773502 and parameters: {'model': 'model1', 'mlp_hidden_dims': '[256, 128, 64]', 'fusion_hidden_dims': '[128, 64, 32]', 'max_lr': 0.0003451368516727994, 'div_factor': 43, 'final_div_factor': 807, 'weight_decay': 3.3282004097919553e-06, 'epochs': 200}. Best is trial 0 with value: 4.358333777773502.
[I 2025-09-14 10:42:05,090] Trial 1 finished with value: 4.512188616969852 and parameters: {'model': 'model1', 'mlp_hidden_dims': '[256, 128, 64]', 'fusion_hidden_dims': '[32, 16]', 'max_lr': 0.0010843518820103527, 'div_factor': 14, 'final_div_factor': 650, 'weight_decay': 0.005608433865874547, 'epochs': 100}. Best is trial 0 with value: 4.358333777773502.
[I 2025-09-14 10:42:12,964] Trial 2 finished with value: 4.101834765888952 and parameters: {'model': 'model2', 'mlp_hidden_dims': '[256, 128, 64]', 'fusion_hidde

In [109]:
from numbers import Number

# --- Configure which seeds to use for stability reporting ---
model_seeds = [0, 1, 2, 3, 4]   # change as needed
numeric_keys = None  # we’ll infer from first run

# Determine study direction safely (minimize by default if unknown)
def is_minimize_study(study):
    try:
        return study.direction == optuna.study.StudyDirection.MINIMIZE
    except Exception:
        try:
            return study.directions[0] == optuna.study.StudyDirection.MINIMIZE
        except Exception:
            return True  # fallback

minimize = is_minimize_study(study)

# --- Pick the single best completed trial across ALL patch sizes ---
completed = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]
if not completed:
    raise RuntimeError("No completed trials in the study.")

best_trial = (min if minimize else max)(completed, key=lambda t: t.value)

if model_name == "ViT_hybrid":
    # Extract its patch_size (may be None if not used in search)
    best_patch = best_trial.params.get("patch_size", None)
    trial_name = f"trial_{best_trial.number}_patch{best_patch}"
    print(f"\nEvaluating overall best trial "
      f"(Trial {best_trial.number}, ValObjective: {best_trial.value:.4f}, "
      f"patch_size={best_patch})")
else:
    trial_name = f"trial_{best_trial.number}"
    print(f"\nEvaluating overall best trial "
      f"(Trial {best_trial.number}, ValObjective: {best_trial.value:.4f}")
    
save_path = os.path.join(save_dir, f"{model_name}/{name}/best_model/{trial_name}")
os.makedirs(save_path, exist_ok=True)

per_seed_metrics = []

for s in model_seeds:
    set_model_seed(s)
    metrics = evaluate_best_model(
        best_trial,
        train_loader, val_loader, test_loader,
        dataset_name=dataset_name,
        image_name=name,          # keep your original arg
        task_type=task_type,
        save_dir=save_dir,
        attributes=attributes,
        imgs_shape=imgs_shape,
        class_weight=class_weight,
        num_classes=num_classes,
        epochs=epochs,
        trial_name=trial_name
        # If evaluate_best_model accepts a seed arg, pass model_seed=s
    )
    if not isinstance(metrics, dict):
        raise TypeError(f"evaluate_best_model must return dict, got: {type(metrics)}")

    # infer numeric keys once (ints, floats, numpy scalars)
    if numeric_keys is None:
        numeric_keys = [k for k, v in metrics.items()
                        if isinstance(v, (Number, np.floating, np.integer))]
    per_seed_metrics.append(metrics)

    # brief per-seed printout
    log_bits = []
    for k in ["test_loss", "val_loss", "train_loss"]:
        if k in metrics and isinstance(metrics[k], (Number, np.floating, np.integer)):
            log_bits.append(f"{k}={float(metrics[k]):.6f}")
    print(f"  Seed {s}: " + (", ".join(log_bits) if log_bits else str(metrics)))

# Aggregate mean/std per numeric key
aggregates = {}
for k in numeric_keys:
    vals = [float(m[k]) for m in per_seed_metrics]
    mean_k = float(np.mean(vals))
    std_k = float(np.std(vals, ddof=1)) if len(vals) > 1 else 0.0
    aggregates[k] = {"mean": mean_k, "std": std_k}

# Save YAML-like txt
out_file = os.path.join(save_path, "best_results_mean.txt")
with open(out_file, "w", encoding="utf-8") as f:
    f.write("# Overall best trial re-evaluation across model seeds\n")
    f.write(f"trial_number: {best_trial.number}\n")
    if model_name == "ViT_with_register_tokens":
        f.write(f"patch_size: {best_patch}\n")
    f.write(f"val_objective_best: {best_trial.value:.6f}\n")
    f.write(f"direction: {'minimize' if minimize else 'maximize'}\n")
    f.write(f"seeds: {model_seeds}\n")
    f.write("per_seed_metrics:\n")
    for s, m in zip(model_seeds, per_seed_metrics):
        f.write(f"  - seed: {s}\n")
        for k in numeric_keys:
            f.write(f"      {k}: {float(m[k]):.6f}\n")
    f.write("aggregates:\n")
    for k, mm in aggregates.items():
        f.write(f"  {k}:\n")
        f.write(f"    mean: {mm['mean']:.6f}\n")
        f.write(f"    std: {mm['std']:.6f}\n")

# Console summary
if "test_loss" in aggregates:
    print("  → test_loss Mean ± Std: "
          f"{aggregates['test_loss']['mean']:.6f} ± {aggregates['test_loss']['std']:.6f}")
elif "val_loss" in aggregates:
    print("  → val_loss Mean ± Std: "
          f"{aggregates['val_loss']['mean']:.6f} ± {aggregates['val_loss']['std']:.6f}")

print(f"Saved to: {out_file}")


Evaluating overall best trial (Trial 39, ValObjective: 3.2595

Best Trial: 39
  Best Score: 3.2595
  Best Hyperparameters:
    model: model1
    mlp_hidden_dims: [128, 64, 32]
    fusion_hidden_dims: [256, 128, 64]
    max_lr: 0.010206934596553034
    div_factor: 59
    final_div_factor: 744
    weight_decay: 7.501393203667051e-06
    epochs: 200
Best model saved to logs/Regression/boston/CNN_hybrid/IGTD/best_model/trial_39/best_model.pth
  Seed 0: test_loss=10.129778, val_loss=16.677411, train_loss=7.032120

Best Trial: 39
  Best Score: 3.2595
  Best Hyperparameters:
    model: model1
    mlp_hidden_dims: [128, 64, 32]
    fusion_hidden_dims: [256, 128, 64]
    max_lr: 0.010206934596553034
    div_factor: 59
    final_div_factor: 744
    weight_decay: 7.501393203667051e-06
    epochs: 200
Best model saved to logs/Regression/boston/CNN_hybrid/IGTD/best_model/trial_39/best_model.pth
  Seed 1: test_loss=16.714024, val_loss=12.716216, train_loss=29.437077

Best Trial: 39
  Best Score: 3.

### REFINED

In [110]:
#Select the model and the parameters
if task_type.lower() == "regression":
    problem_type = "regression"
else:
    problem_type = "supervised"
name = f"REFINED"

#Define the dataset path and the folder where the images will be saved
images_folder = f"SyntheticImages/{task_type}/{dataset_name}/{name}"

In [111]:
train_loader, val_loader, test_loader, attributes, imgs_shape, label_encoder, class_weight  = load_and_preprocess_data(df, dataset_name, images_folder, problem_type, task_type, seed=SEED, batch_size=batch_size, device=device)

Shapes — Train: (354, 22), Val: (76, 22), Test: (76, 22)
Numerical features: 11 — ['CRIM', 'ZN', 'INDUS', 'NOX', 'RM', 'AGE', 'DIS', 'TAX', 'PTRATIO', 'B', 'LSTAT']
Categorical features: 2 — ['CHAS', 'RAD']
Total features: 22
SyntheticImages/Regression/boston/REFINED/train/regression.csv
SyntheticImages/Regression/boston/REFINED/val/regression.csv
SyntheticImages/Regression/boston/REFINED/test/regression.csv
Images shape:  (3, 5, 5)
Attributes:  22


In [112]:
# Determine possible patch sizes for the Vision Transformer by finding divisors of the image width
divisors = find_divisors(imgs_shape[1])
divisors

[1, 5]

In [113]:
divisors = [1, 5]

In [114]:
import optuna
study = optuna.create_study(direction="minimize" if task_type.lower() == "regression" else "maximize")
study.optimize(lambda trial: objective(
    trial=trial,
    model_name=model_name,
    image_name=name,
    task_type=task_type,
    num_classes=num_classes,
    train_loader=train_loader,
    val_loader=val_loader,
    test_loader=test_loader,
    divisors=divisors,
    attributes=attributes,
    imgs_shape=imgs_shape,
    device=device,
    save_dir=save_dir,
    class_weight=class_weight,
    epochs=epochs
), n_trials=n_trials)

[I 2025-09-14 10:54:35,838] A new study created in memory with name: no-name-641181e4-9868-483b-a3c6-1472e1acd464
[I 2025-09-14 10:54:59,469] Trial 0 finished with value: 3.9462554711203337 and parameters: {'model': 'model2', 'mlp_hidden_dims': '[256, 128, 64]', 'fusion_hidden_dims': '[32, 16]', 'max_lr': 0.07421323692029864, 'div_factor': 45, 'final_div_factor': 490, 'weight_decay': 1.2961812726190303e-06, 'epochs': 200}. Best is trial 0 with value: 3.9462554711203337.
[I 2025-09-14 10:55:13,386] Trial 1 finished with value: 3.9952279473921806 and parameters: {'model': 'model2', 'mlp_hidden_dims': '[256, 128, 64]', 'fusion_hidden_dims': '[128, 64, 32]', 'max_lr': 0.039462993712497584, 'div_factor': 83, 'final_div_factor': 341, 'weight_decay': 1.1850444158530227e-06, 'epochs': 200}. Best is trial 0 with value: 3.9462554711203337.
[I 2025-09-14 10:55:20,681] Trial 2 finished with value: 3.715108033736239 and parameters: {'model': 'model2', 'mlp_hidden_dims': '[128, 64, 32]', 'fusion_hid

In [115]:
best_trial.params

{'model': 'model1',
 'mlp_hidden_dims': '[128, 64, 32]',
 'fusion_hidden_dims': '[256, 128, 64]',
 'max_lr': 0.010206934596553034,
 'div_factor': 59,
 'final_div_factor': 744,
 'weight_decay': 7.501393203667051e-06,
 'epochs': 200}

In [116]:
from numbers import Number

# --- Configure which seeds to use for stability reporting ---
model_seeds = [0, 1, 2, 3, 4]   # change as needed
numeric_keys = None  # we’ll infer from first run

# Determine study direction safely (minimize by default if unknown)
def is_minimize_study(study):
    try:
        return study.direction == optuna.study.StudyDirection.MINIMIZE
    except Exception:
        try:
            return study.directions[0] == optuna.study.StudyDirection.MINIMIZE
        except Exception:
            return True  # fallback

minimize = is_minimize_study(study)

# --- Pick the single best completed trial across ALL patch sizes ---
completed = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]
if not completed:
    raise RuntimeError("No completed trials in the study.")

best_trial = (min if minimize else max)(completed, key=lambda t: t.value)

if model_name == "ViT_hybrid":
    # Extract its patch_size (may be None if not used in search)
    best_patch = best_trial.params.get("patch_size", None)
    trial_name = f"trial_{best_trial.number}_patch{best_patch}"
    print(f"\nEvaluating overall best trial "
      f"(Trial {best_trial.number}, ValObjective: {best_trial.value:.4f}, "
      f"patch_size={best_patch})")
else:
    trial_name = f"trial_{best_trial.number}"
    print(f"\nEvaluating overall best trial "
      f"(Trial {best_trial.number}, ValObjective: {best_trial.value:.4f}")
    
save_path = os.path.join(save_dir, f"{model_name}/{name}/best_model/{trial_name}")
os.makedirs(save_path, exist_ok=True)

per_seed_metrics = []

for s in model_seeds:
    set_model_seed(s)
    metrics = evaluate_best_model(
        best_trial,
        train_loader, val_loader, test_loader,
        dataset_name=dataset_name,
        image_name=name,          # keep your original arg
        task_type=task_type,
        save_dir=save_dir,
        attributes=attributes,
        imgs_shape=imgs_shape,
        class_weight=class_weight,
        num_classes=num_classes,
        epochs=epochs,
        trial_name=trial_name
        # If evaluate_best_model accepts a seed arg, pass model_seed=s
    )
    if not isinstance(metrics, dict):
        raise TypeError(f"evaluate_best_model must return dict, got: {type(metrics)}")

    # infer numeric keys once (ints, floats, numpy scalars)
    if numeric_keys is None:
        numeric_keys = [k for k, v in metrics.items()
                        if isinstance(v, (Number, np.floating, np.integer))]
    per_seed_metrics.append(metrics)

    # brief per-seed printout
    log_bits = []
    for k in ["test_loss", "val_loss", "train_loss"]:
        if k in metrics and isinstance(metrics[k], (Number, np.floating, np.integer)):
            log_bits.append(f"{k}={float(metrics[k]):.6f}")
    print(f"  Seed {s}: " + (", ".join(log_bits) if log_bits else str(metrics)))

# Aggregate mean/std per numeric key
aggregates = {}
for k in numeric_keys:
    vals = [float(m[k]) for m in per_seed_metrics]
    mean_k = float(np.mean(vals))
    std_k = float(np.std(vals, ddof=1)) if len(vals) > 1 else 0.0
    aggregates[k] = {"mean": mean_k, "std": std_k}

# Save YAML-like txt
out_file = os.path.join(save_path, "best_results_mean.txt")
with open(out_file, "w", encoding="utf-8") as f:
    f.write("# Overall best trial re-evaluation across model seeds\n")
    f.write(f"trial_number: {best_trial.number}\n")
    if model_name == "ViT_with_register_tokens":
        f.write(f"patch_size: {best_patch}\n")
    f.write(f"val_objective_best: {best_trial.value:.6f}\n")
    f.write(f"direction: {'minimize' if minimize else 'maximize'}\n")
    f.write(f"seeds: {model_seeds}\n")
    f.write("per_seed_metrics:\n")
    for s, m in zip(model_seeds, per_seed_metrics):
        f.write(f"  - seed: {s}\n")
        for k in numeric_keys:
            f.write(f"      {k}: {float(m[k]):.6f}\n")
    f.write("aggregates:\n")
    for k, mm in aggregates.items():
        f.write(f"  {k}:\n")
        f.write(f"    mean: {mm['mean']:.6f}\n")
        f.write(f"    std: {mm['std']:.6f}\n")

# Console summary
if "test_loss" in aggregates:
    print("  → test_loss Mean ± Std: "
          f"{aggregates['test_loss']['mean']:.6f} ± {aggregates['test_loss']['std']:.6f}")
elif "val_loss" in aggregates:
    print("  → val_loss Mean ± Std: "
          f"{aggregates['val_loss']['mean']:.6f} ± {aggregates['val_loss']['std']:.6f}")

print(f"Saved to: {out_file}")


Evaluating overall best trial (Trial 21, ValObjective: 2.9518

Best Trial: 21
  Best Score: 2.9518
  Best Hyperparameters:
    model: model1
    mlp_hidden_dims: [32, 16]
    fusion_hidden_dims: [128, 64, 32]
    max_lr: 0.00926620802289801
    div_factor: 87
    final_div_factor: 989
    weight_decay: 0.0006205398683914304
    epochs: 200
Best model saved to logs/Regression/boston/CNN_hybrid/REFINED/best_model/trial_21/best_model.pth
  Seed 0: test_loss=5.454566, val_loss=10.339561, train_loss=1.648208

Best Trial: 21
  Best Score: 2.9518
  Best Hyperparameters:
    model: model1
    mlp_hidden_dims: [32, 16]
    fusion_hidden_dims: [128, 64, 32]
    max_lr: 0.00926620802289801
    div_factor: 87
    final_div_factor: 989
    weight_decay: 0.0006205398683914304
    epochs: 200
Best model saved to logs/Regression/boston/CNN_hybrid/REFINED/best_model/trial_21/best_model.pth
  Seed 1: test_loss=4.963115, val_loss=10.290258, train_loss=1.379813

Best Trial: 21
  Best Score: 2.9518
  Best

### EXPERIMENT: BarGraph

In [117]:
#Select the model and the parameters
if task_type.lower() == "regression":
    problem_type = "regression"
else:
    problem_type = "supervised"

name = f"BarGraph"

#Define the dataset path and the folder where the images will be saved
images_folder = f"SyntheticImages/{task_type}/{dataset_name}/{name}"

In [118]:
train_loader, val_loader, test_loader, attributes, imgs_shape, label_encoder, class_weight  = load_and_preprocess_data(df, dataset_name, images_folder, problem_type, task_type, seed=SEED, batch_size=batch_size, device=device)

Shapes — Train: (354, 22), Val: (76, 22), Test: (76, 22)
Numerical features: 11 — ['CRIM', 'ZN', 'INDUS', 'NOX', 'RM', 'AGE', 'DIS', 'TAX', 'PTRATIO', 'B', 'LSTAT']
Categorical features: 2 — ['CHAS', 'RAD']
Total features: 22
SyntheticImages/Regression/boston/BarGraph/train/regression.csv
SyntheticImages/Regression/boston/BarGraph/val/regression.csv
SyntheticImages/Regression/boston/BarGraph/test/regression.csv
Images shape:  (3, 22, 22)
Attributes:  22


In [119]:
# Determine possible patch sizes for the Vision Transformer by finding divisors of the image width
divisors = find_divisors(imgs_shape[1])
divisors

[1, 2, 11, 22]

In [120]:
divisors = [2, 11, 22]

In [121]:
import optuna
study = optuna.create_study(direction="minimize" if task_type.lower() == "regression" else "maximize")
study.optimize(lambda trial: objective(
    trial=trial,
    model_name=model_name,
    image_name=name,
    task_type=task_type,
    num_classes=num_classes,
    train_loader=train_loader,
    val_loader=val_loader,
    test_loader=test_loader,
    divisors=divisors,
    attributes=attributes,
    imgs_shape=imgs_shape,
    device=device,
    save_dir=save_dir,
    class_weight=class_weight,
    epochs=epochs
), n_trials=n_trials)

[I 2025-09-14 11:06:20,877] A new study created in memory with name: no-name-6037fe66-4140-4a6c-a9dc-add176f5ab23
[I 2025-09-14 11:06:28,296] Trial 0 finished with value: 3.232899647234562 and parameters: {'model': 'model1', 'mlp_hidden_dims': '[32, 16]', 'fusion_hidden_dims': '[128, 64, 32]', 'max_lr': 0.0027462098495296574, 'div_factor': 83, 'final_div_factor': 147, 'weight_decay': 0.004282533984619644, 'epochs': 100}. Best is trial 0 with value: 3.232899647234562.
[I 2025-09-14 11:06:44,868] Trial 1 finished with value: 4.037967024208919 and parameters: {'model': 'model2', 'mlp_hidden_dims': '[256, 128, 64]', 'fusion_hidden_dims': '[256, 128, 64]', 'max_lr': 0.05971308088854196, 'div_factor': 97, 'final_div_factor': 838, 'weight_decay': 0.0018126455400841986, 'epochs': 200}. Best is trial 0 with value: 3.232899647234562.
[I 2025-09-14 11:06:52,154] Trial 2 finished with value: 3.2910033092777717 and parameters: {'model': 'model1', 'mlp_hidden_dims': '[32, 16]', 'fusion_hidden_dims':

In [122]:
from numbers import Number

# --- Configure which seeds to use for stability reporting ---
model_seeds = [0, 1, 2, 3, 4]   # change as needed
numeric_keys = None  # we’ll infer from first run

# Determine study direction safely (minimize by default if unknown)
def is_minimize_study(study):
    try:
        return study.direction == optuna.study.StudyDirection.MINIMIZE
    except Exception:
        try:
            return study.directions[0] == optuna.study.StudyDirection.MINIMIZE
        except Exception:
            return True  # fallback

minimize = is_minimize_study(study)

# --- Pick the single best completed trial across ALL patch sizes ---
completed = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]
if not completed:
    raise RuntimeError("No completed trials in the study.")

best_trial = (min if minimize else max)(completed, key=lambda t: t.value)

if model_name == "ViT_hybrid":
    # Extract its patch_size (may be None if not used in search)
    best_patch = best_trial.params.get("patch_size", None)
    trial_name = f"trial_{best_trial.number}_patch{best_patch}"
    print(f"\nEvaluating overall best trial "
      f"(Trial {best_trial.number}, ValObjective: {best_trial.value:.4f}, "
      f"patch_size={best_patch})")
else:
    trial_name = f"trial_{best_trial.number}"
    print(f"\nEvaluating overall best trial "
      f"(Trial {best_trial.number}, ValObjective: {best_trial.value:.4f}")
    
save_path = os.path.join(save_dir, f"{model_name}/{name}/best_model/{trial_name}")
os.makedirs(save_path, exist_ok=True)

per_seed_metrics = []

for s in model_seeds:
    set_model_seed(s)
    metrics = evaluate_best_model(
        best_trial,
        train_loader, val_loader, test_loader,
        dataset_name=dataset_name,
        image_name=name,          # keep your original arg
        task_type=task_type,
        save_dir=save_dir,
        attributes=attributes,
        imgs_shape=imgs_shape,
        class_weight=class_weight,
        num_classes=num_classes,
        epochs=epochs,
        trial_name=trial_name
        # If evaluate_best_model accepts a seed arg, pass model_seed=s
    )
    if not isinstance(metrics, dict):
        raise TypeError(f"evaluate_best_model must return dict, got: {type(metrics)}")

    # infer numeric keys once (ints, floats, numpy scalars)
    if numeric_keys is None:
        numeric_keys = [k for k, v in metrics.items()
                        if isinstance(v, (Number, np.floating, np.integer))]
    per_seed_metrics.append(metrics)

    # brief per-seed printout
    log_bits = []
    for k in ["test_loss", "val_loss", "train_loss"]:
        if k in metrics and isinstance(metrics[k], (Number, np.floating, np.integer)):
            log_bits.append(f"{k}={float(metrics[k]):.6f}")
    print(f"  Seed {s}: " + (", ".join(log_bits) if log_bits else str(metrics)))

# Aggregate mean/std per numeric key
aggregates = {}
for k in numeric_keys:
    vals = [float(m[k]) for m in per_seed_metrics]
    mean_k = float(np.mean(vals))
    std_k = float(np.std(vals, ddof=1)) if len(vals) > 1 else 0.0
    aggregates[k] = {"mean": mean_k, "std": std_k}

# Save YAML-like txt
out_file = os.path.join(save_path, "best_results_mean.txt")
with open(out_file, "w", encoding="utf-8") as f:
    f.write("# Overall best trial re-evaluation across model seeds\n")
    f.write(f"trial_number: {best_trial.number}\n")
    if model_name == "ViT_with_register_tokens":
        f.write(f"patch_size: {best_patch}\n")
    f.write(f"val_objective_best: {best_trial.value:.6f}\n")
    f.write(f"direction: {'minimize' if minimize else 'maximize'}\n")
    f.write(f"seeds: {model_seeds}\n")
    f.write("per_seed_metrics:\n")
    for s, m in zip(model_seeds, per_seed_metrics):
        f.write(f"  - seed: {s}\n")
        for k in numeric_keys:
            f.write(f"      {k}: {float(m[k]):.6f}\n")
    f.write("aggregates:\n")
    for k, mm in aggregates.items():
        f.write(f"  {k}:\n")
        f.write(f"    mean: {mm['mean']:.6f}\n")
        f.write(f"    std: {mm['std']:.6f}\n")

# Console summary
if "test_loss" in aggregates:
    print("  → test_loss Mean ± Std: "
          f"{aggregates['test_loss']['mean']:.6f} ± {aggregates['test_loss']['std']:.6f}")
elif "val_loss" in aggregates:
    print("  → val_loss Mean ± Std: "
          f"{aggregates['val_loss']['mean']:.6f} ± {aggregates['val_loss']['std']:.6f}")

print(f"Saved to: {out_file}")


Evaluating overall best trial (Trial 43, ValObjective: 2.9736

Best Trial: 43
  Best Score: 2.9736
  Best Hyperparameters:
    model: model1
    mlp_hidden_dims: [128, 64, 32]
    fusion_hidden_dims: [256, 128, 64]
    max_lr: 0.0003684298952007884
    div_factor: 78
    final_div_factor: 242
    weight_decay: 6.0435502226795816e-05
    epochs: 200
Best model saved to logs/Regression/boston/CNN_hybrid/BarGraph/best_model/trial_43/best_model.pth
  Seed 0: test_loss=6.044407, val_loss=10.498914, train_loss=3.260387

Best Trial: 43
  Best Score: 2.9736
  Best Hyperparameters:
    model: model1
    mlp_hidden_dims: [128, 64, 32]
    fusion_hidden_dims: [256, 128, 64]
    max_lr: 0.0003684298952007884
    div_factor: 78
    final_div_factor: 242
    weight_decay: 6.0435502226795816e-05
    epochs: 200
Best model saved to logs/Regression/boston/CNN_hybrid/BarGraph/best_model/trial_43/best_model.pth
  Seed 1: test_loss=8.423077, val_loss=10.709731, train_loss=4.821782

Best Trial: 43
  Best 

### EXPERIMENT: DistanceMatrix

In [123]:
#Select the model and the parameters
if task_type.lower() == "regression":
    problem_type = "regression"
else:
    problem_type = "supervised"

name = f"DistanceMatrix"

#Define the dataset path and the folder where the images will be saved
images_folder = f"SyntheticImages/{task_type}/{dataset_name}/{name}"

In [124]:
train_loader, val_loader, test_loader, attributes, imgs_shape, label_encoder, class_weight  = load_and_preprocess_data(df, dataset_name, images_folder, problem_type, task_type, seed=SEED, batch_size=batch_size, device=device)

Shapes — Train: (354, 22), Val: (76, 22), Test: (76, 22)
Numerical features: 11 — ['CRIM', 'ZN', 'INDUS', 'NOX', 'RM', 'AGE', 'DIS', 'TAX', 'PTRATIO', 'B', 'LSTAT']
Categorical features: 2 — ['CHAS', 'RAD']
Total features: 22
SyntheticImages/Regression/boston/DistanceMatrix/train/regression.csv
SyntheticImages/Regression/boston/DistanceMatrix/val/regression.csv
SyntheticImages/Regression/boston/DistanceMatrix/test/regression.csv
Images shape:  (3, 22, 22)
Attributes:  22


In [125]:
# Determine possible patch sizes for the Vision Transformer by finding divisors of the image width
divisors = find_divisors(imgs_shape[1])
divisors

[1, 2, 11, 22]

In [126]:
divisors = [2, 11, 22]

In [127]:
import optuna
study = optuna.create_study(direction="minimize" if task_type.lower() == "regression" else "maximize")
study.optimize(lambda trial: objective(
    trial=trial,
    model_name=model_name,
    image_name=name,
    task_type=task_type,
    num_classes=num_classes,
    train_loader=train_loader,
    val_loader=val_loader,
    test_loader=test_loader,
    divisors=divisors,
    attributes=attributes,
    imgs_shape=imgs_shape,
    device=device,
    save_dir=save_dir,
    class_weight=class_weight,
    epochs=epochs
), n_trials=n_trials)

[I 2025-09-14 11:18:25,064] A new study created in memory with name: no-name-9c20f77f-71b0-4174-b5dc-d9690cc7641b
[I 2025-09-14 11:18:32,621] Trial 0 finished with value: 3.9298115098319433 and parameters: {'model': 'model1', 'mlp_hidden_dims': '[32, 16]', 'fusion_hidden_dims': '[128, 64, 32]', 'max_lr': 0.0048037538022195365, 'div_factor': 96, 'final_div_factor': 688, 'weight_decay': 0.001580949470005536, 'epochs': 100}. Best is trial 0 with value: 3.9298115098319433.
[I 2025-09-14 11:18:40,630] Trial 1 finished with value: 3.5240124827498356 and parameters: {'model': 'model2', 'mlp_hidden_dims': '[32, 16]', 'fusion_hidden_dims': '[32, 16]', 'max_lr': 0.034493874705165205, 'div_factor': 24, 'final_div_factor': 883, 'weight_decay': 0.0001598757945559384, 'epochs': 100}. Best is trial 1 with value: 3.5240124827498356.
[I 2025-09-14 11:18:57,124] Trial 2 finished with value: 3.4139011790136515 and parameters: {'model': 'model2', 'mlp_hidden_dims': '[128, 64, 32]', 'fusion_hidden_dims': '

In [128]:
from numbers import Number

# --- Configure which seeds to use for stability reporting ---
model_seeds = [0, 1, 2, 3, 4]   # change as needed
numeric_keys = None  # we’ll infer from first run

# Determine study direction safely (minimize by default if unknown)
def is_minimize_study(study):
    try:
        return study.direction == optuna.study.StudyDirection.MINIMIZE
    except Exception:
        try:
            return study.directions[0] == optuna.study.StudyDirection.MINIMIZE
        except Exception:
            return True  # fallback

minimize = is_minimize_study(study)

# --- Pick the single best completed trial across ALL patch sizes ---
completed = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]
if not completed:
    raise RuntimeError("No completed trials in the study.")

best_trial = (min if minimize else max)(completed, key=lambda t: t.value)

if model_name == "ViT_hybrid":
    # Extract its patch_size (may be None if not used in search)
    best_patch = best_trial.params.get("patch_size", None)
    trial_name = f"trial_{best_trial.number}_patch{best_patch}"
    print(f"\nEvaluating overall best trial "
      f"(Trial {best_trial.number}, ValObjective: {best_trial.value:.4f}, "
      f"patch_size={best_patch})")
else:
    trial_name = f"trial_{best_trial.number}"
    print(f"\nEvaluating overall best trial "
      f"(Trial {best_trial.number}, ValObjective: {best_trial.value:.4f}")
    
save_path = os.path.join(save_dir, f"{model_name}/{name}/best_model/{trial_name}")
os.makedirs(save_path, exist_ok=True)

per_seed_metrics = []

for s in model_seeds:
    set_model_seed(s)
    metrics = evaluate_best_model(
        best_trial,
        train_loader, val_loader, test_loader,
        dataset_name=dataset_name,
        image_name=name,          # keep your original arg
        task_type=task_type,
        save_dir=save_dir,
        attributes=attributes,
        imgs_shape=imgs_shape,
        class_weight=class_weight,
        num_classes=num_classes,
        epochs=epochs,
        trial_name=trial_name
        # If evaluate_best_model accepts a seed arg, pass model_seed=s
    )
    if not isinstance(metrics, dict):
        raise TypeError(f"evaluate_best_model must return dict, got: {type(metrics)}")

    # infer numeric keys once (ints, floats, numpy scalars)
    if numeric_keys is None:
        numeric_keys = [k for k, v in metrics.items()
                        if isinstance(v, (Number, np.floating, np.integer))]
    per_seed_metrics.append(metrics)

    # brief per-seed printout
    log_bits = []
    for k in ["test_loss", "val_loss", "train_loss"]:
        if k in metrics and isinstance(metrics[k], (Number, np.floating, np.integer)):
            log_bits.append(f"{k}={float(metrics[k]):.6f}")
    print(f"  Seed {s}: " + (", ".join(log_bits) if log_bits else str(metrics)))

# Aggregate mean/std per numeric key
aggregates = {}
for k in numeric_keys:
    vals = [float(m[k]) for m in per_seed_metrics]
    mean_k = float(np.mean(vals))
    std_k = float(np.std(vals, ddof=1)) if len(vals) > 1 else 0.0
    aggregates[k] = {"mean": mean_k, "std": std_k}

# Save YAML-like txt
out_file = os.path.join(save_path, "best_results_mean.txt")
with open(out_file, "w", encoding="utf-8") as f:
    f.write("# Overall best trial re-evaluation across model seeds\n")
    f.write(f"trial_number: {best_trial.number}\n")
    if model_name == "ViT_with_register_tokens":
        f.write(f"patch_size: {best_patch}\n")
    f.write(f"val_objective_best: {best_trial.value:.6f}\n")
    f.write(f"direction: {'minimize' if minimize else 'maximize'}\n")
    f.write(f"seeds: {model_seeds}\n")
    f.write("per_seed_metrics:\n")
    for s, m in zip(model_seeds, per_seed_metrics):
        f.write(f"  - seed: {s}\n")
        for k in numeric_keys:
            f.write(f"      {k}: {float(m[k]):.6f}\n")
    f.write("aggregates:\n")
    for k, mm in aggregates.items():
        f.write(f"  {k}:\n")
        f.write(f"    mean: {mm['mean']:.6f}\n")
        f.write(f"    std: {mm['std']:.6f}\n")

# Console summary
if "test_loss" in aggregates:
    print("  → test_loss Mean ± Std: "
          f"{aggregates['test_loss']['mean']:.6f} ± {aggregates['test_loss']['std']:.6f}")
elif "val_loss" in aggregates:
    print("  → val_loss Mean ± Std: "
          f"{aggregates['val_loss']['mean']:.6f} ± {aggregates['val_loss']['std']:.6f}")

print(f"Saved to: {out_file}")


Evaluating overall best trial (Trial 9, ValObjective: 3.1797

Best Trial: 9
  Best Score: 3.1797
  Best Hyperparameters:
    model: model1
    mlp_hidden_dims: [32, 16]
    fusion_hidden_dims: [128, 64, 32]
    max_lr: 0.0007963190674188467
    div_factor: 21
    final_div_factor: 932
    weight_decay: 0.0009009590161594901
    epochs: 200
Best model saved to logs/Regression/boston/CNN_hybrid/DistanceMatrix/best_model/trial_9/best_model.pth
  Seed 0: test_loss=8.082563, val_loss=10.814391, train_loss=4.625947

Best Trial: 9
  Best Score: 3.1797
  Best Hyperparameters:
    model: model1
    mlp_hidden_dims: [32, 16]
    fusion_hidden_dims: [128, 64, 32]
    max_lr: 0.0007963190674188467
    div_factor: 21
    final_div_factor: 932
    weight_decay: 0.0009009590161594901
    epochs: 200
Best model saved to logs/Regression/boston/CNN_hybrid/DistanceMatrix/best_model/trial_9/best_model.pth
  Seed 1: test_loss=7.405877, val_loss=11.468610, train_loss=4.305085

Best Trial: 9
  Best Score: 3

### EXPERIMENT: Combination

In [129]:
#Select the model and the parameters
if task_type.lower() == "regression":
    problem_type = "regression"
else:
    problem_type = "supervised"

name = f"Combination"

#Define the dataset path and the folder where the images will be saved
images_folder = f"SyntheticImages/{task_type}/{dataset_name}/{name}"

In [130]:
train_loader, val_loader, test_loader, attributes, imgs_shape, label_encoder, class_weight  = load_and_preprocess_data(df, dataset_name, images_folder, problem_type, task_type, seed=SEED, batch_size=batch_size, device=device)

Shapes — Train: (354, 22), Val: (76, 22), Test: (76, 22)
Numerical features: 11 — ['CRIM', 'ZN', 'INDUS', 'NOX', 'RM', 'AGE', 'DIS', 'TAX', 'PTRATIO', 'B', 'LSTAT']
Categorical features: 2 — ['CHAS', 'RAD']
Total features: 22
SyntheticImages/Regression/boston/Combination/train/regression.csv
SyntheticImages/Regression/boston/Combination/val/regression.csv
SyntheticImages/Regression/boston/Combination/test/regression.csv
Images shape:  (3, 22, 22)
Attributes:  22


In [131]:
# Determine possible patch sizes for the Vision Transformer by finding divisors of the image width
divisors = find_divisors(imgs_shape[1])
divisors

[1, 2, 11, 22]

In [132]:
divisors = [2, 11, 22]

In [133]:
import optuna
study = optuna.create_study(direction="minimize" if task_type.lower() == "regression" else "maximize")
study.optimize(lambda trial: objective(
    trial=trial,
    model_name=model_name,
    image_name=name,
    task_type=task_type,
    num_classes=num_classes,
    train_loader=train_loader,
    val_loader=val_loader,
    test_loader=test_loader,
    divisors=divisors,
    attributes=attributes,
    imgs_shape=imgs_shape,
    device=device,
    save_dir=save_dir,
    class_weight=class_weight,
    epochs=epochs
), n_trials=n_trials)

[I 2025-09-14 11:31:02,194] A new study created in memory with name: no-name-3238a7e3-293f-4954-a139-1eeaf9e71c58
[I 2025-09-14 11:31:14,333] Trial 0 finished with value: 3.406160965902047 and parameters: {'model': 'model1', 'mlp_hidden_dims': '[256, 128, 64]', 'fusion_hidden_dims': '[32, 16]', 'max_lr': 0.005921090196946735, 'div_factor': 10, 'final_div_factor': 609, 'weight_decay': 1.299594428056685e-06, 'epochs': 200}. Best is trial 0 with value: 3.406160965902047.
[I 2025-09-14 11:31:20,623] Trial 1 finished with value: 3.239411491587417 and parameters: {'model': 'model1', 'mlp_hidden_dims': '[128, 64, 32]', 'fusion_hidden_dims': '[128, 64, 32]', 'max_lr': 0.001744237368750976, 'div_factor': 64, 'final_div_factor': 305, 'weight_decay': 0.009223875849562151, 'epochs': 100}. Best is trial 1 with value: 3.239411491587417.
[I 2025-09-14 11:31:27,867] Trial 2 finished with value: 3.4398518233212094 and parameters: {'model': 'model1', 'mlp_hidden_dims': '[32, 16]', 'fusion_hidden_dims': 

In [134]:
from numbers import Number

# --- Configure which seeds to use for stability reporting ---
model_seeds = [0, 1, 2, 3, 4]   # change as needed
numeric_keys = None  # we’ll infer from first run

# Determine study direction safely (minimize by default if unknown)
def is_minimize_study(study):
    try:
        return study.direction == optuna.study.StudyDirection.MINIMIZE
    except Exception:
        try:
            return study.directions[0] == optuna.study.StudyDirection.MINIMIZE
        except Exception:
            return True  # fallback

minimize = is_minimize_study(study)

# --- Pick the single best completed trial across ALL patch sizes ---
completed = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]
if not completed:
    raise RuntimeError("No completed trials in the study.")

best_trial = (min if minimize else max)(completed, key=lambda t: t.value)

if model_name == "ViT_hybrid":
    # Extract its patch_size (may be None if not used in search)
    best_patch = best_trial.params.get("patch_size", None)
    trial_name = f"trial_{best_trial.number}_patch{best_patch}"
    print(f"\nEvaluating overall best trial "
      f"(Trial {best_trial.number}, ValObjective: {best_trial.value:.4f}, "
      f"patch_size={best_patch})")
else:
    trial_name = f"trial_{best_trial.number}"
    print(f"\nEvaluating overall best trial "
      f"(Trial {best_trial.number}, ValObjective: {best_trial.value:.4f}")
    
save_path = os.path.join(save_dir, f"{model_name}/{name}/best_model/{trial_name}")
os.makedirs(save_path, exist_ok=True)

per_seed_metrics = []

for s in model_seeds:
    set_model_seed(s)
    metrics = evaluate_best_model(
        best_trial,
        train_loader, val_loader, test_loader,
        dataset_name=dataset_name,
        image_name=name,          # keep your original arg
        task_type=task_type,
        save_dir=save_dir,
        attributes=attributes,
        imgs_shape=imgs_shape,
        class_weight=class_weight,
        num_classes=num_classes,
        epochs=epochs,
        trial_name=trial_name
        # If evaluate_best_model accepts a seed arg, pass model_seed=s
    )
    if not isinstance(metrics, dict):
        raise TypeError(f"evaluate_best_model must return dict, got: {type(metrics)}")

    # infer numeric keys once (ints, floats, numpy scalars)
    if numeric_keys is None:
        numeric_keys = [k for k, v in metrics.items()
                        if isinstance(v, (Number, np.floating, np.integer))]
    per_seed_metrics.append(metrics)

    # brief per-seed printout
    log_bits = []
    for k in ["test_loss", "val_loss", "train_loss"]:
        if k in metrics and isinstance(metrics[k], (Number, np.floating, np.integer)):
            log_bits.append(f"{k}={float(metrics[k]):.6f}")
    print(f"  Seed {s}: " + (", ".join(log_bits) if log_bits else str(metrics)))

# Aggregate mean/std per numeric key
aggregates = {}
for k in numeric_keys:
    vals = [float(m[k]) for m in per_seed_metrics]
    mean_k = float(np.mean(vals))
    std_k = float(np.std(vals, ddof=1)) if len(vals) > 1 else 0.0
    aggregates[k] = {"mean": mean_k, "std": std_k}

# Save YAML-like txt
out_file = os.path.join(save_path, "best_results_mean.txt")
with open(out_file, "w", encoding="utf-8") as f:
    f.write("# Overall best trial re-evaluation across model seeds\n")
    f.write(f"trial_number: {best_trial.number}\n")
    if model_name == "ViT_with_register_tokens":
        f.write(f"patch_size: {best_patch}\n")
    f.write(f"val_objective_best: {best_trial.value:.6f}\n")
    f.write(f"direction: {'minimize' if minimize else 'maximize'}\n")
    f.write(f"seeds: {model_seeds}\n")
    f.write("per_seed_metrics:\n")
    for s, m in zip(model_seeds, per_seed_metrics):
        f.write(f"  - seed: {s}\n")
        for k in numeric_keys:
            f.write(f"      {k}: {float(m[k]):.6f}\n")
    f.write("aggregates:\n")
    for k, mm in aggregates.items():
        f.write(f"  {k}:\n")
        f.write(f"    mean: {mm['mean']:.6f}\n")
        f.write(f"    std: {mm['std']:.6f}\n")

# Console summary
if "test_loss" in aggregates:
    print("  → test_loss Mean ± Std: "
          f"{aggregates['test_loss']['mean']:.6f} ± {aggregates['test_loss']['std']:.6f}")
elif "val_loss" in aggregates:
    print("  → val_loss Mean ± Std: "
          f"{aggregates['val_loss']['mean']:.6f} ± {aggregates['val_loss']['std']:.6f}")

print(f"Saved to: {out_file}")


Evaluating overall best trial (Trial 30, ValObjective: 3.0656

Best Trial: 30
  Best Score: 3.0656
  Best Hyperparameters:
    model: model1
    mlp_hidden_dims: [32, 16]
    fusion_hidden_dims: [32, 16]
    max_lr: 0.011602490799744314
    div_factor: 35
    final_div_factor: 549
    weight_decay: 0.00020702472794764584
    epochs: 100
Best model saved to logs/Regression/boston/CNN_hybrid/Combination/best_model/trial_30/best_model.pth
  Seed 0: test_loss=5.006338, val_loss=10.452549, train_loss=1.679147

Best Trial: 30
  Best Score: 3.0656
  Best Hyperparameters:
    model: model1
    mlp_hidden_dims: [32, 16]
    fusion_hidden_dims: [32, 16]
    max_lr: 0.011602490799744314
    div_factor: 35
    final_div_factor: 549
    weight_decay: 0.00020702472794764584
    epochs: 100
Best model saved to logs/Regression/boston/CNN_hybrid/Combination/best_model/trial_30/best_model.pth
  Seed 1: test_loss=19.252723, val_loss=11.748518, train_loss=10.421394

Best Trial: 30
  Best Score: 3.0656
  

### EXPERIMENT: SuperTML

In [135]:
#Select the model and the parameters
if task_type.lower() == "regression":
    problem_type = "regression"
else:
    problem_type = "supervised"

name = f"SuperTML"

#Define the dataset path and the folder where the images will be saved
images_folder = f"SyntheticImages/{task_type}/{dataset_name}/{name}"

In [136]:
train_loader, val_loader, test_loader, attributes, imgs_shape, label_encoder, class_weight  = load_and_preprocess_data(df, dataset_name, images_folder, problem_type, task_type, seed=SEED, batch_size=batch_size, device=device)

Shapes — Train: (354, 22), Val: (76, 22), Test: (76, 22)
Numerical features: 11 — ['CRIM', 'ZN', 'INDUS', 'NOX', 'RM', 'AGE', 'DIS', 'TAX', 'PTRATIO', 'B', 'LSTAT']
Categorical features: 2 — ['CHAS', 'RAD']
Total features: 22
SyntheticImages/Regression/boston/SuperTML/train/regression.csv
SyntheticImages/Regression/boston/SuperTML/val/regression.csv
SyntheticImages/Regression/boston/SuperTML/test/regression.csv
Images shape:  (3, 224, 224)
Attributes:  22


In [137]:
# Determine possible patch sizes for the Vision Transformer by finding divisors of the image width
divisors = find_divisors(imgs_shape[1])
divisors

[1, 2, 4, 7, 8, 14, 16, 28, 32, 56, 112, 224]

In [138]:
divisors = [16, 28, 32, 56, 112, 224]

In [139]:
import optuna
study = optuna.create_study(direction="minimize" if task_type.lower() == "regression" else "maximize")
study.optimize(lambda trial: objective(
    trial=trial,
    model_name=model_name,
    image_name=name,
    task_type=task_type,
    num_classes=num_classes,
    train_loader=train_loader,
    val_loader=val_loader,
    test_loader=test_loader,
    divisors=divisors,
    attributes=attributes,
    imgs_shape=imgs_shape,
    device=device,
    save_dir=save_dir,
    class_weight=class_weight,
    epochs=epochs
), n_trials=n_trials)

[I 2025-09-14 11:39:04,792] A new study created in memory with name: no-name-0039fe15-f1b0-4395-a7cc-7f7bb1a1bb20
[I 2025-09-14 11:39:55,710] Trial 0 finished with value: 4.5570694467770645 and parameters: {'model': 'model1', 'mlp_hidden_dims': '[32, 16]', 'fusion_hidden_dims': '[256, 128, 64]', 'max_lr': 0.0327206610643215, 'div_factor': 66, 'final_div_factor': 178, 'weight_decay': 0.00099995127208892, 'epochs': 100}. Best is trial 0 with value: 4.5570694467770645.
[I 2025-09-14 11:40:29,845] Trial 1 finished with value: 5.218189140836685 and parameters: {'model': 'model1', 'mlp_hidden_dims': '[32, 16]', 'fusion_hidden_dims': '[32, 16]', 'max_lr': 0.00013375434336693445, 'div_factor': 87, 'final_div_factor': 386, 'weight_decay': 0.007869675127958815, 'epochs': 200}. Best is trial 0 with value: 4.5570694467770645.
[I 2025-09-14 11:42:10,536] Trial 2 finished with value: 4.948986550825231 and parameters: {'model': 'model1', 'mlp_hidden_dims': '[256, 128, 64]', 'fusion_hidden_dims': '[25

In [140]:
from numbers import Number

# --- Configure which seeds to use for stability reporting ---
model_seeds = [0, 1, 2, 3, 4]   # change as needed
numeric_keys = None  # we’ll infer from first run

# Determine study direction safely (minimize by default if unknown)
def is_minimize_study(study):
    try:
        return study.direction == optuna.study.StudyDirection.MINIMIZE
    except Exception:
        try:
            return study.directions[0] == optuna.study.StudyDirection.MINIMIZE
        except Exception:
            return True  # fallback

minimize = is_minimize_study(study)

# --- Pick the single best completed trial across ALL patch sizes ---
completed = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]
if not completed:
    raise RuntimeError("No completed trials in the study.")

best_trial = (min if minimize else max)(completed, key=lambda t: t.value)

if model_name == "ViT_hybrid":
    # Extract its patch_size (may be None if not used in search)
    best_patch = best_trial.params.get("patch_size", None)
    trial_name = f"trial_{best_trial.number}_patch{best_patch}"
    print(f"\nEvaluating overall best trial "
      f"(Trial {best_trial.number}, ValObjective: {best_trial.value:.4f}, "
      f"patch_size={best_patch})")
else:
    trial_name = f"trial_{best_trial.number}"
    print(f"\nEvaluating overall best trial "
      f"(Trial {best_trial.number}, ValObjective: {best_trial.value:.4f}")
    
save_path = os.path.join(save_dir, f"{model_name}/{name}/best_model/{trial_name}")
os.makedirs(save_path, exist_ok=True)

per_seed_metrics = []

for s in model_seeds:
    set_model_seed(s)
    metrics = evaluate_best_model(
        best_trial,
        train_loader, val_loader, test_loader,
        dataset_name=dataset_name,
        image_name=name,          # keep your original arg
        task_type=task_type,
        save_dir=save_dir,
        attributes=attributes,
        imgs_shape=imgs_shape,
        class_weight=class_weight,
        num_classes=num_classes,
        epochs=epochs,
        trial_name=trial_name
        # If evaluate_best_model accepts a seed arg, pass model_seed=s
    )
    if not isinstance(metrics, dict):
        raise TypeError(f"evaluate_best_model must return dict, got: {type(metrics)}")

    # infer numeric keys once (ints, floats, numpy scalars)
    if numeric_keys is None:
        numeric_keys = [k for k, v in metrics.items()
                        if isinstance(v, (Number, np.floating, np.integer))]
    per_seed_metrics.append(metrics)

    # brief per-seed printout
    log_bits = []
    for k in ["test_loss", "val_loss", "train_loss"]:
        if k in metrics and isinstance(metrics[k], (Number, np.floating, np.integer)):
            log_bits.append(f"{k}={float(metrics[k]):.6f}")
    print(f"  Seed {s}: " + (", ".join(log_bits) if log_bits else str(metrics)))

# Aggregate mean/std per numeric key
aggregates = {}
for k in numeric_keys:
    vals = [float(m[k]) for m in per_seed_metrics]
    mean_k = float(np.mean(vals))
    std_k = float(np.std(vals, ddof=1)) if len(vals) > 1 else 0.0
    aggregates[k] = {"mean": mean_k, "std": std_k}

# Save YAML-like txt
out_file = os.path.join(save_path, "best_results_mean.txt")
with open(out_file, "w", encoding="utf-8") as f:
    f.write("# Overall best trial re-evaluation across model seeds\n")
    f.write(f"trial_number: {best_trial.number}\n")
    if model_name == "ViT_with_register_tokens":
        f.write(f"patch_size: {best_patch}\n")
    f.write(f"val_objective_best: {best_trial.value:.6f}\n")
    f.write(f"direction: {'minimize' if minimize else 'maximize'}\n")
    f.write(f"seeds: {model_seeds}\n")
    f.write("per_seed_metrics:\n")
    for s, m in zip(model_seeds, per_seed_metrics):
        f.write(f"  - seed: {s}\n")
        for k in numeric_keys:
            f.write(f"      {k}: {float(m[k]):.6f}\n")
    f.write("aggregates:\n")
    for k, mm in aggregates.items():
        f.write(f"  {k}:\n")
        f.write(f"    mean: {mm['mean']:.6f}\n")
        f.write(f"    std: {mm['std']:.6f}\n")

# Console summary
if "test_loss" in aggregates:
    print("  → test_loss Mean ± Std: "
          f"{aggregates['test_loss']['mean']:.6f} ± {aggregates['test_loss']['std']:.6f}")
elif "val_loss" in aggregates:
    print("  → val_loss Mean ± Std: "
          f"{aggregates['val_loss']['mean']:.6f} ± {aggregates['val_loss']['std']:.6f}")

print(f"Saved to: {out_file}")


Evaluating overall best trial (Trial 44, ValObjective: 3.3444

Best Trial: 44
  Best Score: 3.3444
  Best Hyperparameters:
    model: model2
    mlp_hidden_dims: [256, 128, 64]
    fusion_hidden_dims: [128, 64, 32]
    max_lr: 0.005626285407491036
    div_factor: 99
    final_div_factor: 375
    weight_decay: 1.0875623097851136e-05
    epochs: 200
Best model saved to logs/Regression/boston/CNN_hybrid/SuperTML/best_model/trial_44/best_model.pth
  Seed 0: test_loss=13.792780, val_loss=13.493293, train_loss=1.380149

Best Trial: 44
  Best Score: 3.3444
  Best Hyperparameters:
    model: model2
    mlp_hidden_dims: [256, 128, 64]
    fusion_hidden_dims: [128, 64, 32]
    max_lr: 0.005626285407491036
    div_factor: 99
    final_div_factor: 375
    weight_decay: 1.0875623097851136e-05
    epochs: 200
Best model saved to logs/Regression/boston/CNN_hybrid/SuperTML/best_model/trial_44/best_model.pth
  Seed 1: test_loss=10.327184, val_loss=12.035148, train_loss=3.422039

Best Trial: 44
  Best 

### EXPERIMENT: FeatureWrap

In [141]:
#Select the model and the parameters
if task_type.lower() == "regression":
    problem_type = "regression"
else:
    problem_type = "supervised"

name = f"FeatureWrap"

#Define the dataset path and the folder where the images will be saved
images_folder = f"SyntheticImages/{task_type}/{dataset_name}/{name}"

In [142]:
train_loader, val_loader, test_loader, attributes, imgs_shape, label_encoder, class_weight  = load_and_preprocess_data(df, dataset_name, images_folder, problem_type, task_type, seed=SEED, batch_size=batch_size, device=device)

Shapes — Train: (354, 22), Val: (76, 22), Test: (76, 22)
Numerical features: 11 — ['CRIM', 'ZN', 'INDUS', 'NOX', 'RM', 'AGE', 'DIS', 'TAX', 'PTRATIO', 'B', 'LSTAT']
Categorical features: 2 — ['CHAS', 'RAD']
Total features: 22
SyntheticImages/Regression/boston/FeatureWrap/train/regression.csv
SyntheticImages/Regression/boston/FeatureWrap/val/regression.csv
SyntheticImages/Regression/boston/FeatureWrap/test/regression.csv
Images shape:  (3, 8, 8)
Attributes:  22


In [143]:
# Determine possible patch sizes for the Vision Transformer by finding divisors of the image width
divisors = find_divisors(imgs_shape[1])
divisors

[1, 2, 4, 8]

In [144]:
divisors = [2, 4, 8]

In [145]:
import optuna
study = optuna.create_study(direction="minimize" if task_type.lower() == "regression" else "maximize")
study.optimize(lambda trial: objective(
    trial=trial,
    model_name=model_name,
    image_name=name,
    task_type=task_type,
    num_classes=num_classes,
    train_loader=train_loader,
    val_loader=val_loader,
    test_loader=test_loader,
    divisors=divisors,
    attributes=attributes,
    imgs_shape=imgs_shape,
    device=device,
    save_dir=save_dir,
    class_weight=class_weight,
    epochs=epochs
), n_trials=n_trials)

[I 2025-09-14 12:46:32,520] A new study created in memory with name: no-name-1cc09252-3c93-4ada-9c46-45f3022eaf44
[I 2025-09-14 12:46:41,734] Trial 0 finished with value: 3.988215491698955 and parameters: {'model': 'model1', 'mlp_hidden_dims': '[32, 16]', 'fusion_hidden_dims': '[128, 64, 32]', 'max_lr': 0.009989641949744405, 'div_factor': 28, 'final_div_factor': 152, 'weight_decay': 1.4949469496120247e-06, 'epochs': 100}. Best is trial 0 with value: 3.988215491698955.
[I 2025-09-14 12:47:03,944] Trial 1 finished with value: 4.721181281056536 and parameters: {'model': 'model2', 'mlp_hidden_dims': '[32, 16]', 'fusion_hidden_dims': '[256, 128, 64]', 'max_lr': 0.08429556942019817, 'div_factor': 36, 'final_div_factor': 258, 'weight_decay': 5.783961555323382e-06, 'epochs': 200}. Best is trial 0 with value: 3.988215491698955.
[I 2025-09-14 12:47:14,019] Trial 2 finished with value: 5.050426261210223 and parameters: {'model': 'model2', 'mlp_hidden_dims': '[32, 16]', 'fusion_hidden_dims': '[128

In [146]:
from numbers import Number

# --- Configure which seeds to use for stability reporting ---
model_seeds = [0, 1, 2, 3, 4]   # change as needed
numeric_keys = None  # we’ll infer from first run

# Determine study direction safely (minimize by default if unknown)
def is_minimize_study(study):
    try:
        return study.direction == optuna.study.StudyDirection.MINIMIZE
    except Exception:
        try:
            return study.directions[0] == optuna.study.StudyDirection.MINIMIZE
        except Exception:
            return True  # fallback

minimize = is_minimize_study(study)

# --- Pick the single best completed trial across ALL patch sizes ---
completed = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]
if not completed:
    raise RuntimeError("No completed trials in the study.")

best_trial = (min if minimize else max)(completed, key=lambda t: t.value)

if model_name == "ViT_hybrid":
    # Extract its patch_size (may be None if not used in search)
    best_patch = best_trial.params.get("patch_size", None)
    trial_name = f"trial_{best_trial.number}_patch{best_patch}"
    print(f"\nEvaluating overall best trial "
      f"(Trial {best_trial.number}, ValObjective: {best_trial.value:.4f}, "
      f"patch_size={best_patch})")
else:
    trial_name = f"trial_{best_trial.number}"
    print(f"\nEvaluating overall best trial "
      f"(Trial {best_trial.number}, ValObjective: {best_trial.value:.4f}")
    
save_path = os.path.join(save_dir, f"{model_name}/{name}/best_model/{trial_name}")
os.makedirs(save_path, exist_ok=True)

per_seed_metrics = []

for s in model_seeds:
    set_model_seed(s)
    metrics = evaluate_best_model(
        best_trial,
        train_loader, val_loader, test_loader,
        dataset_name=dataset_name,
        image_name=name,          # keep your original arg
        task_type=task_type,
        save_dir=save_dir,
        attributes=attributes,
        imgs_shape=imgs_shape,
        class_weight=class_weight,
        num_classes=num_classes,
        epochs=epochs,
        trial_name=trial_name
        # If evaluate_best_model accepts a seed arg, pass model_seed=s
    )
    if not isinstance(metrics, dict):
        raise TypeError(f"evaluate_best_model must return dict, got: {type(metrics)}")

    # infer numeric keys once (ints, floats, numpy scalars)
    if numeric_keys is None:
        numeric_keys = [k for k, v in metrics.items()
                        if isinstance(v, (Number, np.floating, np.integer))]
    per_seed_metrics.append(metrics)

    # brief per-seed printout
    log_bits = []
    for k in ["test_loss", "val_loss", "train_loss"]:
        if k in metrics and isinstance(metrics[k], (Number, np.floating, np.integer)):
            log_bits.append(f"{k}={float(metrics[k]):.6f}")
    print(f"  Seed {s}: " + (", ".join(log_bits) if log_bits else str(metrics)))

# Aggregate mean/std per numeric key
aggregates = {}
for k in numeric_keys:
    vals = [float(m[k]) for m in per_seed_metrics]
    mean_k = float(np.mean(vals))
    std_k = float(np.std(vals, ddof=1)) if len(vals) > 1 else 0.0
    aggregates[k] = {"mean": mean_k, "std": std_k}

# Save YAML-like txt
out_file = os.path.join(save_path, "best_results_mean.txt")
with open(out_file, "w", encoding="utf-8") as f:
    f.write("# Overall best trial re-evaluation across model seeds\n")
    f.write(f"trial_number: {best_trial.number}\n")
    if model_name == "ViT_with_register_tokens":
        f.write(f"patch_size: {best_patch}\n")
    f.write(f"val_objective_best: {best_trial.value:.6f}\n")
    f.write(f"direction: {'minimize' if minimize else 'maximize'}\n")
    f.write(f"seeds: {model_seeds}\n")
    f.write("per_seed_metrics:\n")
    for s, m in zip(model_seeds, per_seed_metrics):
        f.write(f"  - seed: {s}\n")
        for k in numeric_keys:
            f.write(f"      {k}: {float(m[k]):.6f}\n")
    f.write("aggregates:\n")
    for k, mm in aggregates.items():
        f.write(f"  {k}:\n")
        f.write(f"    mean: {mm['mean']:.6f}\n")
        f.write(f"    std: {mm['std']:.6f}\n")

# Console summary
if "test_loss" in aggregates:
    print("  → test_loss Mean ± Std: "
          f"{aggregates['test_loss']['mean']:.6f} ± {aggregates['test_loss']['std']:.6f}")
elif "val_loss" in aggregates:
    print("  → val_loss Mean ± Std: "
          f"{aggregates['val_loss']['mean']:.6f} ± {aggregates['val_loss']['std']:.6f}")

print(f"Saved to: {out_file}")


Evaluating overall best trial (Trial 47, ValObjective: 3.2669

Best Trial: 47
  Best Score: 3.2669
  Best Hyperparameters:
    model: model2
    mlp_hidden_dims: [32, 16]
    fusion_hidden_dims: [128, 64, 32]
    max_lr: 0.002215302390270786
    div_factor: 70
    final_div_factor: 275
    weight_decay: 7.875018674141369e-05
    epochs: 200
Best model saved to logs/Regression/boston/CNN_hybrid/FeatureWrap/best_model/trial_47/best_model.pth
  Seed 0: test_loss=13.219134, val_loss=13.527248, train_loss=8.586879

Best Trial: 47
  Best Score: 3.2669
  Best Hyperparameters:
    model: model2
    mlp_hidden_dims: [32, 16]
    fusion_hidden_dims: [128, 64, 32]
    max_lr: 0.002215302390270786
    div_factor: 70
    final_div_factor: 275
    weight_decay: 7.875018674141369e-05
    epochs: 200
Best model saved to logs/Regression/boston/CNN_hybrid/FeatureWrap/best_model/trial_47/best_model.pth
  Seed 1: test_loss=8.632610, val_loss=13.324207, train_loss=5.177723

Best Trial: 47
  Best Score: 3.

### EXPERIMENT: BIE

In [182]:
#Select the model and the parameters
if task_type.lower() == "regression":
    problem_type = "regression"
else:
    problem_type = "supervised"

name = f"BIE"

#Define the dataset path and the folder where the images will be saved
images_folder = f"SyntheticImages/{task_type}/{dataset_name}/{name}"

In [183]:
train_loader, val_loader, test_loader, attributes, imgs_shape, label_encoder, class_weight  = load_and_preprocess_data(df, dataset_name, images_folder, problem_type, task_type, seed=SEED, batch_size=batch_size, device=device)

Shapes — Train: (354, 22), Val: (76, 22), Test: (76, 22)
Numerical features: 11 — ['CRIM', 'ZN', 'INDUS', 'NOX', 'RM', 'AGE', 'DIS', 'TAX', 'PTRATIO', 'B', 'LSTAT']
Categorical features: 2 — ['CHAS', 'RAD']
Total features: 22
SyntheticImages/Regression/boston/BIE/train/regression.csv
SyntheticImages/Regression/boston/BIE/val/regression.csv
SyntheticImages/Regression/boston/BIE/test/regression.csv
Images shape:  (3, 64, 64)
Attributes:  22


In [184]:
# Determine possible patch sizes for the Vision Transformer by finding divisors of the image width
divisors = find_divisors(imgs_shape[1])
divisors

[1, 2, 4, 8, 16, 32, 64]

In [185]:
divisors = [4, 8, 16, 32]

In [186]:
import optuna
study = optuna.create_study(direction="minimize" if task_type.lower() == "regression" else "maximize")
study.optimize(lambda trial: objective(
    trial=trial,
    model_name=model_name,
    image_name=name,
    task_type=task_type,
    num_classes=num_classes,
    train_loader=train_loader,
    val_loader=val_loader,
    test_loader=test_loader,
    divisors=divisors,
    attributes=attributes,
    imgs_shape=imgs_shape,
    device=device,
    save_dir=save_dir,
    class_weight=class_weight,
    epochs=epochs
), n_trials=n_trials)

[I 2025-09-16 12:11:30,251] A new study created in memory with name: no-name-e213b718-e761-4eb3-b470-f119734c4ca9
[I 2025-09-16 12:11:48,409] Trial 0 finished with value: 6.225936034886485 and parameters: {'model': 'model2', 'mlp_hidden_dims': '[32, 16]', 'fusion_hidden_dims': '[32, 16]', 'max_lr': 0.0007341902290139219, 'div_factor': 34, 'final_div_factor': 947, 'weight_decay': 1.8270777248437804e-05, 'epochs': 200}. Best is trial 0 with value: 6.225936034886485.
[I 2025-09-16 12:12:08,629] Trial 1 finished with value: 6.081440391578965 and parameters: {'model': 'model1', 'mlp_hidden_dims': '[128, 64, 32]', 'fusion_hidden_dims': '[256, 128, 64]', 'max_lr': 0.005576498489807954, 'div_factor': 79, 'final_div_factor': 814, 'weight_decay': 1.024235003186395e-05, 'epochs': 200}. Best is trial 1 with value: 6.081440391578965.
[I 2025-09-16 12:12:16,344] Trial 2 finished with value: 6.196664823737374 and parameters: {'model': 'model1', 'mlp_hidden_dims': '[128, 64, 32]', 'fusion_hidden_dims'

In [187]:
from numbers import Number

# --- Configure which seeds to use for stability reporting ---
model_seeds = [0, 1, 2, 3, 4]   # change as needed
numeric_keys = None  # we’ll infer from first run

# Determine study direction safely (minimize by default if unknown)
def is_minimize_study(study):
    try:
        return study.direction == optuna.study.StudyDirection.MINIMIZE
    except Exception:
        try:
            return study.directions[0] == optuna.study.StudyDirection.MINIMIZE
        except Exception:
            return True  # fallback

minimize = is_minimize_study(study)

# --- Pick the single best completed trial across ALL patch sizes ---
completed = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]
if not completed:
    raise RuntimeError("No completed trials in the study.")

best_trial = (min if minimize else max)(completed, key=lambda t: t.value)

if model_name == "ViT_hybrid":
    # Extract its patch_size (may be None if not used in search)
    best_patch = best_trial.params.get("patch_size", None)
    trial_name = f"trial_{best_trial.number}_patch{best_patch}"
    print(f"\nEvaluating overall best trial "
      f"(Trial {best_trial.number}, ValObjective: {best_trial.value:.4f}, "
      f"patch_size={best_patch})")
else:
    trial_name = f"trial_{best_trial.number}"
    print(f"\nEvaluating overall best trial "
      f"(Trial {best_trial.number}, ValObjective: {best_trial.value:.4f}")
    
save_path = os.path.join(save_dir, f"{model_name}/{name}/best_model/{trial_name}")
os.makedirs(save_path, exist_ok=True)

per_seed_metrics = []

for s in model_seeds:
    set_model_seed(s)
    metrics = evaluate_best_model(
        best_trial,
        train_loader, val_loader, test_loader,
        dataset_name=dataset_name,
        image_name=name,          # keep your original arg
        task_type=task_type,
        save_dir=save_dir,
        attributes=attributes,
        imgs_shape=imgs_shape,
        class_weight=class_weight,
        num_classes=num_classes,
        epochs=epochs,
        trial_name=trial_name
        # If evaluate_best_model accepts a seed arg, pass model_seed=s
    )
    if not isinstance(metrics, dict):
        raise TypeError(f"evaluate_best_model must return dict, got: {type(metrics)}")

    # infer numeric keys once (ints, floats, numpy scalars)
    if numeric_keys is None:
        numeric_keys = [k for k, v in metrics.items()
                        if isinstance(v, (Number, np.floating, np.integer))]
    per_seed_metrics.append(metrics)

    # brief per-seed printout
    log_bits = []
    for k in ["test_loss", "val_loss", "train_loss"]:
        if k in metrics and isinstance(metrics[k], (Number, np.floating, np.integer)):
            log_bits.append(f"{k}={float(metrics[k]):.6f}")
    print(f"  Seed {s}: " + (", ".join(log_bits) if log_bits else str(metrics)))

# Aggregate mean/std per numeric key
aggregates = {}
for k in numeric_keys:
    vals = [float(m[k]) for m in per_seed_metrics]
    mean_k = float(np.mean(vals))
    std_k = float(np.std(vals, ddof=1)) if len(vals) > 1 else 0.0
    aggregates[k] = {"mean": mean_k, "std": std_k}

# Save YAML-like txt
out_file = os.path.join(save_path, "best_results_mean.txt")
with open(out_file, "w", encoding="utf-8") as f:
    f.write("# Overall best trial re-evaluation across model seeds\n")
    f.write(f"trial_number: {best_trial.number}\n")
    if model_name == "ViT_with_register_tokens":
        f.write(f"patch_size: {best_patch}\n")
    f.write(f"val_objective_best: {best_trial.value:.6f}\n")
    f.write(f"direction: {'minimize' if minimize else 'maximize'}\n")
    f.write(f"seeds: {model_seeds}\n")
    f.write("per_seed_metrics:\n")
    for s, m in zip(model_seeds, per_seed_metrics):
        f.write(f"  - seed: {s}\n")
        for k in numeric_keys:
            f.write(f"      {k}: {float(m[k]):.6f}\n")
    f.write("aggregates:\n")
    for k, mm in aggregates.items():
        f.write(f"  {k}:\n")
        f.write(f"    mean: {mm['mean']:.6f}\n")
        f.write(f"    std: {mm['std']:.6f}\n")

# Console summary
if "test_loss" in aggregates:
    print("  → test_loss Mean ± Std: "
          f"{aggregates['test_loss']['mean']:.6f} ± {aggregates['test_loss']['std']:.6f}")
elif "val_loss" in aggregates:
    print("  → val_loss Mean ± Std: "
          f"{aggregates['val_loss']['mean']:.6f} ± {aggregates['val_loss']['std']:.6f}")

print(f"Saved to: {out_file}")


Evaluating overall best trial (Trial 5, ValObjective: 3.5529

Best Trial: 5
  Best Score: 3.5529
  Best Hyperparameters:
    model: model2
    mlp_hidden_dims: [128, 64, 32]
    fusion_hidden_dims: [256, 128, 64]
    max_lr: 0.014517338103116808
    div_factor: 93
    final_div_factor: 922
    weight_decay: 4.836783766043952e-05
    epochs: 200
Best model saved to logs/Regression/boston/CNN_hybrid/BIE/best_model/trial_5/best_model.pth
  Seed 0: test_loss=11.021610, val_loss=15.015516, train_loss=6.778717

Best Trial: 5
  Best Score: 3.5529
  Best Hyperparameters:
    model: model2
    mlp_hidden_dims: [128, 64, 32]
    fusion_hidden_dims: [256, 128, 64]
    max_lr: 0.014517338103116808
    div_factor: 93
    final_div_factor: 922
    weight_decay: 4.836783766043952e-05
    epochs: 200
Best model saved to logs/Regression/boston/CNN_hybrid/BIE/best_model/trial_5/best_model.pth
  Seed 1: test_loss=16.616910, val_loss=14.426194, train_loss=2.750998

Best Trial: 5
  Best Score: 3.5529
  Be

### EXPERIMENT: MANUEL

In [76]:
#Select the model and the parameters
if task_type.lower() == "regression":
    problem_type = "regression"
else:
    problem_type = "supervised"

name = f"REFINED"

#Define the dataset path and the folder where the images will be saved
images_folder = f"SyntheticImages/{task_type}/{dataset_name}/{name}"

In [None]:
train_loader, val_loader, test_loader, attributes, imgs_shape, label_encoder, class_weight  = load_and_preprocess_data(df, dataset_name, images_folder, problem_type, task_type, seed=SEED, batch_size=batch_size, device=device)

In [None]:
# Determine possible patch sizes for the Vision Transformer by finding divisors of the image width
divisors1 = find_divisors(imgs_shape[1])
divisors1 = [d for d in divisors1  if d >= 16][:-1]
divisors1

In [None]:
divisors2 = find_divisors(4)
divisors2

In [None]:
import optuna
study = optuna.create_study(direction="minimize" if task_type.lower() == "regression" else "maximize")
study.optimize(lambda trial: objective(
    trial=trial,
    model_name=model_name,
    image_name=name,
    task_type=task_type,
    num_classes=num_classes,
    train_loader=train_loader,
    val_loader=val_loader,
    test_loader=test_loader,
    divisors1=divisors1,
    divisors2=divisors2,
    attributes=attributes,
    imgs_shape=imgs_shape,
    device=device,
    save_dir=save_dir,
    class_weight=class_weight,
    epochs=epochs
), n_trials=n_trials)

In [None]:
# Get all unique patch sizes tried in the study
patch_sizes = sorted({t.params.get("patch_size") for t in study.trials 
                      if t.state == optuna.trial.TrialState.COMPLETE and "patch_size" in t.params})

# For each patch size, find the best trial and evaluate
for patch in patch_sizes:
    best_trial = max(
        [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE 
         and t.params.get("patch_size") == patch],
        key=lambda t: t.value,
        default=None
    )

    if best_trial is not None:
        print(f"\nEvaluating best trial for patch_size = {patch} (Trial {best_trial.number}, Value: {best_trial.value:.4f})")
        evaluate_best_model(
            best_trial,
            train_loader, val_loader, test_loader,
            dataset_name=dataset_name,
            image_name=name,
            task_type=task_type,
            save_dir=save_dir,
            attributes=attributes,
            imgs_shape=imgs_shape,
            class_weight=class_weight,
            num_classes=num_classes,
            epochs=epochs
        )
    else:
        print(f"No completed trial found for patch_size = {patch}")

In [None]:
import optuna
study = optuna.create_study(direction="minimize" if task_type.lower() == "regression" else "maximize")
study.optimize(lambda trial: objective(
    trial=trial,
    model_name=model_name,
    image_name=name,
    task_type=task_type,
    num_classes=num_classes,
    train_loader=train_loader,
    val_loader=val_loader,
    test_loader=test_loader,
    divisors1=divisors1,
    divisors2=divisors2,
    attributes=attributes,
    imgs_shape=imgs_shape,
    device=device,
    save_dir=save_dir,
    class_weight=class_weight,
    epochs=epochs,
    image_mode='refined_both'
), n_trials=n_trials)

In [None]:
# Get all unique patch sizes tried in the study
patch_sizes = sorted({t.params.get("patch_size") for t in study.trials 
                      if t.state == optuna.trial.TrialState.COMPLETE and "patch_size" in t.params})

# For each patch size, find the best trial and evaluate
for patch in patch_sizes:
    best_trial = max(
        [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE 
         and t.params.get("patch_size") == patch],
        key=lambda t: t.value,
        default=None
    )

    if best_trial is not None:
        print(f"\nEvaluating best trial for patch_size = {patch} (Trial {best_trial.number}, Value: {best_trial.value:.4f})")
        evaluate_best_model(
            best_trial,
            train_loader, val_loader, test_loader,
            dataset_name=dataset_name,
            image_name=name,
            task_type=task_type,
            save_dir=save_dir,
            attributes=attributes,
            imgs_shape=imgs_shape,
            class_weight=class_weight,
            num_classes=num_classes,
            epochs=epochs,
            image_mode='refined_both'
        )
    else:
        print(f"No completed trial found for patch_size = {patch}")

## HyCNN

In [None]:
save_dir =  os.path.join("logs", task_type, dataset_name)
model_name = "resnet_hybrid"

# Load config
with open(f"./configs/preprocess/{dataset_name}.json") as f:
    config = json.load(f)
batch_size = config["batch_size"]
epochs = config["epochs"]

n_trials = 50

device='cuda:0' if torch.cuda.is_available() else 'cpu'

In [None]:
def objective(trial, model_name, image_name, task_type, 
              train_loader, val_loader, test_loader, attributes, imgs_shape,
              num_classes=None, device='cuda', save_dir=None, class_weight=None, epochs=100):
    
    task = task_type.lower()
    
    params = load_search_space(model_name, trial)

    params["mlp_hidden_dims"] = json.loads(params["mlp_hidden_dims"])

    params["fusion_hidden_dims"] = json.loads(params["fusion_hidden_dims"])
    
    with open(f"configs/optuna_search/{model_name}.json", "r") as f:
        full_config = json.load(f)

    config = full_config[model_name]["fit"]  # Access the model key

    # Build and train model
    model = ResNetMLP(imgs_shape, attributes, params, task_type=task_type.lower(), num_classes=num_classes)
    metrics = compile_and_fit(
        model,
        train_loader, val_loader, test_loader,
        dataset_name=dataset_name,
        model_name=f"trial_{trial.number}",
        image_name=image_name,
        task=task,  # assumed to be defined externally
        max_lr=trial.suggest_float("max_lr", config["max_lr"][1], config["max_lr"][2], log=True),
        div_factor=trial.suggest_int("div_factor", config["div_factor"][1], config["div_factor"][2]),
        final_div_factor=trial.suggest_int("final_div_factor", config["final_div_factor"][1], config["final_div_factor"][2]),
        weight_decay=trial.suggest_float("weight_decay", config["weight_decay"][1], config["weight_decay"][2], log=True),
        epochs=epochs,
        save_model=False,
        class_weights=class_weight
    )

    save_dir = os.path.join(save_dir, model_name, image_name, "optuna")
    os.makedirs(save_dir, exist_ok=True)

    if task == 'regression':
        score = metrics["val_rmse"]
        with open(f"{save_dir}/optuna_trials_log.txt", "a") as f:
            f.write(f"Trial {trial.number} - VAL-RMSE: {score:.4f}, Params: {params}\n")
            f.write("=" * 60 + "\n")
    
    elif task == 'binary':
        score = metrics["val_roc_auc"]
        with open(f"{save_dir}/optuna_trials_log.txt", "a") as f:
            f.write(f"Trial {trial.number} - VAL-AUC: {score:.4f}, Params: {params}\n")
            f.write("=" * 60 + "\n")

    elif task == 'multiclass':
        score = metrics["val_accuracy"]
        with open(f"{save_dir}/optuna_trials_log.txt", "a") as f:
            f.write(f"Trial {trial.number} - VAL-Accuracy: {score:.4f}, Params: {params}\n")
            f.write("=" * 60 + "\n")
    else:
        raise ValueError(f"Unsupported task type: {task_type}")
    
    return score


In [None]:
def evaluate_best_model(best_trial, train_loader, val_loader, test_loader, 
                        dataset_name, image_name, task_type, save_dir, attributes, imgs_shape, 
                        class_weight=None, num_classes=None, epochs=10):

    task = task_type.lower()
    best_params = best_trial.params

    print(f"\nBest Trial: {best_trial.number}")
    print(f"  Best Score: {best_trial.value:.4f}")
    print("  Best Hyperparameters:")
    for k, v in best_params.items():
        print(f"    {k}: {v}")

    # Extract architecture-related parameters
    architecture_params = {
        k: v for k, v in best_params.items()
        if k in ["mlp_hidden_dims", "fusion_hidden_dims"]
    }

    # Convert JSON string to list if necessary
    if isinstance(architecture_params.get("mlp_hidden_dims"), str):
        architecture_params["mlp_hidden_dims"] = json.loads(architecture_params["mlp_hidden_dims"])
    
    if isinstance(architecture_params.get("fusion_hidden_dims"), str):
        architecture_params["fusion_hidden_dims"] = json.loads(architecture_params["fusion_hidden_dims"])

    # Initialize model
    model = ResNetMLP(imgs_shape, attributes, architecture_params, task_type=task_type.lower(), num_classes=num_classes)

    # Train and evaluate~
    metrics = compile_and_fit(
        model,
        train_loader, val_loader, test_loader,
        dataset_name=dataset_name,
        image_name=image_name,
        model_name=model_name,
        trial_name=f"trial_{best_trial.number}",
        task=task,
        max_lr=best_params["max_lr"],
        div_factor=best_params["div_factor"],
        final_div_factor=best_params["final_div_factor"],
        weight_decay=best_params["weight_decay"],
        epochs=epochs,
        save_model=True,
        class_weights=class_weight,
        save_dir=save_dir
    )

    # Add training-related fixed params
    best_params["epochs"] = epochs
    best_params["class_weight"] = class_weight.tolist() if class_weight is not None else None    

    # Save best hyperparameters
    params_file = os.path.join(save_dir, f"{model_name}/{image_name}/best_model/trial_{best_trial.number}", "best_params.json")
    os.makedirs(os.path.dirname(params_file), exist_ok=True)

    with open(params_file, "w") as f:
        json.dump(best_params, f, indent=4)

    return metrics

### EXPERIMENT: TINTO

In [None]:
#Select the model and the parameters
problem_type = "supervised"
name = f"TINTO_blur"

#Define the dataset path and the folder where the images will be saved
images_folder = f"SyntheticImages/Binary/{dataset_name}/{name}"

In [None]:
train_loader, val_loader, test_loader, attributes, imgs_shape, label_encoder, class_weight  = load_and_preprocess_data(df, dataset_name, images_folder, problem_type, task_type, seed=SEED, batch_size=batch_size, device=device)

In [None]:
import optuna
study = optuna.create_study(direction="minimize" if task_type.lower() == "regression" else "maximize")
study.optimize(lambda trial: objective(
    trial=trial,
    model_name=model_name,
    image_name=name,
    task_type=task_type,
    num_classes=1,
    attributes=attributes,
    imgs_shape=imgs_shape,
    train_loader=train_loader,
    val_loader=val_loader,
    test_loader=test_loader,
    device=device,
    save_dir=save_dir,
    class_weight=class_weight,
    epochs=epochs
), n_trials=n_trials)

In [None]:
# Get the best completed trial
best_trial = max(
    [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE],
    key=lambda t: t.value,
    default=None
)

if best_trial is not None:
    print(f"Evaluating best trial (Trial {best_trial.number}, Value: {best_trial.value:.4f})")
    
    evaluate_best_model(
        best_trial,
        train_loader, val_loader, test_loader,
        dataset_name=dataset_name,
        image_name=name,
        task_type=task_type,
        save_dir=save_dir,
        attributes=attributes,
        imgs_shape=imgs_shape,
        class_weight=class_weight,
        num_classes=1,
        epochs=epochs
    )
else:
    print("No completed trials found.")

### EXPERIMENT: IGTD

In [None]:
#Select the model and the parameters
problem_type = "supervised"
name = f"IGTD"

#Define the dataset path and the folder where the images will be saved
images_folder = f"SyntheticImages/Binary/{dataset_name}/{name}"

In [None]:
train_loader, val_loader, test_loader, attributes, imgs_shape, label_encoder, class_weight  = load_and_preprocess_data(df, dataset_name, images_folder, problem_type, task_type, seed=SEED, batch_size=batch_size, device=device)

In [None]:
import optuna
study = optuna.create_study(direction="minimize" if task_type.lower() == "regression" else "maximize")
study.optimize(lambda trial: objective(
    trial=trial,
    model_name=model_name,
    image_name=name,
    task_type=task_type,
    num_classes=1,
    attributes=attributes,
    imgs_shape=imgs_shape,
    train_loader=train_loader,
    val_loader=val_loader,
    test_loader=test_loader,
    device=device,
    save_dir=save_dir,
    class_weight=class_weight,
    epochs=epochs
), n_trials=n_trials)

In [None]:
# Get the best completed trial
best_trial = max(
    [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE],
    key=lambda t: t.value,
    default=None
)

if best_trial is not None:
    print(f"Evaluating best trial (Trial {best_trial.number}, Value: {best_trial.value:.4f})")
    
    evaluate_best_model(
        best_trial,
        train_loader, val_loader, test_loader,
        dataset_name=dataset_name,
        image_name=name,
        task_type=task_type,
        save_dir=save_dir,
        attributes=attributes,
        imgs_shape=imgs_shape,
        class_weight=class_weight,
        num_classes=1,
        epochs=epochs
    )
else:
    print("No completed trials found.")

### EXPERIMENT: REFINED

In [None]:
#Select the model and the parameters
problem_type = "supervised"
name = f"REFINED"

#Define the dataset path and the folder where the images will be saved
images_folder = f"SyntheticImages/Binary/{dataset_name}/{name}"

In [None]:
train_loader, val_loader, test_loader, attributes, imgs_shape, label_encoder, class_weight  = load_and_preprocess_data(df, dataset_name, images_folder, problem_type, task_type, seed=SEED, batch_size=batch_size, device=device)

In [None]:
import optuna
study = optuna.create_study(direction="minimize" if task_type.lower() == "regression" else "maximize")
study.optimize(lambda trial: objective(
    trial=trial,
    model_name=model_name,
    image_name=name,
    task_type=task_type,
    num_classes=1,
    attributes=attributes,
    imgs_shape=imgs_shape,
    train_loader=train_loader,
    val_loader=val_loader,
    test_loader=test_loader,
    device=device,
    save_dir=save_dir,
    class_weight=class_weight,
    epochs=epochs
), n_trials=n_trials)

In [None]:
# Get the best completed trial
best_trial = max(
    [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE],
    key=lambda t: t.value,
    default=None
)

if best_trial is not None:
    print(f"Evaluating best trial (Trial {best_trial.number}, Value: {best_trial.value:.4f})")
    
    evaluate_best_model(
        best_trial,
        train_loader, val_loader, test_loader,
        dataset_name=dataset_name,
        image_name=name,
        task_type=task_type,
        save_dir=save_dir,
        attributes=attributes,
        imgs_shape=imgs_shape,
        class_weight=class_weight,
        num_classes=1,
        epochs=epochs
    )
else:
    print("No completed trials found.")