In [1]:
import sys
import os

# Add the path to your ML directory
sys.path.append(r'C:\Users\admin\Documents\Masters\ES_Masters\Masters-Processing\ML')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import RFE
from sklearn.ensemble import RandomForestClassifier
from mrmr_wrapper import MRMRTransformer
from sklearn.svm import SVC
import optuna
import optuna.visualization as vis
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, roc_curve, precision_recall_curve, average_precision_score)
from sklearn.decomposition import PCA
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.inspection import permutation_importance

# Import Data

In [2]:
# Load the data
file_path = "features-Master.csv"
data = pd.read_csv(file_path)

# Shuffle the data
shuffled = data.sample(frac=1, random_state=42).reset_index(drop=True)
data_shuffled = shuffled.iloc[:, 4:]
labels_shuffled = shuffled["Comfort Score"]

# Create binary labels (1,2 = 0; 4,5 = 1; exclude 3 for clearer separation)
binary_labels = labels_shuffled.apply(lambda x: 0 if x <= 2 else (1 if x >=4 else np.nan))
binary_data = data_shuffled[~binary_labels.isna()]
binary_labels = binary_labels[~binary_labels.isna()] 

# Train/Test Split

In [3]:
# For binary classification
X_train, X_test, y_train, y_test = train_test_split(
    binary_data,
    binary_labels,
    test_size=0.2,
    stratify=binary_labels,
    random_state=42
)

# Handle Missing Values

In [4]:
# Impute missing values (median)
imputer = SimpleImputer(strategy='median')
X_train = pd.DataFrame(imputer.fit_transform(X_train), columns=binary_data.columns, index=X_train.index)
X_test = pd.DataFrame(imputer.transform(X_test), columns=binary_data.columns, index=X_test.index)

# Scale Data

In [5]:
# Scale (keep column names)
scaler = StandardScaler()
X_train_scaled = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns, index=X_train.index)
X_test_scaled = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns, index=X_test.index)

# Optimize Feature Selection and SVM Parameters

In [6]:
# Use these for optimization
X = X_train_scaled.copy()
y = y_train.copy()

def binary_classification_objective(trial):
    # Feature selection method
    fs_method = trial.suggest_categorical('feature_selection', ['MRMR', 'RFE', 'None'])

    # Robust k_features bounds
    n_total_features = X.shape[1]
    max_k_allowed = min(105, n_total_features)
    min_k_allowed = 1 if n_total_features < 5 else 5
    if fs_method != 'None' and max_k_allowed >= min_k_allowed:
        # step 10 only if range large enough; otherwise just search full int range
        if max_k_allowed - min_k_allowed >= 10:
            k_features = trial.suggest_int('k_features', min_k_allowed, max_k_allowed, step=10)
        else:
            k_features = trial.suggest_int('k_features', min_k_allowed, max_k_allowed)
        if fs_method == 'RFE':
            estimator = RandomForestClassifier(n_estimators=100, random_state=42)
            selector = RFE(estimator, n_features_to_select=k_features)
        else:
            selector = MRMRTransformer(k_features=k_features)
    else:
        selector = 'passthrough'

    # SVM hyperparameters
    kernel = trial.suggest_categorical('kernel', ['linear', 'rbf', 'poly'])
    params = {
        'C': trial.suggest_float('C', 0.1, 100, log=True),
        'kernel': kernel,
        'gamma': trial.suggest_categorical('gamma', ['scale', 'auto']),
        'degree': trial.suggest_int('degree', 2, 5) if kernel == 'poly' else 3,
        'probability': True,
        'random_state': 42
    }
    model = SVC(**params)

    pipeline = Pipeline([
        ('feature_selection', selector),
        ('model', model)
    ])

    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    try:
        scores = cross_val_score(pipeline, X, y, cv=cv, scoring='roc_auc', n_jobs=1)
        return np.mean(scores)
    except Exception as e:
        # Print the error for debugging and return a very poor score
        print(f"Error in trial: {e}")
        return -np.inf

# Run Optimization 10 Times and Collect Top Features and Best Parameters

In [None]:
# Storage
all_results = []
top_features_all_runs = []

# Repeat optimization + evaluation 10 times
for run in range(10):
    print(f"\n{'='*50}")
    print(f"STARTING RUN {run + 1}/10")
    print(f"{'='*50}")

    study = optuna.create_study(direction='maximize')
    study.optimize(binary_classification_objective, n_trials=5, show_progress_bar=True, n_jobs=4)

    print("\nBinary Classification Optimization Results:")
    print(f"Best ROC AUC Score: {study.best_value:.4f}")
    print("Best Parameters:")
    for key, value in study.best_params.items():
        print(f"  {key}: {value}")

    # ---------------------------
    # Recreate selector using best params and extract selected feature names
    # ---------------------------
    best_fs_method = study.best_params.get('feature_selection', 'None')
    selected_features = None

    if best_fs_method != 'None':
        k_features = study.best_params['k_features']
        if best_fs_method == 'RFE':
            estimator = RandomForestClassifier()
            selector = RFE(estimator, n_features_to_select=k_features)
        elif best_fs_method == 'MRMR':
            selector = MRMRTransformer(k_features=k_features)
        
        selector.fit(X, y)
        if hasattr(selector, 'get_support'):  # For RFE
            selected_features = X.columns[selector.get_support()]
        else:  # For MRMR
            selected_features = selector.selected_features
        X_best = X[selected_features]
    else:
        X_best = X
        selected_features = X.columns

    # Apply the same feature selection to test data
    if best_fs_method != 'None':
        if best_fs_method == 'MRMR':
            X_test_final = X_test_scaled[selected_features]  # This should be DataFrame
        else:  # RFE
            X_test_transformed = selector.transform(X_test_scaled)
            # Convert back to DataFrame with feature names
            X_test_final = pd.DataFrame(X_test_transformed, columns=selected_features)
    else:
        X_test_final = X_test_scaled  # DataFrame

    # ---------------------------
    # Train final SVM with best params and evaluate on test set
    # ---------------------------
    best_model_params = {
        'C': study.best_params['C'],
        'kernel': study.best_params['kernel'],
        'gamma': study.best_params['gamma'],
        'probability': True,
        'random_state': 42
    }
    if best_model_params['kernel'] == 'poly':
        best_model_params['degree'] = study.best_params.get('degree', 3)

    best_model = SVC(**best_model_params)
    best_model.fit(X_best, y)

    # Predictions
    y_pred = best_model.predict(X_test_final)
    y_pred_proba = best_model.predict_proba(X_test_final)[:, 1]

    # Metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, zero_division=0)
    recall = recall_score(y_test, y_pred, zero_division=0)
    f1 = f1_score(y_test, y_pred, zero_division=0)
    roc_auc = roc_auc_score(y_test, y_pred_proba)

    # Save run result
    run_result = {
        'run_number': run + 1,
        'best_cv_score': study.best_value,
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'roc_auc': roc_auc,
        'selected_features': ";".join(selected_features),
        'feature_selection_method': best_fs_method
    }
    for key, value in study.best_params.items():
        run_result[f'param_{key}'] = value

    all_results.append(run_result)

    print("\nFinal Model Evaluation on Test Set:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"ROC AUC: {roc_auc:.4f}")

    # ---------------------------
    # FEATURE IMPORTANCE for this run
    # ---------------------------
    print("\n" + "="*30)
    print(f"FEATURE IMPORTANCE (run {run+1})")
    print("="*30)

    # Always convert selected_features → indices relative to binary_data.columns
    if best_fs_method != 'None':
        if isinstance(selected_features[0], str):
            # MRMR gave names → get indices
            selected_indices = [binary_data.columns.get_loc(feat) for feat in selected_features]
        else:
            # Already indices (or boolean mask converted earlier)
            selected_indices = list(selected_features)
    else:
        selected_indices = list(range(binary_data.shape[1]))

    # Convert back indices → names
    final_feature_names = binary_data.columns[selected_indices].tolist()

    n_features_in_model = len(final_feature_names)
    print(f"Number of features in final model: {n_features_in_model}")

    if best_model.kernel == 'linear':
        # Use coefficients
        try:
            coef = best_model.coef_.ravel()
        except Exception as e:
            print(f"Couldn't extract coef_ for linear kernel: {e}")
            coef = np.zeros(n_features_in_model)

        feature_importance_df = pd.DataFrame({
            'feature': final_feature_names,
            'index': selected_indices,
            'coefficient': coef
        })
        feature_importance_df['abs_importance'] = feature_importance_df['coefficient'].abs()
        feature_importance_df = feature_importance_df.sort_values('abs_importance', ascending=False).reset_index(drop=True)

        # Top-20
        top_n = min(20, feature_importance_df.shape[0])
        top_features = feature_importance_df.head(top_n).copy()
        top_features['run'] = run + 1
        top_features['best_cv_score'] = study.best_value

    else:
        # Non-linear → permutation importance
        try:
            perm = permutation_importance(
                best_model,
                X_test_final,
                y_test,
                n_repeats=50,
                random_state=42,
                scoring='roc_auc',
                n_jobs=1
            )
            importance_df = pd.DataFrame({
                'feature': final_feature_names,
                'index': selected_indices,
                'importance_mean': perm.importances_mean,
                'importance_std': perm.importances_std
            })
            importance_df = importance_df.sort_values('importance_mean', ascending=False).reset_index(drop=True)

            # Top-20
            top_n = min(20, importance_df.shape[0])
            top_features = importance_df.head(top_n).copy()
            top_features['run'] = run + 1
            top_features['best_cv_score'] = study.best_value

        except Exception as e:
            print(f"Permutation importance failed for run {run+1}: {e}")
            importance_df = pd.DataFrame({
                'feature': final_feature_names,
                'index': selected_indices,
                'importance_mean': np.zeros(len(final_feature_names)),
                'importance_std': np.zeros(len(final_feature_names))
            })
            top_features = importance_df.head(min(20, len(final_feature_names))).copy()
            top_features['run'] = run + 1
            top_features['best_cv_score'] = study.best_value

    # Collect top features for final combined CSV
    top_features_all_runs.append(top_features)


# ---------------------------
# After all runs: save combined results & top features
# ---------------------------
print(f"\n{'='*50}")
print("SUMMARY ACROSS ALL RUNS")
print(f"{'='*50}")

results_df = pd.DataFrame(all_results)
metrics_to_avg = ['best_cv_score', 'accuracy', 'precision', 'recall', 'f1', 'roc_auc']

summary_row = {'run_number': 'SUMMARY'}
for metric in metrics_to_avg:
    avg_value = results_df[metric].mean()
    std_value = results_df[metric].std()
    summary_row[metric] = f"{avg_value:.4f} ± {std_value:.4f}"
    print(f"{metric}: {avg_value:.4f} ± {std_value:.4f}")

# Mark params N/A for summary
for key in [k for k in results_df.columns if k.startswith('param_')]:
    summary_row[key] = 'N/A'

combined_results = all_results + [summary_row]
df = pd.DataFrame(combined_results)
#df.to_csv(results_outpath, index=False)
print(f"\nCombined results with averages saved to: {results_outpath}")

# Combine top features from all runs into one CSV
if top_features_all_runs:
    combined_top = pd.concat(top_features_all_runs, ignore_index=True, sort=False)
    #combined_top_csv = os.path.join(out_dir, 'combined_top_features.csv')
    #combined_top.to_csv(combined_top_csv, index=False)
    print(f"Combined top features saved to: {combined_top_csv}")
else:
    print("No top features were collected.")

print("Done.")


[I 2025-09-10 13:25:20,118] A new study created in memory with name: no-name-3203c15a-1a44-4a4d-a5e5-045cd92e2d6f



STARTING RUN 1/10


  0%|          | 0/5 [00:00<?, ?it/s]

[I 2025-09-10 13:25:20,385] Trial 2 finished with value: 0.6716080216080216 and parameters: {'feature_selection': 'None', 'kernel': 'poly', 'C': 1.8810502988890667, 'gamma': 'scale', 'degree': 2}. Best is trial 2 with value: 0.6716080216080216.
[I 2025-09-10 13:25:20,732] Trial 4 finished with value: 0.6362976529643196 and parameters: {'feature_selection': 'None', 'kernel': 'poly', 'C': 24.776046250541565, 'gamma': 'scale', 'degree': 2}. Best is trial 2 with value: 0.6716080216080216.



[A
[A
[A
[A
[A
[A
100%|██████████| 15/15 [00:01<00:00, 13.07it/s]

[A

Got MRMR features



[A
[A
[A
[A
[A
[A
100%|██████████| 15/15 [00:01<00:00, 12.76it/s]

[A

Got MRMR features



[A
[A
[A
[A
[A
[A
[A
100%|██████████| 15/15 [00:01<00:00, 12.00it/s]

[A

Got MRMR features



[A
[A
[A
[A
[A
[A
100%|██████████| 15/15 [00:01<00:00, 12.30it/s]


100%|██████████| 65/65 [00:05<00:00, 11.31it/s]

Got MRMR features


100%|██████████| 65/65 [00:05<00:00, 12.19it/s]


Got MRMR features



[A
[A
[A
[A
[A
[A
100%|██████████| 15/15 [00:01<00:00, 13.12it/s]


[A[A                                         
[A
[A
[A

Got MRMR features
[I 2025-09-10 13:25:31,567] Trial 0 finished with value: 0.7666037666037666 and parameters: {'feature_selection': 'MRMR', 'k_features': 15, 'kernel': 'rbf', 'C': 1.391851625855914, 'gamma': 'scale'}. Best is trial 0 with value: 0.7666037666037666.



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
100%|██████████| 65/65 [00:03<00:00, 19.79it/s]


Got MRMR features


100%|██████████| 65/65 [00:02<00:00, 23.09it/s]


Got MRMR features


100%|██████████| 65/65 [00:02<00:00, 22.72it/s]


Got MRMR features


100%|██████████| 65/65 [00:02<00:00, 22.62it/s]


Got MRMR features
[I 2025-09-10 13:25:42,694] Trial 3 finished with value: 0.7483109483109482 and parameters: {'feature_selection': 'MRMR', 'k_features': 65, 'kernel': 'rbf', 'C': 0.1549250405300186, 'gamma': 'scale'}. Best is trial 0 with value: 0.7666037666037666.
[I 2025-09-10 13:29:19,588] Trial 1 finished with value: 0.7519708686375354 and parameters: {'feature_selection': 'RFE', 'k_features': 75, 'kernel': 'linear', 'C': 0.8036805589587029, 'gamma': 'scale'}. Best is trial 0 with value: 0.7666037666037666.

Binary Classification Optimization Results:
Best ROC AUC Score: 0.7666
Best Parameters:
  feature_selection: MRMR
  k_features: 15
  kernel: rbf
  C: 1.391851625855914
  gamma: scale


100%|██████████| 15/15 [00:00<00:00, 32.20it/s]


Got MRMR features

Final Model Evaluation on Test Set:
Accuracy: 0.6500
Precision: 0.6667
Recall: 0.7273
F1 Score: 0.6957
ROC AUC: 0.7172

FEATURE IMPORTANCE (run 1)
Number of features in final model: 15


[I 2025-09-10 13:29:22,197] A new study created in memory with name: no-name-53e4985b-f142-4ffd-a402-830b8e021ea3



STARTING RUN 2/10


  0%|          | 0/5 [00:00<?, ?it/s]

[I 2025-09-10 13:29:22,495] Trial 3 finished with value: 0.7329232495899163 and parameters: {'feature_selection': 'None', 'kernel': 'linear', 'C': 3.149188748172839, 'gamma': 'scale'}. Best is trial 3 with value: 0.7329232495899163.
[I 2025-09-10 13:29:22,653] Trial 4 finished with value: 0.6362976529643196 and parameters: {'feature_selection': 'None', 'kernel': 'poly', 'C': 29.385294757259796, 'gamma': 'auto', 'degree': 2}. Best is trial 3 with value: 0.7329232495899163.
[I 2025-09-10 13:37:02,229] Trial 0 finished with value: 0.7701989368656036 and parameters: {'feature_selection': 'RFE', 'k_features': 75, 'kernel': 'linear', 'C': 0.48809670821703277, 'gamma': 'scale'}. Best is trial 0 with value: 0.7701989368656036.
[I 2025-09-10 13:37:21,057] Trial 1 finished with value: 0.7860226193559526 and parameters: {'feature_selection': 'RFE', 'k_features': 55, 'kernel': 'rbf', 'C': 5.015562764567839, 'gamma': 'scale'}. Best is trial 1 with value: 0.7860226193559526.
[I 2025-09-10 13:37:34,8

[I 2025-09-10 13:38:39,197] A new study created in memory with name: no-name-cbf1d73a-cac6-44d9-8977-abf5fd5d6cec



STARTING RUN 3/10


  0%|          | 0/5 [00:00<?, ?it/s]

[I 2025-09-10 13:38:39,465] Trial 3 finished with value: 0.6928133594800261 and parameters: {'feature_selection': 'None', 'kernel': 'poly', 'C': 2.57872609978344, 'gamma': 'scale', 'degree': 3}. Best is trial 3 with value: 0.6928133594800261.
[I 2025-09-10 13:38:39,515] Trial 2 finished with value: 0.7329232495899163 and parameters: {'feature_selection': 'None', 'kernel': 'linear', 'C': 1.133462971538642, 'gamma': 'scale'}. Best is trial 2 with value: 0.7329232495899163.



[A
[A
[A
[A
[A
[A
100%|██████████| 25/25 [00:02<00:00, 11.34it/s]

[A

Got MRMR features



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

[A[A
[A

100%|██████████| 25/25 [00:02<00:00, 10.71it/s]

[A

[A[A

Got MRMR features



[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

100%|██████████| 55/55 [00:05<00:00,  9.20it/s]


[A[A

Got MRMR features



[A

[A[A
[A

100%|██████████| 25/25 [00:01<00:00, 13.83it/s]

[A

Got MRMR features




[A[A
[A

[A[A

[A[A
[A

[A[A
100%|██████████| 25/25 [00:02<00:00, 12.33it/s]


[A[A
[A

Got MRMR features




[A[A
[A
[A

[A[A
[A

[A[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
100%|██████████| 25/25 [00:01<00:00, 12.85it/s]

[A

Got MRMR features




[A[A
[A

[A[A

[A[A
100%|██████████| 25/25 [00:01<00:00, 12.95it/s]


[A[A
[A

Got MRMR features




[A[A
[A

[A[A
[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

100%|██████████| 55/55 [00:04<00:00, 12.38it/s]


100%|██████████| 25/25 [00:01<00:00, 12.59it/s]


Got MRMR features
Got MRMR features



[A

[A[A
[A

[A[A
100%|██████████| 25/25 [00:01<00:00, 13.51it/s]


[A[A
[A

Got MRMR features




[A[A
[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

100%|██████████| 25/25 [00:01<00:00, 12.66it/s]

[A                                            

[A[A                                         
[A
[A

Got MRMR features
[I 2025-09-10 13:38:52,753] Trial 0 finished with value: 0.7444925444925445 and parameters: {'feature_selection': 'MRMR', 'k_features': 25, 'kernel': 'rbf', 'C': 2.8588724662899714, 'gamma': 'scale'}. Best is trial 0 with value: 0.7444925444925445.



100%|██████████| 25/25 [00:01<00:00, 13.34it/s]


[A[A                                         
[A
[A
[A

Got MRMR features
[I 2025-09-10 13:38:53,093] Trial 4 finished with value: 0.7409898743232077 and parameters: {'feature_selection': 'MRMR', 'k_features': 25, 'kernel': 'rbf', 'C': 3.0394273351667227, 'gamma': 'scale'}. Best is trial 0 with value: 0.7444925444925445.



[A
[A
[A
[A
100%|██████████| 55/55 [00:03<00:00, 17.29it/s]


Got MRMR features


100%|██████████| 55/55 [00:01<00:00, 29.83it/s]


Got MRMR features


100%|██████████| 55/55 [00:01<00:00, 30.93it/s]


Got MRMR features
[I 2025-09-10 13:38:57,658] Trial 1 finished with value: 0.7665482665482666 and parameters: {'feature_selection': 'MRMR', 'k_features': 55, 'kernel': 'rbf', 'C': 0.2699125555959955, 'gamma': 'auto'}. Best is trial 1 with value: 0.7665482665482666.

Binary Classification Optimization Results:
Best ROC AUC Score: 0.7665
Best Parameters:
  feature_selection: MRMR
  k_features: 55
  kernel: rbf
  C: 0.2699125555959955
  gamma: auto


100%|██████████| 55/55 [00:01<00:00, 29.90it/s]


Got MRMR features

Final Model Evaluation on Test Set:
Accuracy: 0.6333
Precision: 0.6571
Recall: 0.6970
F1 Score: 0.6765
ROC AUC: 0.7116

FEATURE IMPORTANCE (run 3)
Number of features in final model: 55


[I 2025-09-10 13:39:08,262] A new study created in memory with name: no-name-f6a32f4c-90e2-483f-9736-a83232db89b1



STARTING RUN 4/10


  0%|          | 0/5 [00:00<?, ?it/s]


[A                                   
[A                                   
[A                                   


[I 2025-09-10 13:39:08,524] Trial 1 finished with value: 0.6705128205128205 and parameters: {'feature_selection': 'None', 'kernel': 'poly', 'C': 2.961382229487148, 'gamma': 'scale', 'degree': 2}. Best is trial 1 with value: 0.6705128205128205.
[I 2025-09-10 13:39:08,565] Trial 3 finished with value: 0.6966810966810966 and parameters: {'feature_selection': 'None', 'kernel': 'poly', 'C': 3.1249354766872304, 'gamma': 'scale', 'degree': 3}. Best is trial 3 with value: 0.6966810966810966.
[I 2025-09-10 13:39:08,573] Trial 2 finished with value: 0.7329232495899163 and parameters: {'feature_selection': 'None', 'kernel': 'linear', 'C': 3.852754892368117, 'gamma': 'auto'}. Best is trial 2 with value: 0.7329232495899163.


[A                                           


[I 2025-09-10 13:39:08,740] Trial 4 finished with value: 0.7852308185641519 and parameters: {'feature_selection': 'None', 'kernel': 'rbf', 'C': 82.07778546193242, 'gamma': 'auto'}. Best is trial 4 with value: 0.7852308185641519.


100%|██████████| 85/85 [00:02<00:00, 29.93it/s][A


Got MRMR features


100%|██████████| 85/85 [00:02<00:00, 30.21it/s]


Got MRMR features


100%|██████████| 85/85 [00:02<00:00, 31.09it/s]


Got MRMR features


100%|██████████| 85/85 [00:02<00:00, 31.72it/s]


Got MRMR features


100%|██████████| 85/85 [00:02<00:00, 30.96it/s]


Got MRMR features
[I 2025-09-10 13:39:23,119] Trial 0 finished with value: 0.7169164169164169 and parameters: {'feature_selection': 'MRMR', 'k_features': 85, 'kernel': 'linear', 'C': 9.692492732934237, 'gamma': 'auto'}. Best is trial 4 with value: 0.7852308185641519.

Binary Classification Optimization Results:
Best ROC AUC Score: 0.7852
Best Parameters:
  feature_selection: None
  kernel: rbf
  C: 82.07778546193242
  gamma: auto

Final Model Evaluation on Test Set:
Accuracy: 0.7167
Precision: 0.7222
Recall: 0.7879
F1 Score: 0.7536
ROC AUC: 0.7868

FEATURE IMPORTANCE (run 4)
Number of features in final model: 336


[I 2025-09-10 13:40:51,317] A new study created in memory with name: no-name-067e9ec5-3c94-4ac7-ab03-5b5fef5f89b9



STARTING RUN 5/10


  0%|          | 0/5 [00:00<?, ?it/s]


[A

[A[A                                
[A                                   
[A

[A[A                                
[A                                   
[A
[A

[I 2025-09-10 13:40:51,581] Trial 2 finished with value: 0.6147519147519148 and parameters: {'feature_selection': 'None', 'kernel': 'poly', 'C': 0.11581776743210287, 'gamma': 'auto', 'degree': 4}. Best is trial 2 with value: 0.6147519147519148.
[I 2025-09-10 13:40:51,593] Trial 3 finished with value: 0.6412235912235913 and parameters: {'feature_selection': 'None', 'kernel': 'poly', 'C': 40.00231058180918, 'gamma': 'scale', 'degree': 4}. Best is trial 3 with value: 0.6412235912235913.



[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

100%|██████████| 35/35 [00:02<00:00, 12.97it/s]


Got MRMR features




[A[A
[A

[A[A
[A
[A

[A[A
[A

[A[A
[A

[A[A

[A[A
[A
[A

[A[A
[A

[A[A
[A

100%|██████████| 55/55 [00:04<00:00, 13.22it/s]

[A

Got MRMR features




[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A
[A

[A[A
100%|██████████| 35/35 [00:02<00:00, 13.73it/s]


[A[A

Got MRMR features



[A

[A[A
[A

[A[A

[A[A
[A
[A

[A[A
[A

[A[A
[A

[A[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
100%|██████████| 35/35 [00:02<00:00, 13.69it/s]


[A[A

Got MRMR features



[A

[A[A
[A

100%|██████████| 105/105 [00:08<00:00, 12.82it/s]
100%|██████████| 55/55 [00:04<00:00, 13.01it/s]

[A

Got MRMR features
Got MRMR features



[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

100%|██████████| 35/35 [00:02<00:00, 12.79it/s]


[A[A

Got MRMR features



[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
100%|██████████| 55/55 [00:04<00:00, 12.80it/s]

[A

[A[A

Got MRMR features



[A

[A[A
[A

[A[A

[A[A
[A

[A[A
[A

[A[A
100%|██████████| 35/35 [00:02<00:00, 12.84it/s]



[A[A[A                                       
[A                                            

[A[A

[A[A




Got MRMR features
[I 2025-09-10 13:41:05,457] Trial 0 finished with value: 0.7186955020288354 and parameters: {'feature_selection': 'MRMR', 'k_features': 35, 'kernel': 'rbf', 'C': 44.29006525560096, 'gamma': 'auto'}. Best is trial 0 with value: 0.7186955020288354.


 66%|██████▌   | 69/105 [00:05<00:02, 14.54it/s][A[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

100%|██████████| 105/105 [00:07<00:00, 14.60it/s]
100%|██████████| 55/55 [00:02<00:00, 18.58it/s]


Got MRMR features
Got MRMR features



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
100%|██████████| 55/55 [00:02<00:00, 19.22it/s]


[A[A                                          
[A
[A

Got MRMR features
[I 2025-09-10 13:41:10,601] Trial 1 finished with value: 0.6568055401388735 and parameters: {'feature_selection': 'MRMR', 'k_features': 55, 'kernel': 'poly', 'C': 4.563010087589046, 'gamma': 'auto', 'degree': 2}. Best is trial 0 with value: 0.7186955020288354.



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
100%|██████████| 105/105 [00:04<00:00, 23.97it/s]


Got MRMR features


100%|██████████| 105/105 [00:03<00:00, 31.22it/s]


Got MRMR features


100%|██████████| 105/105 [00:03<00:00, 30.21it/s]


Got MRMR features
[I 2025-09-10 13:41:19,306] Trial 4 finished with value: 0.6964331964331965 and parameters: {'feature_selection': 'MRMR', 'k_features': 105, 'kernel': 'linear', 'C': 49.09357184883411, 'gamma': 'scale'}. Best is trial 0 with value: 0.7186955020288354.

Binary Classification Optimization Results:
Best ROC AUC Score: 0.7187
Best Parameters:
  feature_selection: MRMR
  k_features: 35
  kernel: rbf
  C: 44.29006525560096
  gamma: auto


100%|██████████| 35/35 [00:01<00:00, 29.86it/s]


Got MRMR features

Final Model Evaluation on Test Set:
Accuracy: 0.6333
Precision: 0.6897
Recall: 0.6061
F1 Score: 0.6452
ROC AUC: 0.7587

FEATURE IMPORTANCE (run 5)
Number of features in final model: 35


[I 2025-09-10 13:41:25,593] A new study created in memory with name: no-name-c3edc39b-f3b4-42df-9dcc-15997de63b1b



STARTING RUN 6/10


  0%|          | 0/5 [00:00<?, ?it/s]


[A                                  

[I 2025-09-10 13:41:25,814] Trial 0 finished with value: 0.6341461008127675 and parameters: {'feature_selection': 'None', 'kernel': 'poly', 'C': 0.11507077968722317, 'gamma': 'auto', 'degree': 2}. Best is trial 0 with value: 0.6341461008127675.



100%|██████████| 5/5 [00:00<00:00, 11.78it/s]


[I 2025-09-10 13:41:26,045] Trial 4 finished with value: 0.7785412118745452 and parameters: {'feature_selection': 'None', 'kernel': 'rbf', 'C': 1.0168415884490283, 'gamma': 'scale'}. Best is trial 4 with value: 0.7785412118745452.
Got MRMR features


100%|██████████| 5/5 [00:00<00:00, 16.56it/s]


Got MRMR features


100%|██████████| 5/5 [00:00<00:00, 16.62it/s]


Got MRMR features


100%|██████████| 5/5 [00:00<00:00, 16.48it/s]


Got MRMR features


100%|██████████| 5/5 [00:00<00:00, 16.36it/s]


Got MRMR features
[I 2025-09-10 13:41:27,919] Trial 1 finished with value: 0.6677587844254511 and parameters: {'feature_selection': 'MRMR', 'k_features': 5, 'kernel': 'poly', 'C': 74.9021059933421, 'gamma': 'scale', 'degree': 5}. Best is trial 4 with value: 0.7785412118745452.
[I 2025-09-10 13:46:06,956] Trial 3 finished with value: 0.7213847880514547 and parameters: {'feature_selection': 'RFE', 'k_features': 105, 'kernel': 'poly', 'C': 11.523609834946349, 'gamma': 'scale', 'degree': 3}. Best is trial 4 with value: 0.7785412118745452.
[I 2025-09-10 13:46:14,406] Trial 2 finished with value: 0.7255281755281755 and parameters: {'feature_selection': 'RFE', 'k_features': 95, 'kernel': 'linear', 'C': 3.735196862645632, 'gamma': 'scale'}. Best is trial 4 with value: 0.7785412118745452.

Binary Classification Optimization Results:
Best ROC AUC Score: 0.7785
Best Parameters:
  feature_selection: None
  kernel: rbf
  C: 1.0168415884490283
  gamma: scale

Final Model Evaluation on Test Set:
Accu

[I 2025-09-10 13:47:39,930] A new study created in memory with name: no-name-13eff9fa-6e89-4fa0-9b07-73a1709d364b



STARTING RUN 7/10


  0%|          | 0/5 [00:00<?, ?it/s]

[I 2025-09-10 13:47:40,251] Trial 1 finished with value: 0.7855804689138022 and parameters: {'feature_selection': 'None', 'kernel': 'rbf', 'C': 45.74781833591746, 'gamma': 'scale'}. Best is trial 1 with value: 0.7855804689138022.
[I 2025-09-10 13:47:40,276] Trial 2 finished with value: 0.7852308185641519 and parameters: {'feature_selection': 'None', 'kernel': 'rbf', 'C': 65.72018507622396, 'gamma': 'auto'}. Best is trial 1 with value: 0.7855804689138022.


100%|██████████| 65/65 [00:05<00:00, 11.29it/s]


Got MRMR features


100%|██████████| 65/65 [00:04<00:00, 14.00it/s]


Got MRMR features


100%|██████████| 65/65 [00:04<00:00, 13.94it/s]


Got MRMR features


100%|██████████| 65/65 [00:04<00:00, 13.84it/s]


Got MRMR features


100%|██████████| 65/65 [00:04<00:00, 14.10it/s]


Got MRMR features
[I 2025-09-10 13:48:07,582] Trial 3 finished with value: 0.6625664458997793 and parameters: {'feature_selection': 'MRMR', 'k_features': 65, 'kernel': 'poly', 'C': 1.2557638509000646, 'gamma': 'scale', 'degree': 4}. Best is trial 1 with value: 0.7855804689138022.
[I 2025-09-10 13:53:43,279] Trial 0 finished with value: 0.7496003996003996 and parameters: {'feature_selection': 'RFE', 'k_features': 45, 'kernel': 'linear', 'C': 14.758432380379935, 'gamma': 'auto'}. Best is trial 1 with value: 0.7855804689138022.
[I 2025-09-10 13:54:02,040] Trial 4 finished with value: 0.6408770242103575 and parameters: {'feature_selection': 'RFE', 'k_features': 15, 'kernel': 'poly', 'C': 20.625241768992275, 'gamma': 'auto', 'degree': 2}. Best is trial 1 with value: 0.7855804689138022.

Binary Classification Optimization Results:
Best ROC AUC Score: 0.7856
Best Parameters:
  feature_selection: None
  kernel: rbf
  C: 45.74781833591746
  gamma: scale

Final Model Evaluation on Test Set:
Accu

[I 2025-09-10 13:55:30,287] A new study created in memory with name: no-name-56776cb5-6cf8-4c06-85b8-46504eebdf28



STARTING RUN 8/10


  0%|          | 0/5 [00:00<?, ?it/s]

100%|██████████| 75/75 [00:09<00:00,  7.51it/s]


Got MRMR features


100%|██████████| 75/75 [00:08<00:00,  8.36it/s]


Got MRMR features


100%|██████████| 75/75 [00:10<00:00,  7.01it/s]


Got MRMR features


100%|██████████| 75/75 [00:09<00:00,  7.90it/s]


Got MRMR features


100%|██████████| 75/75 [00:09<00:00,  7.67it/s]


Got MRMR features
[I 2025-09-10 13:56:22,973] Trial 1 finished with value: 0.7494153994153994 and parameters: {'feature_selection': 'MRMR', 'k_features': 75, 'kernel': 'poly', 'C': 0.10704132765705353, 'gamma': 'auto', 'degree': 3}. Best is trial 1 with value: 0.7494153994153994.


100%|██████████| 25/25 [00:03<00:00,  7.05it/s]


Got MRMR features


100%|██████████| 25/25 [00:03<00:00,  7.08it/s]


Got MRMR features


100%|██████████| 25/25 [00:03<00:00,  7.81it/s]


Got MRMR features


100%|██████████| 25/25 [00:03<00:00,  7.97it/s]


Got MRMR features


100%|██████████| 25/25 [00:03<00:00,  7.49it/s]


Got MRMR features
[I 2025-09-10 13:56:40,812] Trial 4 finished with value: 0.7478996312329647 and parameters: {'feature_selection': 'MRMR', 'k_features': 25, 'kernel': 'rbf', 'C': 0.14062706363532934, 'gamma': 'auto'}. Best is trial 1 with value: 0.7494153994153994.
[I 2025-09-10 14:04:32,840] Trial 3 finished with value: 0.7774675941342608 and parameters: {'feature_selection': 'RFE', 'k_features': 35, 'kernel': 'rbf', 'C': 0.16419427593925218, 'gamma': 'auto'}. Best is trial 3 with value: 0.7774675941342608.
[I 2025-09-10 14:04:33,097] Trial 0 finished with value: 0.778169978169978 and parameters: {'feature_selection': 'RFE', 'k_features': 35, 'kernel': 'rbf', 'C': 0.17333030022252802, 'gamma': 'scale'}. Best is trial 0 with value: 0.778169978169978.
[I 2025-09-10 14:04:34,058] Trial 2 finished with value: 0.7744163244163245 and parameters: {'feature_selection': 'RFE', 'k_features': 35, 'kernel': 'linear', 'C': 4.597080998418853, 'gamma': 'scale'}. Best is trial 0 with value: 0.778169

[I 2025-09-10 14:05:39,940] A new study created in memory with name: no-name-af8d2125-d2a8-49da-8df8-6599029e5c5d



STARTING RUN 9/10


  0%|          | 0/5 [00:00<?, ?it/s]

[I 2025-09-10 14:05:40,259] Trial 0 finished with value: 0.652260085593419 and parameters: {'feature_selection': 'None', 'kernel': 'poly', 'C': 5.7297298847783225, 'gamma': 'scale', 'degree': 4}. Best is trial 0 with value: 0.652260085593419.
[I 2025-09-10 14:05:40,623] Trial 4 finished with value: 0.6868785535452202 and parameters: {'feature_selection': 'None', 'kernel': 'poly', 'C': 11.815455243493897, 'gamma': 'auto', 'degree': 3}. Best is trial 4 with value: 0.6868785535452202.


100%|██████████| 75/75 [00:06<00:00, 11.38it/s]


Got MRMR features


100%|██████████| 75/75 [00:05<00:00, 13.50it/s]


Got MRMR features


100%|██████████| 75/75 [00:05<00:00, 13.23it/s]


Got MRMR features


100%|██████████| 75/75 [00:05<00:00, 13.49it/s]


Got MRMR features


100%|██████████| 75/75 [00:05<00:00, 13.24it/s]


Got MRMR features
[I 2025-09-10 14:06:13,171] Trial 1 finished with value: 0.7627699461032794 and parameters: {'feature_selection': 'MRMR', 'k_features': 75, 'kernel': 'rbf', 'C': 36.02768825729103, 'gamma': 'scale'}. Best is trial 1 with value: 0.7627699461032794.
[I 2025-09-10 14:11:27,969] Trial 3 finished with value: 0.7218176884843552 and parameters: {'feature_selection': 'RFE', 'k_features': 65, 'kernel': 'linear', 'C': 60.16995658578677, 'gamma': 'auto'}. Best is trial 1 with value: 0.7627699461032794.
[I 2025-09-10 14:11:32,007] Trial 2 finished with value: 0.7937272603939272 and parameters: {'feature_selection': 'RFE', 'k_features': 55, 'kernel': 'rbf', 'C': 3.6425647752324055, 'gamma': 'auto'}. Best is trial 2 with value: 0.7937272603939272.

Binary Classification Optimization Results:
Best ROC AUC Score: 0.7937
Best Parameters:
  feature_selection: RFE
  k_features: 55
  kernel: rbf
  C: 3.6425647752324055
  gamma: auto

Final Model Evaluation on Test Set:
Accuracy: 0.6833
P

[I 2025-09-10 14:12:37,194] A new study created in memory with name: no-name-4ac95f46-e5fc-49cb-8ef1-986d12f7d300



STARTING RUN 10/10


  0%|          | 0/5 [00:00<?, ?it/s]

[I 2025-09-10 14:12:37,420] Trial 0 finished with value: 0.6228123728123729 and parameters: {'feature_selection': 'None', 'kernel': 'poly', 'C': 0.4480321001313627, 'gamma': 'auto', 'degree': 4}. Best is trial 0 with value: 0.6228123728123729.
[I 2025-09-10 14:12:37,502] Trial 3 finished with value: 0.7329232495899163 and parameters: {'feature_selection': 'None', 'kernel': 'linear', 'C': 23.37789259053337, 'gamma': 'auto'}. Best is trial 3 with value: 0.7329232495899163.
[I 2025-09-10 14:12:37,714] Trial 4 finished with value: 0.7382204215537549 and parameters: {'feature_selection': 'None', 'kernel': 'rbf', 'C': 0.309602614710893, 'gamma': 'scale'}. Best is trial 4 with value: 0.7382204215537549.



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
100%|██████████| 55/55 [00:03<00:00, 17.28it/s]


Got MRMR features


100%|██████████| 75/75 [00:04<00:00, 16.96it/s]


Got MRMR features



[A
[A
[A
[A
[A
[A
100%|██████████| 55/55 [00:03<00:00, 16.79it/s]

[A
[A

Got MRMR features



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
100%|██████████| 75/75 [00:02<00:00, 25.16it/s]


Got MRMR features



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
100%|██████████| 75/75 [00:03<00:00, 21.37it/s]

[A

Got MRMR features



[A
[A
[A
100%|██████████| 55/55 [00:02<00:00, 18.50it/s]


Got MRMR features



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
100%|██████████| 75/75 [00:03<00:00, 20.26it/s]

[A

Got MRMR features



[A
[A
[A
[A
[A
[A
100%|██████████| 55/55 [00:03<00:00, 17.56it/s]


Got MRMR features


100%|██████████| 75/75 [00:02<00:00, 25.59it/s]


Got MRMR features
[I 2025-09-10 14:13:00,446] Trial 1 finished with value: 0.7705146705146706 and parameters: {'feature_selection': 'MRMR', 'k_features': 75, 'kernel': 'linear', 'C': 0.466019631674853, 'gamma': 'scale'}. Best is trial 1 with value: 0.7705146705146706.


100%|██████████| 55/55 [00:01<00:00, 28.75it/s]


Got MRMR features
[I 2025-09-10 14:13:04,134] Trial 2 finished with value: 0.700888000888001 and parameters: {'feature_selection': 'MRMR', 'k_features': 55, 'kernel': 'linear', 'C': 90.39436647324135, 'gamma': 'auto'}. Best is trial 1 with value: 0.7705146705146706.

Binary Classification Optimization Results:
Best ROC AUC Score: 0.7705
Best Parameters:
  feature_selection: MRMR
  k_features: 75
  kernel: linear
  C: 0.466019631674853
  gamma: scale


100%|██████████| 75/75 [00:02<00:00, 29.42it/s]


Got MRMR features

Final Model Evaluation on Test Set:
Accuracy: 0.6667
Precision: 0.7097
Recall: 0.6667
F1 Score: 0.6875
ROC AUC: 0.7273

FEATURE IMPORTANCE (run 10)
Number of features in final model: 75

SUMMARY ACROSS ALL RUNS
best_cv_score: 0.7730 ± 0.0211
accuracy: 0.6717 ± 0.0315
precision: 0.6893 ± 0.0276
recall: 0.7364 ± 0.0623
f1: 0.7107 ± 0.0347
roc_auc: 0.7490 ± 0.0320


NameError: name 'results_outpath' is not defined