In [4]:
from data_loader import process_files
from feature_selection_MAO import model_with_metaheuristic_feature_selection
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score
from functools import partial
import numpy as np
import pandas as pd
import warnings
import time
warnings.filterwarnings("ignore")

# Load the benchmark data. process_files is called with the "all_datasets"
# location and its result is unpacked into two parallel sequences: the
# datasets themselves and their display names.
# NOTE(review): each dataset is later indexed with .iloc, so they are
# presumably pandas DataFrames -- confirm against data_loader.
_loaded = process_files("all_datasets")
datasets, dataset_names = _loaded

# Classifier factories keyed by the label used for the result columns.
# Every value is a zero-argument callable (a class or a functools.partial);
# calling it yields a fresh, unfitted estimator. Insertion order is kept
# because it determines the column order of the result tables.
# NOTE(review): the MLP, Random Forest and Decision Tree factories do not set
# random_state, so their scores may differ between runs -- confirm whether
# seeding is wanted.
models = {
    "SVM": partial(SVC, kernel="linear", max_iter=1000),
    # One k-NN variant per neighbourhood size.
    **{
        f"k-NN (k={n})": partial(KNeighborsClassifier, n_neighbors=n)
        for n in (1, 3, 5)
    },
    "Bayesian": GaussianNB,
    "Logistic Regression": partial(LogisticRegression, max_iter=200),
    "MLP": partial(MLPClassifier, max_iter=300),
    "Random Forest": partial(RandomForestClassifier, n_estimators=100),
    "Decision Tree": DecisionTreeClassifier,
}

# Result accumulators: the main loop appends one dict per dataset to each of
# these lists (F1 scores, elapsed times, selected features).
f1_results, time_results, selected_features_results = [], [], []

# Splitter used for the baseline evaluation: 5 stratified folds, shuffled
# with a fixed seed so the fold assignment is the same on every run.
kf = StratifiedKFold(random_state=42, shuffle=True, n_splits=5)

# Process each dataset
#
# For every (dataset, model) pair two experiments are run and recorded:
#   1. baseline  - cross-validated F1 using all features (column "<model>");
#   2. MAO       - the same model evaluated through
#                  model_with_metaheuristic_feature_selection
#                  (column "<model>-MAO", plus the selected features).
# One dict per dataset is appended to f1_results, time_results and
# selected_features_results.

# Derive the MAO validation parameters from the baseline splitter so both
# experiments always share the same cross-validation configuration.
cv_params = {
    "n_splits": kf.n_splits,
    "shuffle": kf.shuffle,
    "random_state": kf.random_state,
}

# MAO hyper-parameters are identical for every dataset/model, so they are
# built once instead of being re-spelled inside the nested loops.
mao_settings = {
    "mao_metric": "alpha",  # "alpha", "alpha-mean" or a sklearn metric
    "evaluation_metric": f1_score,  # sklearn metric
    "validation_method": "stratified_kfold",
    "pop_size": 50,
    "max_iter": 1000,
    "early_stopping_steps": 20,
    "transition_prob": 0.5,
    "injury_prob": 0.3,
    "regeneration_prob": 0.1,
    "lambda_factor": 0.5,
    "k": 3,
    "number_of_steps": 100,
}

for dataset, dataset_name in zip(datasets, dataset_names):
    print(f"Processing dataset: {dataset_name}")
    # The last column is taken as the target; all preceding columns are
    # the feature matrix.
    X = dataset.iloc[:, :-1].values
    y = dataset.iloc[:, -1].values

    dataset_f1 = {"Dataset": dataset_name}
    dataset_time = {"Dataset": dataset_name}
    dataset_features = {"Dataset": dataset_name}

    for model_name, model in models.items():
        # Without feature selection
        fold_f1_scores = []
        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by system clock adjustments during long runs.
        start_time = time.perf_counter()

        for train_idx, test_idx in kf.split(X, y):
            X_train, X_test = X[train_idx], X[test_idx]
            y_train, y_test = y[train_idx], y[test_idx]

            # A fresh estimator per fold: nothing carries over between folds.
            model_instance = model()
            model_instance.fit(X_train, y_train)
            y_pred = model_instance.predict(X_test)
            # NOTE(review): f1_score is used with its defaults (binary
            # averaging); this assumes every dataset is a two-class problem
            # with a positive label of 1 -- confirm, e.g. for "Iris".
            fold_f1_scores.append(f1_score(y_test, y_pred))

        mean_f1_without = np.mean(fold_f1_scores)
        elapsed_time_without = time.perf_counter() - start_time

        # Save results without feature selection
        dataset_f1[model_name] = mean_f1_without
        dataset_time[model_name] = elapsed_time_without

        # With feature selection (MAO)
        start_time = time.perf_counter()

        meta_results = model_with_metaheuristic_feature_selection(
            datasets=[dataset],
            datasets_names=[dataset_name],
            model=model,
            # Pass a copy so each call receives its own dict, as before.
            validation_params=dict(cv_params),
            **mao_settings,
        )

        elapsed_time_with = time.perf_counter() - start_time
        # The helper's result is indexed by dataset name; only the mean
        # metric and the chosen feature subset are kept.
        mean_f1_with = meta_results[dataset_name]["mean_metric"]
        selected_features = meta_results[dataset_name]["selected_features"]

        # Save results with feature selection
        dataset_f1[f"{model_name}-MAO"] = mean_f1_with
        dataset_time[f"{model_name}-MAO"] = elapsed_time_with
        dataset_features[f"{model_name}-Selected Features"] = selected_features

    # Append results
    f1_results.append(dataset_f1)
    time_results.append(dataset_time)
    selected_features_results.append(dataset_features)

# Persist the three result tables as Excel workbooks (one row per dataset,
# no index column). All frames are built before anything is written.
# NOTE(review): the file names carry an "alfa-mean" prefix while the run
# above passes mao_metric="alpha" -- confirm the prefix is intended.
f1_df, time_df, features_df = (
    pd.DataFrame(records)
    for records in (f1_results, time_results, selected_features_results)
)

for _frame, _workbook in (
    (f1_df, "alfa-mean_Results.xlsx"),
    (time_df, "alfa-mean_Time_Results.xlsx"),
    (features_df, "alfa-mean_Selected_Features.xlsx"),
):
    _frame.to_excel(_workbook, index=False)


Processing dataset: twonorm with F1 metric
Processing dataset: spectfheart with F1 metric
Processing dataset: appendicitis with F1 metric
Processing dataset: pima with F1 metric
Processing dataset: Iris with F1 metric
Processing dataset: titanic with F1 metric
Processing dataset: mammographic with F1 metric
Processing dataset: Nutt with F1 metric
Processing dataset: spambase with F1 metric
Processing dataset: haberman with F1 metric
Processing dataset: heart with F1 metric
Processing dataset: australian with F1 metric
Processing dataset: sonar with F1 metric
Processing dataset: ring with F1 metric
Processing dataset: wdbc with F1 metric
Processing dataset: hepatitis with F1 metric
Processing dataset: phoneme with F1 metric
Processing dataset: banana with F1 metric
Processing dataset: bands with F1 metric
Processing dataset: ionosphere with F1 metric
Processing dataset: bupa with F1 metric
Processing dataset: wisconsin with F1 metric
