In [None]:
import itertools

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, make_scorer
from sklearn.model_selection import StratifiedKFold, cross_val_score, cross_validate
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from xgboost import XGBClassifier

# Load the labelled dataset; the target is read from the 'Y' column.
data_path = 'categorized_data.csv'
df = pd.read_csv(data_path)

# Candidate feature columns (13 in total) from which subsets are drawn.
special_feature_columns = [
    "seedNumber_1", "seedEbest_1", "seedNumber_3", "Pu1_1", "Pu2_1",
    "pumin1_4u", "pumin5_8u", "pumin1_4d", "pumin5_8d",
    "E_diff_12", "E_3", "E_1", "E_hybrid_1",
]
y = df['Y']

# Classifiers to compare, keyed by the display name written to the results.
# NOTE(review): recent XGBoost releases expect integer class labels 0..K-1;
# confirm that 'Y' is already encoded that way.
models = {
    'Logistic Regression': LogisticRegression(max_iter=1000),
    # Seeded so repeated runs give the same forest (the CV splitter below is
    # seeded with the same value).
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    # `use_label_encoder` was dropped here: XGBoost reports it as an unused
    # parameter on every fit, which floods the output with warnings.
    'XGBoost': XGBClassifier(eval_metric='mlogloss'),
    'SVM': SVC(),
    'KNN': KNeighborsClassifier(),
}

# Metrics computed on every validation fold.
scoring = {
    'accuracy': make_scorer(accuracy_score),
    'f1_macro': make_scorer(f1_score, average='macro'),
}

# One dict per (model, feature subset) is appended here.
results = []
# Shuffled, stratified 5-fold split with a fixed seed so every model and
# feature subset is evaluated on the same folds.
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Exhaustively evaluate every feature subset of size 5 through 13 (13 being
# the full feature list) with every model, then dump the scores to CSV.
for r in range(5, 14):
    for combination in itertools.combinations(special_feature_columns, r):
        X = df[list(combination)]

        for name, model in models.items():
            # Scaling is part of the pipeline so the scaler is re-fit on the
            # training portion of each fold only; fitting it on the full
            # dataset beforehand would leak validation-fold statistics into
            # training and bias the scores.
            pipeline = make_pipeline(StandardScaler(), model)

            # A single cross_validate call scores all metrics from the same
            # fitted models, instead of re-fitting once per metric.
            cv_results = cross_validate(pipeline, X, y, cv=kf, scoring=scoring)
            accuracy = cv_results['test_accuracy']
            f1_macro = cv_results['test_f1_macro']

            results.append({
                'model': name,
                'features': combination,
                'accuracy_mean': np.mean(accuracy),
                'accuracy_std': np.std(accuracy),
                'f1_macro_mean': np.mean(f1_macro),
                'f1_macro_std': np.std(f1_macro),
            })

# Persist one row per (model, feature subset) pair.
results_df = pd.DataFrame(results)
results_df.to_csv('model_comparison_results.csv', index=False)
print("Results saved to 'model_comparison_results.csv'")



Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encode