In [1]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.pipeline import Pipeline
import numpy as np

In [2]:
# Hyperparameter search space for each base model, keyed by the model's short
# name. Every parameter carries the 'classifier__' prefix so that it addresses
# the pipeline step named 'classifier'.
param_grids = dict(
    # Logistic regression: regularization strength and type. liblinear is the
    # only solver listed because it supports both l1 and l2 penalties.
    lr=dict(
        classifier__C=[0.1, 1, 10],
        classifier__penalty=['l1', 'l2'],
        classifier__solver=['liblinear'],
    ),
    # Random forest: number of trees and maximum tree depth (None = unlimited).
    rf=dict(
        classifier__n_estimators=[50, 100, 200],
        classifier__max_depth=[None, 10, 20, 30],
    ),
    # Support vector classifier: regularization strength and kernel choice.
    svc=dict(
        classifier__C=[0.1, 1, 10],
        classifier__kernel=['linear', 'rbf'],
    ),
    # K-nearest neighbors: neighborhood size and vote weighting scheme.
    knn=dict(
        classifier__n_neighbors=[3, 5, 7],
        classifier__weights=['uniform', 'distance'],
    ),
    # Extra trees: same axes as the random forest.
    et=dict(
        classifier__n_estimators=[50, 100, 200],
        classifier__max_depth=[None, 10, 20, 30],
    ),
    # XGBoost: number of boosting rounds, learning rate and tree depth.
    xgb=dict(
        classifier__n_estimators=[50, 100, 200],
        classifier__learning_rate=[0.01, 0.1, 0.2],
        classifier__max_depth=[3, 5, 7],
    ),
    # AdaBoost: number of estimators and learning rate.
    ada=dict(
        classifier__n_estimators=[50, 100, 200],
        classifier__learning_rate=[0.01, 0.1, 0.5],
    ),
)

In [3]:
from sklearn.ensemble import StackingClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier,AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [4]:
from xgboost import XGBClassifier

In [5]:
# Candidate base learners as (short name, unfitted estimator) pairs. The short
# names are the keys used to look up each model's search space in param_grids.
base_models = [
    # Linear model; max_iter is set to 5000 iterations.
    (
        'lr',
        LogisticRegression(
            max_iter=5000,
            random_state=42,
        ),
    ),
    # Tree-based bagging ensemble.
    (
        'rf',
        RandomForestClassifier(
            n_estimators=100,
            random_state=42,
        ),
    ),
    # SVM with a linear kernel and probability estimates enabled.
    (
        'svc',
        SVC(
            kernel='linear',
            probability=True,
            random_state=42,
        ),
    ),
    # K-nearest neighbors.
    (
        'knn',
        KNeighborsClassifier(
            n_neighbors=5,
        ),
    ),
    # Extra-trees ensemble.
    (
        'et',
        ExtraTreesClassifier(
            n_estimators=100,
            random_state=42,
        ),
    ),
    # XGBoost classifier evaluated with log loss.
    (
        'xgb',
        XGBClassifier(
            eval_metric='logloss',
            random_state=42,
        ),
    ),
    # AdaBoost ensemble.
    (
        'ada',
        AdaBoostClassifier(
            n_estimators=100,
            random_state=42,
        ),
    ),
]

In [6]:
from sklearn.preprocessing import StandardScaler

In [7]:
import pandas as pd

In [None]:
# Load the dataset into a DataFrame.
# NOTE(review): the path is an empty string (presumably a placeholder), so this
# read fails until a real CSV path is filled in.
df = pd.read_csv(filepath_or_buffer='')

In [None]:
# Tune each base model inside a scaler + classifier pipeline with a randomized
# search, and keep the best refit pipeline per model name in best_models.
# NOTE(review): X_train and Y_train are not defined in the visible cells —
# confirm that a cell creating the training split runs before this one.
best_models = {}
for name, model in base_models:
    print(f"Tuning hyperparameters for: {name}")

    # Standardization lives inside the pipeline so it is fitted together with
    # the classifier rather than once on the full training set.
    pipeline = Pipeline([
        ('scaler', StandardScaler()),
        ('classifier', model)
    ])

    # Every search space in param_grids is a dict of finite lists, so the
    # number of distinct candidates is the product of the list lengths.
    grid = param_grids[name]
    n_candidates = int(np.prod([len(values) for values in grid.values()]))

    # Randomized search over the grid. n_iter is capped at the grid size:
    # asking for more iterations than there are candidates only triggers a
    # UserWarning and is silently reduced to the grid size anyway. With the
    # cap, small grids are searched exhaustively and large ones are sampled
    # with at most 20 candidates.
    search = RandomizedSearchCV(
        pipeline,
        param_distributions=grid,
        n_iter=min(20, n_candidates),
        scoring='accuracy',
        cv=5,
        random_state=42,
        n_jobs=-1
    )

    # Run the cross-validated search on the training data.
    search.fit(X_train, Y_train)

    # Keep the pipeline refit with the best parameter combination.
    best_models[name] = search.best_estimator_
    print(f"Best parameters for {name}: {search.best_params_}")
    print(f"Best cross-validated accuracy for {name}: {search.best_score_:.4f}\n")
