In [None]:
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.svm import SVC
from sklearn.datasets import make_classification
import matplotlib.pyplot as plt
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, StackingClassifier
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint
import pandas as pd
import numpy as np

In [2]:
x, y = make_classification(n_samples=1000, n_classes=2, weights=[0.7, 0.3], n_features=10, random_state=42)

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

**Tuning Random Forest**

In [4]:
param_dist = {
    'n_estimators': randint(50, 300),
    'max_depth': [5, 10, 15, 20, None],
    'min_samples_split': randint(2, 20),
    'min_samples_leaf': randint(1, 10),
    'max_features': ['sqrt', 'log2', None]
}

rf_random = RandomizedSearchCV(
    RandomForestClassifier(random_state=42),
    param_dist,
    n_iter=20,
    cv=5,
    scoring='accuracy',
    random_state=42,
    n_jobs=-1
)

rf_random.fit(x_train, y_train)
print(f"Best params: {rf_random.best_params_}")
print(f"Best score: {rf_random.best_score_:.3f}")

Best params: {'max_depth': 20, 'max_features': 'sqrt', 'min_samples_leaf': 3, 'min_samples_split': 6, 'n_estimators': 100}
Best score: 0.935


**Tuning XGBoost**

In [5]:
xgb_param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 5, 7],
    'subsample': [0.8, 0.9, 1.0],
    'colsample_bytree': [0.8, 0.9, 1.0],
    'gamma': [0, 0.1, 0.2],
    'reg_alpha': [0, 0.1, 1],
    'reg_lambda': [1, 1.5, 2]
}

xgb_grid = RandomizedSearchCV(
    XGBClassifier(random_state=42),
    xgb_param_grid,
    n_iter=20,
    cv=5,
    scoring='accuracy',
    n_jobs=-1
)
    
xgb_grid.fit(x_train, y_train)
print(f"Best XGBoost params: {xgb_grid.best_params_}")

Best XGBoost params: {'subsample': 1.0, 'reg_lambda': 1.5, 'reg_alpha': 0.1, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.1, 'gamma': 0, 'colsample_bytree': 0.8}


**Tuning Stacking Meta-Learner**

In [6]:
# Tune meta-learner in stacking
base_models = [
    ('rf', RandomForestClassifier(n_estimators=100, random_state=42)),
    ('gb', GradientBoostingClassifier(n_estimators=100, random_state=42))
]

# Try different meta-learners
meta_learners = {
    'Logistic Regression': LogisticRegression(random_state=42, max_iter=1000),
    'Ridge': RidgeClassifier(random_state=42),
    'SVM': SVC(probability=True, random_state=42)
}

best_meta = None
best_score = 0

for name, meta in meta_learners.items():
    stacking = StackingClassifier(
        estimators=base_models,
        final_estimator=meta,
        cv=5
    )
    
    scores = cross_val_score(
        stacking, x_train, y_train,
        cv=5, scoring='accuracy', n_jobs=-1
    )
    
    if scores.mean() > best_score:
        best_score = scores.mean()
        best_meta = name
    
    print(f"{name}: {scores.mean():.3f} (+/- {scores.std():.3f})")

print(f"\nBest meta-learner: {best_meta}")

Logistic Regression: 0.934 (+/- 0.015)
Ridge: 0.935 (+/- 0.013)
SVM: 0.934 (+/- 0.013)

Best meta-learner: Ridge
