In [8]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV, cross_val_score, StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, roc_auc_score


In [9]:
search_space = [
    {
        'classifier': [RandomForestClassifier()],
        'classifier__n_estimators': [16, 32],
        'classifier__max_depth': [None],  # 'null' is equivalent to None in Python
        'classifier__min_samples_split': [2],
        'classifier__min_samples_leaf': [1],
    },
    {
        'classifier': [ExtraTreesClassifier()],
        'classifier__n_estimators': [16, 32],
        'classifier__max_depth': [None],
        'classifier__min_samples_split': [2],
        'classifier__min_samples_leaf': [1],
    },
    {
        'classifier': [AdaBoostClassifier()],
        'classifier__n_estimators': [16, 32],
        'classifier__learning_rate': [0.01],
    },
    {
        'classifier': [GradientBoostingClassifier()],
        'classifier__n_estimators': [16, 32],
        'classifier__learning_rate': [0.01, 0.1],
        'classifier__max_depth': [5],
    }
]

In [10]:


# Example synthetic dataset
X, y = make_classification(n_samples=1000, n_features=20, n_informative=2, n_redundant=10, random_state=42)

# Define a 10-fold cross-validation split
cv = StratifiedKFold(n_splits=10, random_state=42, shuffle=True)

# Creating a pipeline with a standard scaler and a placeholder for the classifier
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('classifier', SVC())
])

# Define multiple scoring metrics
scoring = {
    'accuracy': make_scorer(accuracy_score),
    'precision': make_scorer(precision_score),
    'recall': make_scorer(recall_score),
    'roc_auc_score': make_scorer(roc_auc_score)
}



In [11]:
grid_search = GridSearchCV(pipeline, search_space, cv=cv, verbose=1, n_jobs=-1, scoring=scoring, refit='roc_auc_score')

# Execute the grid search
grid_search.fit(X, y)

Fitting 10 folds for each of 10 candidates, totalling 100 fits


In [17]:
cv_results = grid_search.cv_results_

for scorer in scoring:
    sample_score_mean = cv_results['mean_test_%s' % (scorer)]
    sample_score_std = cv_results['std_test_%s' % (scorer)]
    print(f"Mean {scorer} score across the folds: {sample_score_mean.mean():.3f} (+/- {sample_score_std.mean():.3f})")

Mean accuracy score across the folds: 0.900 (+/- 0.019)
Mean precision score across the folds: 0.905 (+/- 0.043)
Mean recall score across the folds: 0.896 (+/- 0.042)
Mean roc_auc_score score across the folds: 0.901 (+/- 0.019)


In [18]:
grid_search.best_estimator_

In [19]:
grid_search.best_params_

{'classifier': RandomForestClassifier(n_estimators=16),
 'classifier__max_depth': None,
 'classifier__min_samples_leaf': 1,
 'classifier__min_samples_split': 2,
 'classifier__n_estimators': 16}

In [20]:
grid_search.best_score_

0.918793595205717

In [21]:
grid_search

In [22]:
cv_results

{'mean_fit_time': array([0.11341937, 0.19121616, 0.03996608, 0.08383377, 0.12894871,
        0.23411698, 0.35794437, 0.71813216, 0.36578803, 0.61224074]),
 'std_fit_time': array([0.0192808 , 0.01286633, 0.00366282, 0.00562174, 0.01123505,
        0.00670233, 0.01440088, 0.02394098, 0.01432255, 0.03255893]),
 'mean_score_time': array([0.00940688, 0.00820084, 0.00795372, 0.01230147, 0.00780113,
        0.01100574, 0.00724185, 0.00572016, 0.00540183, 0.00354924]),
 'std_score_time': array([0.00365921, 0.00147003, 0.00214735, 0.00363282, 0.00166101,
        0.00471274, 0.00446894, 0.00265121, 0.00119878, 0.00123749]),
 'param_classifier': masked_array(data=[RandomForestClassifier(n_estimators=16),
                    RandomForestClassifier(n_estimators=16),
                    ExtraTreesClassifier(), ExtraTreesClassifier(),
                    AdaBoostClassifier(), AdaBoostClassifier(),
                    GradientBoostingClassifier(),
                    GradientBoostingClassifier(),
    