In [5]:
from sklearn.model_selection import GridSearchCV, cross_val_score, KFold, train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
import numpy as np

In [3]:
# Synthetic binary-classification dataset: 1000 samples, 20 features,
# generated with a fixed seed so the notebook is reproducible.
x, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_classes=2,
    random_state=42,
)

# 80/20 hold-out split, seeded for reproducibility.
# NOTE(review): the nested-CV cells visible in this notebook iterate over the
# full `x` / `y`, not over these hold-out variables -- confirm whether the
# split is consumed by a later cell.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [4]:
# Candidate hyperparameters searched inside every outer fold
# (3 values x 3 values = 9 combinations).
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [5, 10, None],
}

# Nested cross-validation needs two splitters:
#   * outer_cv -- 5 shuffled folds; each held-out fold yields one performance
#     estimate for the tuned model.
#   * inner_cv -- 3 shuffled folds; used only to pick hyperparameters within
#     the training portion of an outer fold.
# Both are seeded so the fold assignments are repeatable.
outer_cv = KFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)
inner_cv = KFold(
    n_splits=3,
    shuffle=True,
    random_state=42,
)

In [6]:
# Nested cross-validation: for every outer fold, tune hyperparameters on the
# fold's training portion only, then score the tuned model on the fold's
# held-out portion. The held-out rows never take part in tuning.
outer_score = []
for train_idx, test_idx in outer_cv.split(x):
    # Slice this outer fold's training and evaluation partitions.
    x_train_outer, y_train_outer = x[train_idx], y[train_idx]
    x_test_outer, y_test_outer = x[test_idx], y[test_idx]

    # Inner loop: grid search over `param_grid`, cross-validated with
    # `inner_cv` on the outer-training partition and ranked by accuracy.
    grid_search = GridSearchCV(
        estimator=RandomForestClassifier(random_state=42),
        param_grid=param_grid,
        scoring='accuracy',
        cv=inner_cv,
        n_jobs=-1,
    )
    grid_search.fit(x_train_outer, y_train_outer)

    # Evaluate the winning configuration on rows the search never saw.
    best_model = grid_search.best_estimator_
    score = best_model.score(x_test_outer, y_test_outer)
    outer_score.append(score)
    print(f"Fold score: {score:.3f}, Best params: {grid_search.best_params_}")

# Summarise the per-fold scores: mean and (population) standard deviation.
print(f"\nNested CV Mean Score: {np.mean(outer_score):.3f} (+/- {np.std(outer_score):.3f})")

Fold score: 0.895, Best params: {'max_depth': 10, 'n_estimators': 100}
Fold score: 0.870, Best params: {'max_depth': 10, 'n_estimators': 200}
Fold score: 0.935, Best params: {'max_depth': 10, 'n_estimators': 50}
Fold score: 0.880, Best params: {'max_depth': None, 'n_estimators': 200}
Fold score: 0.920, Best params: {'max_depth': None, 'n_estimators': 200}

Nested CV Mean Score: 0.900 (+/- 0.024)
