In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import StratifiedKFold, cross_validate
import numpy as np

In [2]:
# Load the breast-cancer dataset and split it into features and labels
data = load_breast_cancer()
X = data.data      # feature matrix
y = data.target    # class labels

In [3]:
# Bagging ensemble of class-weight-balanced decision trees.
# The base estimator is passed positionally on purpose: its keyword was
# `base_estimator` up to scikit-learn 1.1, was renamed to `estimator` in 1.2,
# and the old name was removed in 1.4. The positional form runs on all of them.
bagging_model = BaggingClassifier(
    DecisionTreeClassifier(class_weight='balanced'),
    n_estimators=100,   # number of bootstrapped trees
    bootstrap=True,     # sample the training rows with replacement
    oob_score=True,     # enables the out-of-bag estimate (`oob_score_`)
    random_state=42,    # fixed seed for reproducibility
    n_jobs=-1,          # use all CPU cores
)

In [4]:
# Fit the ensemble on the full dataset (X, y). The model was built with
# oob_score=True, and the `oob_score_` attribute printed in the next cell is
# read from this fitted object. Because the call is the last expression in the
# cell, its return value is what the notebook displays as the cell output.
bagging_model.fit(X, y)

BaggingClassifier(base_estimator=DecisionTreeClassifier(class_weight='balanced'),
                  n_estimators=100, n_jobs=-1, oob_score=True, random_state=42)

In [5]:
# Report the out-of-bag accuracy stored on the fitted ensemble
# (available because the model was built with oob_score=True)
oob_accuracy = bagging_model.oob_score_
print("OOB Accuracy Estimate: %.2f" % oob_accuracy)

OOB Accuracy Estimate: 0.97


In [6]:
# Stratified 10-fold cross-validation of the bagging ensemble
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
scoring_metrics = ['accuracy', 'precision', 'recall']
scores = cross_validate(bagging_model, X, y, scoring=scoring_metrics, cv=cv)

# Report each metric averaged over the folds.
# NOTE(review): 'precision' and 'recall' presumably score the label-1 class —
# confirm that is the class of interest for this dataset.
fold_reports = (
    ('Mean Accuracy: %.2f', 'test_accuracy'),
    ('Mean Precision: %.2f', 'test_precision'),
    ('Mean Recall: %.2f', 'test_recall'),
)
for template, result_key in fold_reports:
    print(template % np.mean(scores[result_key]))

Mean Accuracy: 0.96
Mean Precision: 0.96
Mean Recall: 0.98
