In [1]:
# load dataset
from sklearn.datasets import load_breast_cancer
import pandas as pd

data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = data.target


In [2]:
# split data
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [3]:
# default random forest
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

rf_default = RandomForestClassifier(random_state=42)
rf_default.fit(x_train, y_train)


In [4]:
# default random forest
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

rf_default = RandomForestClassifier(random_state=42)
rf_default.fit(x_train, y_train)


In [5]:
# accuracy without tuning
y_pred_default = rf_default.predict(x_test)
acc_without_rscv = accuracy_score(y_test, y_pred_default)

acc_without_rscv


0.9649122807017544

In [6]:
# hyperparameter grid
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 3, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['auto', 'sqrt', 'log2']
}


In [7]:
# randomized search setup
from sklearn.model_selection import RandomizedSearchCV

random_search = RandomizedSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_distributions=param_grid,
    n_iter=30,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
    random_state=42,
    verbose=2
)


In [8]:
# train randomized search
random_search.fit(x_train, y_train)


Fitting 5 folds for each of 30 candidates, totalling 150 fits


35 fits failed out of a total of 150.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
35 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/sklearn/model_selection/_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.12/dist-packages/sklearn/base.py", line 1382, in wrapper
    estimator._validate_params()
  File "/usr/local/lib/python3.12/dist-packages/sklearn/base.py", line 436, in _validate_params
    validate_parameter_constraints(
  File "/usr/local/lib/python3.12/dist-packages/sklearn/utils/_param_validation.py", line 98, in validate_parameter_constraints
    raise InvalidParameterError(
skle

In [10]:
# accuracy with randomized search
y_pred_rscv = random_search.best_estimator_.predict(x_test)
acc_with_rscv = accuracy_score(y_test, y_pred_rscv)

acc_with_rscv


0.9649122807017544

In [11]:
print("WITHOUT RandomSearchCV:", acc_without_rscv)
print("WITH RandomSearchCV:", acc_with_rscv)


WITHOUT RandomSearchCV: 0.9649122807017544
WITH RandomSearchCV: 0.9649122807017544
