Question 4: Hyperparameter Tuning with RandomizedSearchCV<br>
Task: Use RandomizedSearchCV to optimize hyperparameters for a RandomForestClassifier.

In [2]:
import numpy as np
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
from scipy.stats import randint

# Load the Iris dataset (classification problem)
data = load_iris()
X = data.data
y = data.target

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize a RandomForestClassifier
rf_classifier = RandomForestClassifier(random_state=42)

# Define the parameter grid for RandomizedSearchCV
param_dist = {
    'n_estimators': randint(50, 200),  # Number of trees
        'max_depth': randint(3, 20),       # Maximum depth of the tree
            'min_samples_split': randint(2, 20),  # Minimum samples to split
                'min_samples_leaf': randint(1, 20),   # Minimum samples in a leaf node
                    'max_features': ['sqrt', 'log2'],  # Corrected max_features values
                        'bootstrap': [True, False]          # Whether to use bootstrap samples
                        }

# Initialize RandomizedSearchCV with 5-fold cross-validation and 100 iterations
random_search = RandomizedSearchCV(estimator=rf_classifier, param_distributions=param_dist, 
                                   n_iter=100, cv=5, verbose=2, random_state=42, n_jobs=-1)

# Perform RandomizedSearchCV to find the best hyperparameters
random_search.fit(X_train, y_train)

# Print the best hyperparameters
print(f"Best Hyperparameters: {random_search.best_params_}")

# Evaluate the model on the test set
best_rf_model = random_search.best_estimator_
test_accuracy = best_rf_model.score(X_test, y_test)
print(f"Test Set Accuracy: {test_accuracy:.2f}")

Fitting 5 folds for each of 100 candidates, totalling 500 fits
[CV] END bootstrap=True, max_depth=17, max_features=sqrt, min_samples_leaf=8, min_samples_split=8, n_estimators=171; total time=   0.3s
[CV] END bootstrap=True, max_depth=17, max_features=sqrt, min_samples_leaf=8, min_samples_split=8, n_estimators=171; total time=   0.3s
[CV] END bootstrap=True, max_depth=17, max_features=sqrt, min_samples_leaf=8, min_samples_split=8, n_estimators=171; total time=   0.3s
[CV] END bootstrap=True, max_depth=17, max_features=sqrt, min_samples_leaf=8, min_samples_split=8, n_estimators=171; total time=   0.4s
[CV] END bootstrap=True, max_depth=17, max_features=sqrt, min_samples_leaf=8, min_samples_split=8, n_estimators=171; total time=   0.4s
[CV] END bootstrap=True, max_depth=13, max_features=sqrt, min_samples_leaf=4, min_samples_split=9, n_estimators=180; total time=   0.4s
[CV] END bootstrap=True, max_depth=13, max_features=sqrt, min_samples_leaf=4, min_samples_split=9, n_estimators=180; tota