In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import randint
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV

# Load dataset

In [2]:
# Wrap the iris arrays in pandas containers so the feature names stay attached
# to the columns.
data = load_iris()
X = pd.DataFrame(data=data.data, columns=data.feature_names)
y = pd.Series(data=data.target)

# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible across kernel restarts.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, test_size=0.2, random_state=42)

# Base Model

In [3]:
# Baseline: a random forest with default hyperparameters (only the seed is
# fixed), scored on the held-out split.
rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)
y_pred = rf.predict(X_test)

baseline_report = classification_report(y_test, y_pred)
print(baseline_report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



# GridSearchCV

In [4]:
# Exhaustive search: 3 x 3 x 3 = 27 candidate settings, each scored with
# 5-fold cross-validation on the training split.
param_grid = dict(
    n_estimators=[10, 50, 100],
    max_depth=[None, 5, 10],
    min_samples_split=[2, 5, 10],
)

grid_search = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid,
    cv=5,
)
grid_search.fit(X_train, y_train)

print("Best Parameters (GridSearchCV):", grid_search.best_params_)
print("Best Score (GridSearchCV):", grid_search.best_score_)

Best Parameters (GridSearchCV): {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 10}
Best Score (GridSearchCV): 0.95


# RandomizedSearchCV

In [5]:
# Randomized search: sample hyperparameter settings instead of enumerating a
# grid. `randint` (imported in the notebook's top import cell) draws integers
# from the half-open range [low, high), so n_estimators is sampled from
# 10..199 and min_samples_split from 2..10; max_depth is picked from the list.
param_dist = {
    'n_estimators': randint(10, 200),
    'max_depth': [None, 5, 10, 15],
    'min_samples_split': randint(2, 11),
}

# n_iter=10 sampled settings, each scored with 5-fold cross-validation.
# The search has its own random_state so the sampled candidates are
# reproducible, separately from the forest's seed.
random_search = RandomizedSearchCV(
    RandomForestClassifier(random_state=42),
    param_distributions=param_dist,
    n_iter=10,
    cv=5,
    random_state=42,
)
random_search.fit(X_train, y_train)

print("Best Parameters (RandomizedSearchCV):", random_search.best_params_)
print("Best Score (RandomizedSearchCV):", random_search.best_score_)

Best Parameters (RandomizedSearchCV): {'max_depth': 10, 'min_samples_split': 5, 'n_estimators': 102}
Best Score (RandomizedSearchCV): 0.95


# Evaluation on test set

In [8]:
# Compare the winners of the two searches on the held-out test split.
# `classification_report` comes from the notebook's top import cell, so it is
# not re-imported here.

# Get the best estimators
best_grid_model = grid_search.best_estimator_
best_random_model = random_search.best_estimator_

# Predict once per model; the predictions keep their own names so later cells
# can reuse them.
y_pred_grid = best_grid_model.predict(X_test)
y_pred_random = best_random_model.predict(X_test)

# One reporting loop instead of two copy-pasted print blocks; the printed text
# is the same as before.
for label, predictions in (
    ("GridSearchCV Best Model:", y_pred_grid),
    ("RandomizedSearchCV Best Model:", y_pred_random),
):
    print(label)
    print(classification_report(y_test, predictions))


GridSearchCV Best Model:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

RandomizedSearchCV Best Model:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

