In [36]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn import datasets
from scipy.stats import randint

## Data

In [25]:
iris = datasets.load_iris()
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.33)

## Random Forest with GridSearch Parametric

In [29]:
class RandomForestModel1:
    def __init__(self):
        self.model = RandomForestClassifier()

    def train(self, X, y, param_grid):
        grid_search = GridSearchCV(self.model, param_grid, cv=5)
        grid_search.fit(X, y)
        self.best_model = grid_search.best_estimator_
        return grid_search.best_params_

    def predict(self, X):
        return self.best_model.predict(X)

In [30]:
param_grid1 = {
    'n_estimators': [50, 100, 200],
    'max_depth':[None, 10, 20, 30],
    'min_samples_split': [2, 5, 10]
}

rf_model = RandomForestModel1()
best_params = rf_model.train(X_train, y_train, param_grid1)

print(f"Best parameters found: {best_params}")

Best parameters found: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 100}


In [31]:
y_pred = rf_model.predict(X_test)

In [32]:
print(f"classification report: \n{classification_report(y_test, y_pred)}")

classification report: 
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       0.94      0.88      0.91        17
           2       0.87      0.93      0.90        14

    accuracy                           0.94        50
   macro avg       0.93      0.94      0.94        50
weighted avg       0.94      0.94      0.94        50



In [33]:
print(f"confusion matrix: \n{confusion_matrix(y_test, y_pred)}")

confusion matrix: 
[[19  0  0]
 [ 0 15  2]
 [ 0  1 13]]


## Random Forest with RandomizedSearch

In [37]:
class RandomForestModel2:
    def __init__(self):
        self.model = RandomForestClassifier()

    def train(self, X, y, param_dist, n_iter_search=10):
        randomized_search = RandomizedSearchCV(self.model, param_dist, n_iter = n_iter_search, cv=5)
        randomized_search.fit(X, y)
        self.best_model = randomized_search.best_estimator_
        return randomized_search.best_params_

    def predict(self, X):
        return self.best_model.predict(X)

In [38]:
param_dist = {
    'n_estimators': randint(50, 200),
    'max_depth': randint(10, 50),
    'min_samples_split': randint(2, 10)
}

rf_model = RandomForestModel2()
best_params = rf_model.train(X_train, y_train, param_dist)

print("Best Parameters found: ", best_params)

y_pred = rf_model.predict(X_test)

Best Parameters found:  {'max_depth': 33, 'min_samples_split': 4, 'n_estimators': 95}


In [40]:
print(f"Classification report: \n{classification_report(y_test, y_pred)}")

Classification report: 
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       0.94      0.88      0.91        17
           2       0.87      0.93      0.90        14

    accuracy                           0.94        50
   macro avg       0.93      0.94      0.94        50
weighted avg       0.94      0.94      0.94        50



In [41]:
print(f"Confusion Matrix :\n{confusion_matrix(y_test, y_pred)}")

Confusion Matrix :
[[19  0  0]
 [ 0 15  2]
 [ 0  1 13]]
