In [9]:
# import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [3]:
df = sns.load_dataset('iris')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [4]:
X = df.drop('species', axis=1)
y = df['species']

In [7]:
%%time 

model = RandomForestClassifier()

param_grid = {
    'n_estimators': [50, 100, 200, 300, 400, 1000],
    'max_depth': [4,5,6,7,8,9,10],
    'criterion': ['gini', 'entropy'],
    'bootstrap': [True, False]
}

grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
    verbose=True,
    n_jobs=-1
    )

grid.fit(X, y)

print(grid.best_params_)
print(grid.best_score_)
print(grid.best_estimator_)

Fitting 5 folds for each of 168 candidates, totalling 840 fits
{'bootstrap': True, 'criterion': 'gini', 'max_depth': 4, 'n_estimators': 50}
0.9666666666666668
RandomForestClassifier(max_depth=4, n_estimators=50)
CPU times: total: 1.38 s
Wall time: 27.2 s


In [8]:
# print accuracy score
print(grid.score(X, y))


0.9733333333333334


In [15]:
%%time 

model = RandomForestClassifier()

param_grid = {
    'n_estimators': [50, 100, 200, 300, 400, 1000],
    'max_depth': [4,5,6,7,8,9,10],
    'criterion': ['gini', 'entropy'],
    'bootstrap': [True, False]
}

grid = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_grid,
    cv=5,
    scoring='accuracy',
    verbose=True,
    n_jobs=-1
    )

grid.fit(X, y)

print(grid.best_params_)
print(grid.best_score_)
print(grid.score(X, y))

Fitting 5 folds for each of 10 candidates, totalling 50 fits
{'n_estimators': 400, 'max_depth': 4, 'criterion': 'gini', 'bootstrap': False}
0.9666666666666668
0.9933333333333333
CPU times: total: 46.9 ms
Wall time: 1.37 s


In [17]:
from sklearn.datasets import load_iris
iris = load_iris()
iris.data

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3

In [None]:
# import 