In [1]:
import pandas as pd
from sklearn.datasets import load_iris
iris = load_iris()
table = pd.DataFrame(iris.data,columns=iris.feature_names)
table.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [2]:
table['target'] = iris.target
table.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [3]:
x = table.drop(['target'], axis='columns')
y = iris.target  

In [4]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.20, random_state = 0)

In [5]:
from sklearn.tree import DecisionTreeClassifier

In [6]:
dt1 = DecisionTreeClassifier(criterion= 'gini', splitter= 'best', max_depth=2)
dt2 = DecisionTreeClassifier(criterion= 'entropy', splitter= 'best', max_depth=5)
dt3 = DecisionTreeClassifier(criterion= 'gini', splitter= 'random', max_depth= 4)
dt4 = DecisionTreeClassifier(criterion= 'entropy', splitter= 'random', max_depth= 6)

In [7]:
dt1.fit(X_train, y_train)
dt2.fit(X_train, y_train)
dt3.fit(X_train, y_train)
dt4.fit(X_train, y_train)

DecisionTreeClassifier(criterion='entropy', max_depth=6, splitter='random')

In [8]:
print('dt1 -> ',dt1.score(X_test, y_test))
print('dt2 -> ',dt2.score(X_test, y_test))
print('dt3 -> ',dt3.score(X_test, y_test))
print('dt4 -> ',dt4.score(X_test, y_test))

dt1 ->  0.9666666666666667
dt2 ->  1.0
dt3 ->  0.9
dt4 ->  0.9666666666666667


In [9]:
dt = DecisionTreeClassifier()

In [10]:
from sklearn.model_selection import GridSearchCV

In [11]:
options = {'criterion': ('gini','entropy'),'splitter': ('best','random'),'max_depth' : list(range(1,7)) }

In [12]:
gs = GridSearchCV(dt,options)
gs.fit(X_train, y_train)

GridSearchCV(estimator=DecisionTreeClassifier(),
             param_grid={'criterion': ('gini', 'entropy'),
                         'max_depth': [1, 2, 3, 4, 5, 6],
                         'splitter': ('best', 'random')})

In [13]:
gs.cv_results_['params']

[{'criterion': 'gini', 'max_depth': 1, 'splitter': 'best'},
 {'criterion': 'gini', 'max_depth': 1, 'splitter': 'random'},
 {'criterion': 'gini', 'max_depth': 2, 'splitter': 'best'},
 {'criterion': 'gini', 'max_depth': 2, 'splitter': 'random'},
 {'criterion': 'gini', 'max_depth': 3, 'splitter': 'best'},
 {'criterion': 'gini', 'max_depth': 3, 'splitter': 'random'},
 {'criterion': 'gini', 'max_depth': 4, 'splitter': 'best'},
 {'criterion': 'gini', 'max_depth': 4, 'splitter': 'random'},
 {'criterion': 'gini', 'max_depth': 5, 'splitter': 'best'},
 {'criterion': 'gini', 'max_depth': 5, 'splitter': 'random'},
 {'criterion': 'gini', 'max_depth': 6, 'splitter': 'best'},
 {'criterion': 'gini', 'max_depth': 6, 'splitter': 'random'},
 {'criterion': 'entropy', 'max_depth': 1, 'splitter': 'best'},
 {'criterion': 'entropy', 'max_depth': 1, 'splitter': 'random'},
 {'criterion': 'entropy', 'max_depth': 2, 'splitter': 'best'},
 {'criterion': 'entropy', 'max_depth': 2, 'splitter': 'random'},
 {'criterion

In [14]:
gs.best_params_

{'criterion': 'entropy', 'max_depth': 3, 'splitter': 'random'}

In [15]:
scores=gs.cv_results_['mean_test_score']
scores

array([0.69166667, 0.68333333, 0.94166667, 0.84166667, 0.93333333,
       0.89166667, 0.93333333, 0.95      , 0.93333333, 0.94166667,
       0.93333333, 0.925     , 0.69166667, 0.575     , 0.93333333,
       0.86666667, 0.93333333, 0.95833333, 0.925     , 0.93333333,
       0.93333333, 0.925     , 0.925     , 0.91666667])

In [16]:
import numpy as np
np.argmax(scores)

17

In [17]:
scores[17]

0.9416666666666668

In [38]:
from sklearn.model_selection import RandomizedSearchCV

In [39]:
dt = DecisionTreeClassifier()

In [40]:
options = {'criterion': ('gini','entropy'),'splitter': ('best','random'),'max_depth' : list(range(1,7)) }

In [41]:
samples = 5  
rs = RandomizedSearchCV(dt, param_distributions=options, n_iter=samples)

In [42]:
rs.fit(X_train, y_train)

RandomizedSearchCV(estimator=DecisionTreeClassifier(), n_iter=5,
                   param_distributions={'criterion': ('gini', 'entropy'),
                                        'max_depth': [1, 2, 3, 4, 5, 6],
                                        'splitter': ('best', 'random')})

In [43]:
rs.cv_results_['params']

[{'splitter': 'best', 'max_depth': 4, 'criterion': 'entropy'},
 {'splitter': 'best', 'max_depth': 5, 'criterion': 'entropy'},
 {'splitter': 'best', 'max_depth': 6, 'criterion': 'entropy'},
 {'splitter': 'random', 'max_depth': 6, 'criterion': 'entropy'},
 {'splitter': 'best', 'max_depth': 3, 'criterion': 'entropy'}]

In [44]:
rs.best_params_

{'splitter': 'best', 'max_depth': 6, 'criterion': 'entropy'}

In [45]:
scores = rs.cv_results_['mean_test_score']
scores

array([0.91666667, 0.925     , 0.94166667, 0.93333333, 0.93333333])

In [46]:
np.argmax(scores)

2

In [47]:
scores[2]

0.9333333333333333