In [1]:
import pandas as pd

In [2]:
from sklearn.datasets import load_iris
iris = load_iris()

In [3]:
# split dataset into train and test sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target)

# Desicion tree

In [4]:
# initialize Desicion Tree Classifier model
from sklearn.tree import DecisionTreeClassifier
tree = DecisionTreeClassifier()

In [5]:
# fit the model using train data and labels
tree.fit(X_train, y_train)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

In [23]:
# show the feature importances
for feature_importance in zip(iris.feature_names, tree.feature_importances_):
    print(feature_importance)

('sepal length (cm)', 0.012701132769678542)
('sepal width (cm)', 0.0312824195993935)
('petal length (cm)', 0.05657203651108165)
('petal width (cm)', 0.8994444111198464)


In [6]:
# print the score of classifier
print('Decision Tree score: {:.2f}'.format(tree.score(X_test, y_test)))

Decision Tree score: 0.95


# K-Nearest Neighbor

In [7]:
# initialize KNN classifier model
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier()

In [8]:
# scale the data
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [9]:
# fit knn model with training data and labels
knn.fit(X_train_scaled, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=5, p=2,
           weights='uniform')

In [10]:
# print the score of the model on test data
print('KNN score: {:.2f}'.format(knn.score(X_test_scaled, y_test)))

KNN score: 0.95


# Grid Search CV

In [11]:
# perform GridSearchCV on Decision Tree classifier
# initialize GridSearchCV model
from sklearn.model_selection import GridSearchCV
tree_params = {'max_depth': range(1, 11),
               'max_features': range(1, 5),
               'min_samples_leaf': range(1, 11)}
tree_grid = GridSearchCV(DecisionTreeClassifier(), tree_params, cv=5)

In [12]:
# fit the model using training data and labels
tree_grid.fit(X_train, y_train)



GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best'),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'max_depth': range(1, 11), 'max_features': range(1, 5), 'min_samples_leaf': range(1, 11)},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [13]:
# show best parameters
tree_grid.best_params_

{'max_depth': 5, 'max_features': 3, 'min_samples_leaf': 4}

In [14]:
# print Decision Trees scores
print('Tree validation best score: {:.2f}'.format(tree_grid.best_score_))
print('Tree test score: {:.2f}'.format(tree_grid.score(X_test, y_test)))

Tree validation best score: 0.97
Tree test score: 0.95


In [15]:
# perform GridSearchCV on knn classifier
# initialize GridSearchCV model
knn_params = {'n_neighbors': range(1, 11)}
knn_grid = GridSearchCV(KNeighborsClassifier(), knn_params, cv=5)

In [16]:
# fit the model using scaled training data and labels
knn_grid.fit(X_train_scaled, y_train)



GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=5, p=2,
           weights='uniform'),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'n_neighbors': range(1, 11)}, pre_dispatch='2*n_jobs',
       refit=True, return_train_score='warn', scoring=None, verbose=0)

In [17]:
# show best params
knn_grid.best_params_

{'n_neighbors': 5}

In [18]:
# print KNN scores
print('KNN best validation score: {:.2f}'.format(knn_grid.best_score_))
print('KNN test score: {:.2f}'.format(knn_grid.score(X_test_scaled, y_test)))

KNN best validation score: 0.96
KNN test score: 0.95
