<a href="https://colab.research.google.com/github/possomartin/Notebooks/blob/main/Demo_Pipeline_GridSearchCV.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Library imports

In [None]:
# Standard Libraries
import random as rn
import pandas as pd
import numpy as np

# Datasets
from sklearn import datasets

# Split data into train and test sets
from sklearn.model_selection import train_test_split

# Create Pipeline
from sklearn.pipeline import Pipeline

# Grid search to find the best hyperparameters
from sklearn.model_selection import GridSearchCV

# StandardScaler
from sklearn.preprocessing import StandardScaler

# Models to be used
  # Classification
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# Read Dataset

In [None]:
# Load the wine dataset that ships with scikit-learn.
wine = datasets.load_wine()

# Feature matrix (X) and target labels (y) used by the rest of the notebook.
X, y = wine.data, wine.target

# Prepare Data for training

In [None]:
# Hold out 33% of the samples as a test set; the fixed random_state keeps
# the split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

# Pipeline

In [None]:
# Single random_state shared by every candidate model.
seed = 142

# Two-step pipeline: standardize the features, then fit a classifier.
# The 'classifier' step is overridden by the estimators listed in the grid below.
clf = Pipeline(
    steps=[
        ('scl', StandardScaler()),
        ('classifier', DecisionTreeClassifier(random_state=seed)),
    ]
)

# One sub-grid per candidate model. The 'classifier' key selects the estimator
# for the pipeline step; 'classifier__<param>' keys set that estimator's parameters.
classifier_param_grid = [
    # Decision tree: both split criteria, max_depth in 10, 12, ..., 20.
    {
        'classifier': [DecisionTreeClassifier(random_state=seed)],
        'classifier__criterion': ['gini', 'entropy'],
        'classifier__max_depth': np.arange(10, 21, 2),
    },
    # Random forest: both split criteria, n_estimators in 50, 550, 1050.
    {
        'classifier': [RandomForestClassifier(random_state=seed)],
        'classifier__criterion': ['gini', 'entropy'],
        'classifier__n_estimators': np.arange(50, 1200, 500),
    },
]

# Grid Search CV

In [None]:
# Search every combination in classifier_param_grid, scoring each candidate
# by accuracy under 3-fold cross-validation.
clf_grid_search = GridSearchCV(
    estimator=clf,
    param_grid=classifier_param_grid,
    scoring='accuracy',
    cv=3,
    return_train_score=True,  # also keep the training-fold scores in cv_results_
)

# Model Fitting

In [None]:
# Collect the searches in a list so more grids can be added and fitted in a loop.
grids = [clf_grid_search]

# Fit the only search in the list on the training split.
clf_grid_search.fit(X_train, y_train)

GridSearchCV(cv=3,
             estimator=Pipeline(steps=[('scl', StandardScaler()),
                                       ('classifier',
                                        DecisionTreeClassifier(random_state=142))]),
             param_grid=[{'classifier': [DecisionTreeClassifier(random_state=142)],
                          'classifier__criterion': ['gini', 'entropy'],
                          'classifier__max_depth': array([10, 12, 14, 16, 18, 20])},
                         {'classifier': [RandomForestClassifier(n_estimators=550,
                                                                random_state=142)],
                          'classifier__criterion': ['gini', 'entropy'],
                          'classifier__n_estimators': array([  50,  550, 1050])}],
             return_train_score=True, scoring='accuracy')

In [None]:
# Tabulate the cross-validation results of the grid search, one row per candidate.
results_ = pd.DataFrame(data=clf_grid_search.cv_results_)
results_

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_classifier,param_classifier__criterion,param_classifier__max_depth,param_classifier__n_estimators,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,mean_train_score,std_train_score
0,0.002534,0.001227,0.001476,0.001484,DecisionTreeClassifier(random_state=142),gini,10.0,,{'classifier': DecisionTreeClassifier(random_s...,0.925,0.875,0.948718,0.916239,0.030726,7,1.0,1.0,1.0,1.0,0.0
1,0.001637,0.000166,0.000374,7e-06,DecisionTreeClassifier(random_state=142),gini,12.0,,{'classifier': DecisionTreeClassifier(random_s...,0.925,0.875,0.948718,0.916239,0.030726,7,1.0,1.0,1.0,1.0,0.0
2,0.001459,6e-06,0.000416,7e-06,DecisionTreeClassifier(random_state=142),gini,14.0,,{'classifier': DecisionTreeClassifier(random_s...,0.925,0.875,0.948718,0.916239,0.030726,7,1.0,1.0,1.0,1.0,0.0
3,0.001474,3.6e-05,0.000524,0.000177,DecisionTreeClassifier(random_state=142),gini,16.0,,{'classifier': DecisionTreeClassifier(random_s...,0.925,0.875,0.948718,0.916239,0.030726,7,1.0,1.0,1.0,1.0,0.0
4,0.001489,6.5e-05,0.000384,1.7e-05,DecisionTreeClassifier(random_state=142),gini,18.0,,{'classifier': DecisionTreeClassifier(random_s...,0.925,0.875,0.948718,0.916239,0.030726,7,1.0,1.0,1.0,1.0,0.0
5,0.001478,7.3e-05,0.000369,1.2e-05,DecisionTreeClassifier(random_state=142),gini,20.0,,{'classifier': DecisionTreeClassifier(random_s...,0.925,0.875,0.948718,0.916239,0.030726,7,1.0,1.0,1.0,1.0,0.0
6,0.00163,2.3e-05,0.000387,1.1e-05,DecisionTreeClassifier(random_state=142),entropy,10.0,,{'classifier': DecisionTreeClassifier(random_s...,0.925,0.875,0.923077,0.907692,0.02313,13,1.0,1.0,1.0,1.0,0.0
7,0.00176,0.000118,0.000401,2.9e-05,DecisionTreeClassifier(random_state=142),entropy,12.0,,{'classifier': DecisionTreeClassifier(random_s...,0.925,0.875,0.923077,0.907692,0.02313,13,1.0,1.0,1.0,1.0,0.0
8,0.001681,4.5e-05,0.000389,1.1e-05,DecisionTreeClassifier(random_state=142),entropy,14.0,,{'classifier': DecisionTreeClassifier(random_s...,0.925,0.875,0.923077,0.907692,0.02313,13,1.0,1.0,1.0,1.0,0.0
9,0.001674,7e-06,0.00038,1.3e-05,DecisionTreeClassifier(random_state=142),entropy,16.0,,{'classifier': DecisionTreeClassifier(random_s...,0.925,0.875,0.923077,0.907692,0.02313,13,1.0,1.0,1.0,1.0,0.0


# Results

In [None]:
# Display label for the grid search, keyed by its position.
grid_dict = {0: 'Classifiers'}

# Score of the fitted search on the held-out test data.
print(
    '{} Test Accuracy: {}'.format(
        grid_dict[0],
        clf_grid_search.score(X_test, y_test),
    )
)

# Best parameter combination found within the grid we supplied.
print(
    '{} Best Params: {}'.format(
        grid_dict[0],
        clf_grid_search.best_params_,
    )
)

Classifiers Test Accuracy: 1.0
Classifiers Best Params: {'classifier': RandomForestClassifier(n_estimators=550, random_state=142), 'classifier__criterion': 'gini', 'classifier__n_estimators': 550}


## Test

In [None]:
# Show the true labels of the held-out test split.
y_test

array([0, 0, 2, 0, 1, 0, 1, 2, 1, 2, 0, 2, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1,
       1, 2, 2, 2, 1, 1, 1, 0, 0, 1, 2, 0, 0, 0, 2, 2, 1, 2, 0, 1, 1, 1,
       2, 0, 1, 1, 2, 0, 1, 0, 0, 2, 2, 1, 1, 0, 1])

In [None]:
# Show the labels the fitted grid search predicts for the test split.
clf_grid_search.predict(X_test)

array([0, 0, 2, 0, 1, 0, 1, 2, 1, 2, 0, 2, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1,
       1, 2, 2, 2, 1, 1, 1, 0, 0, 1, 2, 0, 0, 0, 2, 2, 1, 2, 0, 1, 1, 1,
       2, 0, 1, 1, 2, 0, 1, 0, 0, 2, 2, 1, 1, 0, 1])

In [None]:
# Element-wise check: True wherever the predicted label equals the true label.
np.equal(y_test, clf_grid_search.predict(X_test))

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True])