In [1]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
# Build the pipeline with DEFAULT parameters: feature standardization
# followed by a decision tree whose random_state is fixed at 0.
# make_pipeline names the steps automatically ('standardscaler',
# 'decisiontreeclassifier'), as shown in the fitted pipeline's repr.

pipeline_example = make_pipeline(
    StandardScaler(),
    DecisionTreeClassifier(random_state=0),
)

In [3]:
# Loading Dataset
# return_X_y=True makes load_iris hand back the (features, labels) pair
# directly, which is unpacked into X and y.

X, y = load_iris(return_X_y=True)

In [4]:
# Splitting the values into train and test sets.
# random_state=0 fixes the shuffle so the split is identical on every run;
# no test_size is passed, so scikit-learn's default split proportion applies.

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [5]:
## Here trying the pipeline defined above...
# Fit the pipeline on the training split only. The fit call is the cell's last
# expression, so the fitted pipeline's repr is displayed as the cell output.

pipeline_example.fit(X_train, y_train)

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('decisiontreeclassifier',
                 DecisionTreeClassifier(random_state=0))])

In [6]:
# Predict labels for the held-out test features with the fitted pipeline.
y_pred = pipeline_example.predict(X_test)

# Fraction of test samples whose predicted label matches the true label.
acc = accuracy_score(y_true=y_test, y_pred=y_pred)

In [7]:
# Accuracy Score: test-set accuracy of the pipeline fitted with default parameters.
acc

0.9736842105263158

## Tuning Parameters with Grid Search

In [8]:
## Cross Validation
# GridSearchCV is used below to compare candidate parameter values for the pipeline.

from sklearn.model_selection import GridSearchCV

In [9]:
# Candidate values for the tree's min_samples_split. The key follows the
# "<step name>__<parameter>" convention, where the step name is the one
# make_pipeline assigned to the DecisionTreeClassifier step.
param_grid = {"decisiontreeclassifier__min_samples_split": [2, 5, 10]}

# Wrap the whole pipeline so every candidate is evaluated on the full
# scaler + tree workflow.
grid_search = GridSearchCV(estimator=pipeline_example, param_grid=param_grid)

In [10]:
# Run the grid search on the training split only; the test split stays
# untouched for the final evaluation.
grid_search.fit(X_train, y_train)

GridSearchCV(estimator=Pipeline(steps=[('standardscaler', StandardScaler()),
                                       ('decisiontreeclassifier',
                                        DecisionTreeClassifier(random_state=0))]),
             param_grid={'decisiontreeclassifier__min_samples_split': [2, 5,
                                                                       10]})

In [11]:
# Show which of the candidate values [2, 5, 10] for min_samples_split
# the grid search selected as best.

grid_search.best_params_

{'decisiontreeclassifier__min_samples_split': 5}

In [12]:
# Let's make another prediction on the test set, this time through the fitted
# grid search, i.e. using the selected min_samples_split value.
# NOTE(review): this relies on GridSearchCV's default refit behaviour — confirm
# if the refit argument is ever changed.

y_grid_pred = grid_search.predict(X_test)

In [13]:
# Test-set accuracy of the tuned model; compare it with `acc` from the
# pipeline fitted with default parameters.
accuracy_score(y_test, y_grid_pred)

0.9736842105263158