In [10]:
from sklearn import decomposition, datasets
from sklearn import tree
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler


In [11]:
dataset = datasets.load_wine()
X = dataset.data
y = dataset.target

# Using StandardScaler and PCA

In [12]:
std_slc = StandardScaler()

# Principal Component Analysis(PCA) 
# - to reduce the size of features by creating new features which have most of the varience of the original data.

In [13]:
pca = decomposition.PCA()

# import decision tree classifier

In [14]:
dec_tree = tree.DecisionTreeClassifier()


# ->Using Pipeline for GridSearchCV

# ->Creating a pipeline of three steps. First, standardizing the data.
# ->Second, tranforming the data with PCA.
# ->Third, training a Decision Tree Classifier on the data.

# helps to pass modules one by one through GridSearchCV  to get the best parameters.

In [15]:
pipe = Pipeline(steps=[('std_slc', std_slc),('pca', pca),('dec_tree', dec_tree)]) # standardsclare ,principal component,decision tree

# pca -requires a parameter -n_components ,standardsclare doesnt require any argument

In [16]:
n_components = list(range(1,X.shape[1]+1,1))

# decision tree parameters

In [17]:
criterion = ['gini', 'entropy']
max_depth = [2,4,6,8,10,12]


# Creating a pipeline of three steps. First, standardizing the data.
# Second, tranforming the data with PCA.
# Third, training a Decision Tree Classifier on the data.
# creating a dictionary to set all the parameters

In [18]:
parameters = dict(pca__n_components=n_components, dec_tree__criterion=criterion,dec_tree__max_depth=max_depth)


# GridSearchCV object

In [19]:
clf_GS = GridSearchCV(pipe, parameters)
clf_GS.fit(X, y)

GridSearchCV(estimator=Pipeline(steps=[('std_slc', StandardScaler()),
                                       ('pca', PCA()),
                                       ('dec_tree', DecisionTreeClassifier())]),
             param_grid={'dec_tree__criterion': ['gini', 'entropy'],
                         'dec_tree__max_depth': [2, 4, 6, 8, 10, 12],
                         'pca__n_components': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
                                               11, 12, 13]})

# getting the best models

In [20]:
print('Best Criterion:', clf_GS.best_estimator_.get_params()['dec_tree__criterion'])
print('Best max_depth:', clf_GS.best_estimator_.get_params()['dec_tree__max_depth'])
print('Best Number Of Components:', clf_GS.best_estimator_.get_params()['pca__n_components'])
print(); print(clf_GS.best_estimator_.get_params()['dec_tree'])


Best Criterion: entropy
Best max_depth: 12
Best Number Of Components: 3

DecisionTreeClassifier(criterion='entropy', max_depth=12)
