In [26]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [27]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.externals import joblib
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

In [28]:
iris_df = load_iris()

In [29]:
iris_df.data

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3

In [30]:
X_train, X_test, y_train, y_test = train_test_split(iris_df.data, iris_df.target, test_size=0.3, random_state=1)

In [41]:
## Pipelines creatiom:
## 1. Data Preprocessing using StandardScaler
## 2. Reduce dimension using PCA
## 3. Apply Classifier

In [42]:
pipeline_lr = Pipeline([('scaler1', StandardScaler()),
                       ('PCA1', PCA(n_components=2)),
                       ('lr_classifier', LogisticRegression())])

In [43]:
pipeline_dt = Pipeline([('scaler2', StandardScaler()),
                       ('PCA2', PCA(n_components=2)),
                       ('dt_classifier', DecisionTreeClassifier())])

In [44]:
pipeline_rf = Pipeline([('scalar3', StandardScaler()),
                       ('PCA3', PCA(n_components = 2)),
                       ('rf_classifier', RandomForestClassifier())])

In [45]:
#lets make the list of Pipelines
pipelines = [pipeline_lr, pipeline_dt, pipeline_rf]

In [46]:
best_accuracy = 0
best_classifier = 0
best_pipeline = ""

In [47]:
#Dictionary of pipelines and reference types for ease of reference
pipe_dict = {0:'Logistic Regression', 1:'Decision Tree', 2:'Random Forest'}

#fit the pipelines
for pipe in pipelines:
    pipe.fit(X_train, y_train)



In [48]:
for i,model in enumerate(pipelines):
    print('{} Test accuracy : {}'.format(pipe_dict[i], model.score(X_test, y_test)))

Logistic Regression Test accuracy : 0.8
Decision Tree Test accuracy : 0.8888888888888888
Random Forest Test accuracy : 0.8888888888888888


In [50]:
for i,model in enumerate(pipelines):
    if(model.score(X_test, y_test)>best_accuracy):
        best_accuracy = model.score(X_test, y_test)
        best_classifier = i
        best_pipeline = model
print('the best classifier is: {}'.format(pipe_dict[best_classifier]))

the best classifier is: Decision Tree
