In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

In [4]:
# Load the Iris dataset object; later cells read its .DESCR, .data and .target fields.
iris = load_iris()

In [8]:
# Print (rather than echo) the description so its line breaks render as text.
print(iris.DESCR)

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

In [31]:
# Feature matrix and class labels, pulled out of the loaded dataset object.
X, y = iris.data, iris.target

In [32]:
# Hold out 33% of the samples for evaluation; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=0,
)

In [33]:
# Logistic-regression candidate: standardize -> 2-component PCA -> classifier.
pipeline_lr = Pipeline(
    steps=[
        ('scalar1', StandardScaler()),
        ('pca1', PCA(n_components=2)),
        ('lr_classifier', LogisticRegression(random_state=0)),
    ]
)

In [34]:
# Decision-tree candidate: standardize -> 2-component PCA -> classifier.
# random_state is fixed (matching the logistic-regression pipeline) so the tree's
# tie-breaking between equally good splits is repeatable across kernel restarts.
pipeline_dt = Pipeline([
    ('scalar2',StandardScaler()),
    ('pca2',PCA(n_components=2)),
    ('dt_classifier',DecisionTreeClassifier(random_state=0))
])

In [35]:
# Random-forest candidate: standardize -> 2-component PCA -> classifier.
# The final step is named 'rf_classifier' (it was mislabelled 'lr_classifier',
# copied from the logistic-regression pipeline), and random_state is fixed so the
# forest's bootstrap sampling and feature selection are repeatable.
pipeline_rfc = Pipeline([
    ('scalar3',StandardScaler()),
    ('pca3',PCA(n_components=2)),
    ('rf_classifier',RandomForestClassifier(random_state=0))
])

In [36]:
# Candidate pipelines. Order matters: later cells map each position to a label
# through pipe_dict's integer keys.
pipelines = [
    pipeline_lr,
    pipeline_dt,
    pipeline_rfc,
]

In [37]:
# Trackers for the model-selection loop, all starting from the same sentinel 0.
best_accuracy = best_classifier = best_pipeline = 0

In [38]:
# Position -> short label lookup for the candidate pipelines (0-based keys).
pipe_dict = dict(enumerate(('LR', 'DT', 'RF')))

In [39]:
# Train every candidate pipeline on the training split.
for pipe in pipelines:
    pipe.fit(X=X_train, y=y_train)

In [40]:
# Print the held-out accuracy of each fitted pipeline, labelled by its position.
for i, model in enumerate(pipelines):
    print(
        f'{pipe_dict[i]} Test Accuracy: {model.score(X_test,y_test)}'
    )

LR Test Accuracy: 0.92
DT Test Accuracy: 0.9
RF Test Accuracy: 0.88


In [41]:
# Select the pipeline with the highest test accuracy.
# Each pipeline is scored once per iteration and the value is reused for both the
# comparison and the assignment (it was previously computed twice).
# The strict '>' means that on a tie the earlier pipeline in the list is kept.
for i,model in enumerate(pipelines):
    accuracy = model.score(X_test,y_test)
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_pipeline = pipe_dict[i]

In [42]:
# Show the label of the selected pipeline followed by its test accuracy.
print(best_pipeline,best_accuracy)

LR 0.92
