## Some Useful Libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline

from sklearn.decomposition import PCA

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

## Load Dataset

In [2]:
df = sns.load_dataset('iris')
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [3]:
# Here i convert  the target feature into Numerical feature using LabelEncoder
le = LabelEncoder()
df['species'] = le.fit_transform(df['species'])

In [4]:
X_train,X_test,y_train,y_test = train_test_split(df.drop('species', axis=1), df['species'],test_size=0.2,
                                                random_state=42)

## Pipeline

In [5]:
## Pipeline Creation
## 1.Data preprocessing by using Standard scalar and Label encoder
## 2.Reduce Dimension using PCA
## 3.Apply classifier

In [6]:
pipeline_lr = Pipeline([
    ('scaler1', StandardScaler()),
    ('pca1', PCA(n_components=3)),
    ('lr_classifier',LogisticRegression(random_state=0))
])

In [7]:
pipeline_dt = Pipeline([
    ('scaler2', StandardScaler()),
    ('pca2', PCA(n_components=3)),
    ('lr_classifier',DecisionTreeClassifier())
])

In [8]:
pipeline_rf = Pipeline([
    ('scaler3', StandardScaler()),
    ('pca3', PCA(n_components=3)),
    ('lr_classifier',RandomForestClassifier())
])

In [9]:
# Let's make the list of pipelines
pipelines = [pipeline_lr,pipeline_dt,pipeline_rf]

In [10]:
best_accuracy = 0.0
best_classifier = 0
best_pipeline = ""

In [11]:
# dictionary of pipelines and classifier types for ease of reference
pipe_dict = {0:'Logistic Regression',1:'Decision Tree',2:'Random Forest'}

In [12]:
# fit the pipelines
for pipe in pipelines:
    pipe.fit(X_train,y_train)

In [13]:
# model accuracy
for i,model in enumerate(pipelines):
    print('{} Test accuracy: {}'.format(pipe_dict[i],np.round(model.score(X_test,y_test)*100),1))

Logistic Regression Test accuracy: 100.0
Decision Tree Test accuracy: 97.0
Random Forest Test accuracy: 100.0
