# Pipeline

In [2]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

In [3]:
df = pd.read_csv("iris.csv")

In [4]:
df.head()

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Class
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa


In [5]:
df.tail()

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Class
145,6.7,3.0,5.2,2.3,Virginica
146,6.3,2.5,5.0,1.9,Virginica
147,6.5,3.0,5.2,2.0,Virginica
148,6.2,3.4,5.4,2.3,Virginica
149,5.9,3.0,5.1,1.8,Virginica


In [8]:
X = df.iloc[:, 0:4]
y = df.iloc[:, -1]

In [9]:
X.head()

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [10]:
y.head()

0    Setosa
1    Setosa
2    Setosa
3    Setosa
4    Setosa
Name: Class, dtype: object

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= 0.3, random_state=50)

In [12]:
# Logistic Regression
pipeline_log = Pipeline([
    ('Scaler', StandardScaler()),
    ('Logistic Regression', LogisticRegression())
])

In [13]:
print(pipeline_log)

Pipeline(steps=[('Scaler', StandardScaler()),
                ('Logistic Regression', LogisticRegression())])


In [14]:
pipeline_log.fit(X_train, y_train)

Pipeline(steps=[('Scaler', StandardScaler()),
                ('Logistic Regression', LogisticRegression())])

In [15]:
pipeline_log.score(X_test, y_test)

0.9777777777777777

In [19]:
# Decision Tree
pipeline_dt = Pipeline([
    ('Scaler', StandardScaler()),
    ('Decision Tree', DecisionTreeClassifier())
])

In [20]:
print(pipeline_dt)

Pipeline(steps=[('Scaler', StandardScaler()),
                ('Decision Tree', DecisionTreeClassifier())])


In [21]:
pipeline_dt.fit(X_train, y_train)

Pipeline(steps=[('Scaler', StandardScaler()),
                ('Decision Tree', DecisionTreeClassifier())])

In [22]:
pipeline_dt.score(X_test, y_test)

0.9555555555555556

In [23]:
# Random Forest
pipeline_rf = Pipeline([
    ('Scaler', StandardScaler()),
    ('Random Forest', RandomForestClassifier())
])

In [24]:
print(pipeline_rf)

Pipeline(steps=[('Scaler', StandardScaler()),
                ('Random Forest', RandomForestClassifier())])


In [25]:
pipeline_rf.fit(X_train, y_train)

Pipeline(steps=[('Scaler', StandardScaler()),
                ('Random Forest', RandomForestClassifier())])

In [26]:
pipeline_rf.score(X_test, y_test)

0.9555555555555556

In [27]:
# Logistic Regression
pipeline_log = Pipeline([
    ('Scaler', StandardScaler()),
    ('PCA', PCA(n_components=2)),
    ('Logistic Regression', LogisticRegression())
])

In [28]:
print(pipeline_log)

Pipeline(steps=[('Scaler', StandardScaler()), ('PCA', PCA(n_components=2)),
                ('Logistic Regression', LogisticRegression())])


In [29]:
pipeline_log.fit(X_train, y_train)

Pipeline(steps=[('Scaler', StandardScaler()), ('PCA', PCA(n_components=2)),
                ('Logistic Regression', LogisticRegression())])

In [30]:
pipeline_log.score(X_test, y_test)

0.8888888888888888