# Pipeline

- Memudahkan proses scale/transform, dilanjutkan ke pembuatan model

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the iris dataset once and reuse it for both the features and the labels
# (the original called load_iris() twice).
iris = load_iris()

# Columns are abbreviated: SL/SW = sepal length/width, PL/PW = petal length/width,
# following the feature order documented for load_iris.
df = pd.DataFrame(
    iris['data'],
    columns = ['SL', 'SW', 'PL', 'PW']
)
df['target'] = iris['target']

# Show the first three rows as a quick sanity check.
df.head(3)

Unnamed: 0,SL,SW,PL,PW,target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0


<hr>

### 1. Without pipeline

- Features distandardisasi dengan `StandardScaler`, lalu digunakan untuk melatih model `LogisticRegression`.

In [6]:
# 1. Standard scaler: created unfitted here; it is fitted on the feature
#    columns in the following cell. StandardScaler is imported in the
#    notebook's top import cell.
scaler = StandardScaler()

In [16]:
# Fit the scaler on the four feature columns and standardize them in one call.
# `scaler` stays fitted afterwards, so it can be reused on new samples.
xSS = scaler.fit_transform(df[['SL', 'SW', 'PL', 'PW']])

In [11]:
# 2. Logistic regression model, to be trained on the standardized features.
#    LogisticRegression is imported in the notebook's top import cell.
model = LogisticRegression()

In [12]:
# Train on the standardized features (xSS) against the class labels.
model.fit(X=xSS, y=df['target'])

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [19]:
# Prediction: new data must be standardized with the same fitted scaler
# before it is passed to the model.
# The scaler was fitted on a DataFrame with named columns, so the new sample is
# given the same column names; a bare nested list triggers a feature-name
# mismatch warning on recent scikit-learn versions.
new_sample = pd.DataFrame([[1, 1, 1, 1]], columns=['SL', 'SW', 'PL', 'PW'])
model.predict(scaler.transform(new_sample))

array([0])

<hr>

### 2. Using Pipeline

- Features distandardisasi dengan `StandardScaler`, lalu digunakan untuk melatih model `LogisticRegression`, semuanya dalam satu objek pipeline.

In [21]:
from sklearn.pipeline import make_pipeline

In [22]:
# Chain scaling and classification into a single estimator. make_pipeline
# names the steps automatically ('standardscaler', 'logisticregression').
myPipe = make_pipeline(StandardScaler(), LogisticRegression())

In [23]:
# Fit the whole pipeline on the raw (unscaled) features: the scaler step is
# fitted and applied first, then the logistic regression is trained on its output.
myPipe.fit(
    X=df[['SL', 'SW', 'PL', 'PW']],
    y=df['target'],
)

Pipeline(memory=None,
         steps=[('standardscaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('logisticregression',
                 LogisticRegression(C=1.0, class_weight=None, dual=False,
                                    fit_intercept=True, intercept_scaling=1,
                                    l1_ratio=None, max_iter=100,
                                    multi_class='auto', n_jobs=None,
                                    penalty='l2', random_state=None,
                                    solver='lbfgs', tol=0.0001, verbose=0,
                                    warm_start=False))],
         verbose=False)

In [24]:
# Prediction: feature values can be passed as-is, without standardizing them
# manually, because the pipeline applies its scaler step first.
myPipe.predict(X=df[['SL', 'SW', 'PL', 'PW']])

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [25]:
# Predict a single raw sample; the pipeline standardizes it internally.
# The pipeline was fitted on a DataFrame with named columns, so the sample
# carries the same column names to avoid a feature-name mismatch warning on
# recent scikit-learn versions.
raw_sample = pd.DataFrame([[1, 1, 1, 1]], columns=['SL', 'SW', 'PL', 'PW'])
myPipe.predict(raw_sample)

array([0])