# Pipeline

In [1]:
import numpy as np
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [2]:
iris = datasets.load_iris()
X = iris.data
y = iris.target

In [4]:
scaler = MinMaxScaler((0,1))
scaler.fit(X_train)
X_train_transfoemd = scaler.transform(X_train)

lr = LogisticRegression()
lr.fit(X_train_transformed, y)

#lr.fit(X_train_transformed)

#X_test_tr = scaler.transform(X_test)
#lr.predict(X_test_tr, y_test)


NameError: name 'X_train' is not defined

## 1. `sklearn.pipeline.Pipeline` 이용

In [5]:
norm_pipe = Pipeline([
    ('Normalization', MinMaxScaler(feature_range=(0, 1))), 
    ('LR', LogisticRegression(random_state=1234))
])  

In [7]:
print(norm_pipe)

Pipeline(memory=None,
     steps=[('Normalization', MinMaxScaler(copy=True, feature_range=(0, 1))), ('LR', LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=1234, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False))])


In [8]:
stand_pipe = Pipeline([
    ('Standardization', StandardScaler()),
    ('LR', LogisticRegression(random_state=1234))
])

In [9]:
print(stand_pipe)

Pipeline(memory=None,
     steps=[('Standardization', StandardScaler(copy=True, with_mean=True, with_std=True)), ('LR', LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=1234, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False))])


In [10]:
param_grid = {
    'Normalization__feature_range': [(0, 1), (-0.5, 0.5), (-1, 1)],
    'LR__C': [0.1, 0.5, 1.0, 5.0],
    'LR__penalty': ['l1', 'l2']
}

In [None]:
norm_grid = GridSearchCV(norm_pipe, param_grid, cv=5, verbose=1)
norm_grid.fit(X, y)

In [None]:
norm_grid.grid_scores_

In [None]:
print(norm_grid.best_params_)
print(norm_grid.best_score_)

In [None]:
param_grid = {
    'LR__C': [0.1, 0.5, 1.0, 5.0],
    'LR__penalty': ['l1', 'l2']
}

In [None]:
stand_grid = GridSearchCV(stand_pipe, param_grid, cv=5, verbose=1)
stand_grid.fit(X, y)

In [None]:
stand_grid.grid_scores_

In [None]:
print(stand_grid.best_params_)
print(stand_grid.best_score_)

## 2. `sklearn.pipeline.make_pipeline`

In [None]:
norm_pipe = make_pipeline(
    MinMaxScaler(feature_range=(0,1)), 
    LogisticRegression(random_state=1234)
)  

In [None]:
print(norm_pipe)

In [None]:
stand_pipe = make_pipeline(
    StandardScaler(), 
    LogisticRegression(random_state=1234)
)

In [None]:
# 모듈에 이름을 붙이면 error 발생
stand_pipe = make_pipeline(
    ('aa', StandardScaler()), 
    ('bb', LogisticRegression(random_state=1234))
)

In [None]:
print(stand_pipe)

In [None]:
param_grid = {
    'logisticregression__C': [0.1, 0.5, 1.0, 5.0],
    'logisticregression__penalty': ['l1', 'l2']
}

In [None]:
norm_grid = GridSearchCV(norm_pipe, param_grid, cv=5, verbose=1)
norm_grid.fit(X, y)

In [None]:
stand_grid = GridSearchCV(stand_pipe, param_grid, cv=4, verbose=1)
stand_grid.fit(X, y)