In [6]:
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load iris as an (X, y) pair and hold out a test split.
# The split is seeded so that Restart & Run All reproduces the same
# train/test rows (and therefore the same downstream fits).
X, y = load_iris(return_X_y=True)
train_X, test_X, train_y, test_y = train_test_split(X, y, random_state=42)

In [7]:
# Baseline: logistic regression fitted directly on the unscaled training
# features, then used to predict the held-out rows.
reg = LogisticRegression().fit(train_X, train_y)
scores = reg.predict(test_X)

In [8]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Same model as before, but with standardisation chained in front of it:
# each step is a (name, estimator) pair and the steps run in list order.
est_list = [
    ('scaler', StandardScaler()),
    ('logistic', LogisticRegression()),
]
pipe = Pipeline(steps=est_list).fit(train_X, train_y)
scores = pipe.predict(test_X)

In [9]:
from sklearn.ensemble import GradientBoostingClassifier

def score_iris(est, random_state=None):
    """Fit ``est`` behind a ``StandardScaler`` on an iris train split and
    predict the held-out split.

    Parameters
    ----------
    est : estimator
        Used directly as the final step (named ``'your_estimator'``) of the
        pipeline that is built and fitted here.
    random_state : int, RandomState instance or None, default=None
        Forwarded to ``train_test_split``. The default keeps the previous
        behaviour (an unseeded split on every call); pass an int to get a
        repeatable split.

    Returns
    -------
    pipe : Pipeline
        The fitted two-step pipeline (``'scaler'`` then ``'your_estimator'``).
    scores
        The result of ``pipe.predict`` on the held-out rows. Despite the
        name, these are predictions rather than an accuracy figure.

    Notes
    -----
    The train/test split is made inside this function and the test labels
    are not returned. A caller who wants to compare ``scores`` against the
    true labels needs to pass ``random_state`` and repeat the same split.
    """
    # Data preparation
    X, y = load_iris(return_X_y=True)
    train_X, test_X, train_y, test_y = train_test_split(
        X, y, random_state=random_state
    )

    # Scale first, then hand the scaled features to the caller's estimator.
    est_list = [
        ('scaler', StandardScaler()),
        ('your_estimator', est),
    ]
    pipe = Pipeline(est_list)
    pipe.fit(train_X, train_y)
    scores = pipe.predict(test_X)
    return pipe, scores

In [12]:
# Plug a 50-tree gradient boosting classifier into the scaler pipeline
# that score_iris builds and fits.
gbt = GradientBoostingClassifier(n_estimators=50)
pipe, scores = score_iris(est=gbt)

In [13]:
# Inspect the (name, estimator) steps of the pipeline returned by score_iris(gbt).
pipe.steps

[('scaler', StandardScaler()),
 ('your_estimator', GradientBoostingClassifier(n_estimators=50))]

# Pipeline in a Pipeline

In [20]:
from sklearn.impute import SimpleImputer
import numpy as np

# A pipeline can itself be an estimator: mean-impute NaNs, then gradient
# boosting. score_iris puts this behind its scaler, which gives a pipeline
# nested inside a pipeline.
pipe_est = Pipeline([
    ('imputer', SimpleImputer(missing_values=np.nan, strategy='mean')),
    ('gbt', GradientBoostingClassifier()),
])
pipe, scores = score_iris(est=pipe_est)

In [21]:
# The 'your_estimator' step is now itself a Pipeline (imputer -> gbt).
pipe.steps

[('scaler', StandardScaler()),
 ('your_estimator',
  Pipeline(steps=[('imputer', SimpleImputer()),
                  ('gbt', GradientBoostingClassifier())]))]

In [27]:
!pip install civisml-extensions

Collecting civisml-extensions
  Downloading civisml_extensions-0.3.1-py3-none-any.whl (39 kB)
Installing collected packages: civisml-extensions
Successfully installed civisml-extensions-0.3.1


In [29]:
from civismlext.stacking import StackedClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.datasets import load_iris

In [32]:
iris_X, iris_y = load_iris(return_X_y=True)

# Three base learners followed by the meta-estimator: the predictions
# produced by the three models above it are used again as its features.
est_list = [
    ('logistic', LogisticRegression(max_iter=1000)),
    ('rf', RandomForestClassifier()),
    ('gbt', GradientBoostingClassifier()),
    ('meta', LogisticRegression(max_iter=1000)),
]
stacker = StackedClassifier(est_list)

# Fit and predict on the same rows, so `scores` holds in-sample predictions.
stacker.fit(iris_X, iris_y)
scores = stacker.predict(iris_X)

In [35]:
# Fraction of rows where the stacked prediction equals the label. The same
# rows were used for fitting, so this is training accuracy.
np.mean(iris_y == scores)

0.9933333333333333

In [36]:
# Chain scaling and mean-imputation in front of the stacked classifier.
# `stacker` is the instance created in the earlier cell; these lines only
# build the pipeline and do not fit it.
pipe = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('imputer', SimpleImputer(missing_values=np.nan, strategy='mean')),
    ('stacker', stacker),
])