# Workflow for Visualizing Estimators

We can build a simple pipeline for classification:

In [2]:
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.svm import LinearSVC
from sklearn.ensemble import StackingClassifier
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import FeatureUnion
from sklearn.decomposition import TruncatedSVD
from sklearn.cluster import Birch
from sklearn.cluster import AgglomerativeClustering

In [3]:
# Start with a single estimator: a PCA configured with 10 components.
pca = PCA(n_components=10)
# Bare last expression so the estimator is shown as the cell output.
pca

In [4]:
# Three-step classification pipeline: scale -> PCA (10 components) -> logistic regression.
pipe = Pipeline(
    steps=[
        ('scale', StandardScaler()),
        ('pca', PCA(n_components=10)),
        ('clf', LogisticRegression()),
    ]
)

In [5]:
# Show the pipeline built in the previous cell as this cell's output.
pipe

In [6]:
# Wrap the standalone PCA in a grid search over its number of components.
grid = GridSearchCV(
    estimator=pca,
    param_grid={'n_components': [1, 2]},
)
grid

In [7]:
# Display the same grid-search object again (it is unchanged since the previous cell).
grid

In [8]:
# Rebind `pca` to a default-configured PCA and create a standalone `lr`.
# Note: pipe1 below builds its own fresh PCA / LogisticRegression instances
# rather than reusing these two variables; `lr` is picked up again by pipe2.
pca = PCA()
lr = LogisticRegression()

# Minimal two-step pipeline with default hyper-parameters.
pipe1 = Pipeline(steps=[('pca', PCA()), ('lr', LogisticRegression())])
pipe1

To add a column transformer that handles categorical and numerical features:

In [9]:
# Categorical branch: fill missing entries with the constant 'missing',
# then one-hot encode with handle_unknown='ignore'.
cat_pipe = Pipeline([
    ('impute', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Numerical branch: impute with SimpleImputer's defaults, then standardize.
num_pipe = Pipeline([
    ('impute', SimpleImputer()),
    ('scale', StandardScaler())
])

# Route each column group through its matching sub-pipeline.
# The 'cat' branch uses cat_pipe: a bare SimpleImputer() uses the default
# mean strategy, which is not suitable for the (presumably string-valued)
# categorical columns, and it left cat_pipe defined but unused.
# The variable name is kept as-is because later cells reference it.
preprcoessor = ColumnTransformer([
    ('cat', cat_pipe, ['gender', 'age_bracket']),
    ('num', num_pipe, ['age', 'income']),
])

preprcoessor

In [10]:
# Same two branches, plus two placeholder entries: one 'drop' string and one None.
# NOTE(review): ColumnTransformer documents 'drop' / 'passthrough' / estimators as
# transformer values; None appears to be here only to exercise the display —
# confirm before trying to fit this object.
preprcoessor2 = ColumnTransformer(
    transformers=[
        ('cat', cat_pipe, ['gender', 'age_bracket']),
        ('num', num_pipe, ['age', 'income']),
        ('not_needed', 'drop', ['unknown']),
        ('not_needed_again', None, ['unknown2']),
    ]
)
preprcoessor2

Next, we can attach a classifier to this preprocessor:

In [11]:
# Chain the column preprocessor with the standalone logistic regression `lr`.
pipe2 = Pipeline(
    steps=[
        ('preprocessor', preprcoessor),
        ('lr', lr),
    ]
)
pipe2

We can build a more complicated classifier at the end:

In [12]:
# Voting ensemble over three default-configured classifiers.
vote_clf = VotingClassifier(
    estimators=[
        ('rf', RandomForestClassifier()),
        ('mlp', MLPClassifier()),
        ('lr', LogisticRegression()),
    ]
)

# Preprocessor -> two placeholder steps (None and 'passthrough') -> voting ensemble.
pipe3 = Pipeline(
    steps=[
        ('preprocessor', preprcoessor),
        ('do_nothing', None),
        ('do_nothing_again', 'passthrough'),
        ('clf', vote_clf),
    ]
)
pipe3

Next, we can do some dimensionality reduction with a PCA and a Truncated SVD:

In [13]:
# Union of two reducers: a 1-component PCA and a 2-component TruncatedSVD.
dim_reduce = FeatureUnion(
    transformer_list=[
        ('pca', PCA(n_components=1)),
        ('svd', TruncatedSVD(n_components=2)),
    ]
)

# Preprocessor -> feature union -> voting ensemble.
pipe4 = Pipeline(
    steps=[
        ('preprocessor', preprcoessor),
        ('dim_reduce', dim_reduce),
        ('clf', vote_clf),
    ]
)
pipe4

In [14]:
# Single PCA for dimension reduction: replace the 'dim_reduce' step of pipe4
# with a default PCA. set_params modifies pipe4 itself; the `dim_reduce`
# variable is left untouched.
pipe4.set_params(dim_reduce=PCA())

In [15]:
# Base learners for the stacking ensemble.
estimators = [
    ('lr', LogisticRegression()),
    ('svc', LinearSVC()),
]

# final_estimator is passed explicitly as None.
clf = StackingClassifier(estimators=estimators, final_estimator=None)
clf

In [16]:
# Preprocessor -> feature union -> extra PCA step (named 'hello') -> stacking classifier.
pipe5 = Pipeline(
    steps=[
        ('preprocessor', preprcoessor),
        ('dim_reduce', dim_reduce),
        ('hello', PCA()),
        ('stacker', clf),
    ]
)
pipe5

In [17]:
# Meta-estimator: wrap the whole pipe5 pipeline in a grid search with an empty parameter grid.
GridSearchCV(
    estimator=pipe5,
    param_grid={},
)

In [18]:
# Weird pipeline: the numeric branch nests the `dim_reduce` feature union
# inside the column transformer.
# Note: this cell rebinds `num_pipe` and `preprcoessor`, replacing the
# definitions of those names made in earlier cells.
cat_pipe2 = Pipeline(
    steps=[
        ('impute', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore')),
    ]
)

num_pipe = Pipeline(
    steps=[
        ('impute', SimpleImputer()),
        ('scale', StandardScaler()),
        ('dim_reduce', dim_reduce),
    ]
)

preprcoessor = ColumnTransformer(
    transformers=[
        ('cat', cat_pipe2, ['gender', 'age_bracket']),
        ('num', num_pipe, ['age', 'income']),
    ]
)

# Rebuilt preprocessor followed by the voting ensemble.
pipe6 = Pipeline(steps=[('prep', preprcoessor), ('clf', vote_clf)])
pipe6