# Workflow for Visualizing Estimators

We can build a simple pipeline for classification:

In [1]:
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

In [2]:
# PCA configured to keep 10 components.
pca = PCA(n_components=10)
# A bare trailing expression makes the notebook display the estimator.
pca

_EstHTMLInfo(type='single', estimators=[PCA(copy=True, iterated_power='auto', n_components=10, random_state=None,
    svd_solver='auto', tol=0.0, whiten=False)], names=['PCA'], name_details=["PCA(copy=True, iterated_power='auto', n_components=10, random_state=None,&#xa;    svd_solver='auto', tol=0.0, whiten=False)"])
hello


In [3]:
# Wrap the PCA in a grid search over n_components (candidate values: 1 and 2).
grid = GridSearchCV(pca, {'n_components': [1, 2]})
# Display the meta-estimator.
grid

_EstHTMLInfo(type='single-meta', estimators=[PCA(copy=True, iterated_power='auto', n_components=10, random_state=None,
    svd_solver='auto', tol=0.0, whiten=False)], names=['GridSearchCV'], name_details=["GridSearchCV(cv=None, error_score=nan,&#xa;             estimator=PCA(copy=True, iterated_power='auto', n_components=10,&#xa;                           random_state=None, svd_solver='auto', tol=0.0,&#xa;                           whiten=False),&#xa;             iid='deprecated', n_jobs=None, param_grid={'n_components': [1, 2]},&#xa;             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,&#xa;             scoring=None, verbose=0)"])
_EstHTMLInfo(type='single', estimators=[PCA(copy=True, iterated_power='auto', n_components=10, random_state=None,
    svd_solver='auto', tol=0.0, whiten=False)], names=['PCA'], name_details=['PCA(n_components=10)'])
hello


In [None]:
# Print the plain-text repr of the grid search. The builtin repr() is the
# idiomatic way to invoke an object's __repr__.
print(repr(grid))

In [None]:
# Standalone estimators; `lr` is reused by a later pipeline cell.
pca = PCA()
lr = LogisticRegression()

# Two-step classification pipeline: PCA followed by logistic regression.
# The steps are fresh instances, independent of `pca` and `lr` above.
pipe1 = Pipeline(steps=[
    ('pca', PCA()),
    ('lr', LogisticRegression()),
])

pipe1

Next, we add a column transformer that handles categorical and numerical features:

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler

In [None]:
# Categorical branch: fill missing values with the constant string 'missing',
# then one-hot encode (handle_unknown='ignore' tolerates unseen categories).
cat_pipe = Pipeline(steps=[
    ('impute', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Numerical branch: impute with SimpleImputer's default strategy, then scale.
num_pipe = Pipeline(steps=[
    ('impute', SimpleImputer()),
    ('scale', StandardScaler()),
])

# Route each group of columns to its branch.
preprocessor = ColumnTransformer(transformers=[
    ('cat', cat_pipe, ['gender', 'age_bracket']),
    ('num', num_pipe, ['age', 'income']),
])
# Backward-compatible alias: other cells refer to the original, misspelled
# name, so keep it bound to the same object.
preprcoessor = preprocessor

preprocessor

Next, we can attach a classifier to this preprocessor:

In [None]:
# Chain the column preprocessor with the logistic regression defined earlier.
pipe2 = Pipeline(steps=[
    ('preprocessor', preprcoessor),
    ('lr', lr),
])

pipe2

We can build a more complicated classifier at the end:

In [None]:
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

In [None]:
# Voting ensemble over three classifiers, each built with default arguments.
vote_clf = VotingClassifier(estimators=[
    ('rf', RandomForestClassifier()),
    ('mlp', MLPClassifier()),
    ('lr', LogisticRegression()),
])

# Same preprocessor as before, now feeding the voting ensemble.
pipe3 = Pipeline(steps=[
    ('preprocessor', preprcoessor),
    ('clf', vote_clf),
])

pipe3

Next, we can do some dimensionality reduction with a PCA and a Truncated SVD:

In [None]:
from sklearn.pipeline import FeatureUnion
from sklearn.decomposition import PCA, TruncatedSVD

# Feature union of a 1-component PCA and a 2-component truncated SVD.
dim_reduce = FeatureUnion(transformer_list=[
    ('pca', PCA(n_components=1)),
    ('svd', TruncatedSVD(n_components=2)),
])

# Preprocess -> reduce dimensionality -> classify.
# Note: `preprcoessor` and `vote_clf` are the same objects used in pipe3.
pipe4 = Pipeline(steps=[
    ('preprocessor', preprcoessor),
    ('dim_reduce', dim_reduce),
    ('clf', vote_clf),
])

pipe4