# Workflow for Visualizing Estimators

We can build a simple pipeline for classification:

In [1]:
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.inspection import display_estimator

In [2]:
# Stand-alone estimator instances. `lr` is reused by a later cell; the
# pipeline below builds its own fresh PCA / LogisticRegression objects
# rather than sharing these two.
pca = PCA()
lr = LogisticRegression()

# Minimal two-step classification pipeline: PCA followed by logistic
# regression, both constructed with library defaults.
pipe1 = Pipeline(
    steps=[
        ('pca', PCA()),
        ('lr', LogisticRegression()),
    ]
)

# Bare last expression so the notebook renders the returned diagram.
display_estimator(pipe1)

To add a column transformer that handles categorical and numerical features:

In [3]:
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler

In [4]:
# Categorical branch: impute with the constant string 'missing', then
# one-hot encode with handle_unknown='ignore'.
cat_pipe = Pipeline(
    steps=[
        ('impute', SimpleImputer(fill_value='missing', strategy='constant')),
        ('onehot', OneHotEncoder(handle_unknown='ignore')),
    ]
)

# Numerical branch: impute and scale, both with library defaults.
num_pipe = Pipeline(
    steps=[
        ('impute', SimpleImputer()),
        ('scale', StandardScaler()),
    ]
)

# Route each group of columns to its branch. The variable name is kept
# exactly as spelled because other cells in this notebook refer to it.
preprcoessor = ColumnTransformer(
    transformers=[
        ('cat', cat_pipe, ['gender', 'age_bracket']),
        ('num', num_pipe, ['age', 'income']),
    ]
)

# Bare last expression so the notebook renders the returned diagram.
display_estimator(preprcoessor)

Next, we can attach a classifier to this preprocessor:

In [5]:
# Chain the column preprocessor with the logistic regression instance
# `lr` created in an earlier cell (the same object, not a new one).
pipe2 = Pipeline(
    steps=[
        ('preprocessor', preprcoessor),
        ('lr', lr),
    ]
)

# Bare last expression so the notebook renders the returned diagram.
display_estimator(pipe2)

We can build a more complicated classifier at the end:

In [7]:
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

In [8]:
# Ensemble of three classifiers, each constructed with library defaults.
vote_clf = VotingClassifier(
    estimators=[
        ('rf', RandomForestClassifier()),
        ('mlp', MLPClassifier()),
        ('lr', LogisticRegression()),
    ]
)

# Same preprocessor object as before, now feeding the voting ensemble
# as the final step.
pipe3 = Pipeline(
    steps=[
        ('preprocessor', preprcoessor),
        ('clf', vote_clf),
    ]
)

# Bare last expression so the notebook renders the returned diagram.
display_estimator(pipe3)

Next, we can do some dimensionality reduction with a PCA and a Truncated SVD:

In [10]:
from sklearn.pipeline import FeatureUnion
from sklearn.decomposition import PCA, TruncatedSVD

# Two reducers placed side by side in a FeatureUnion: a 1-component PCA
# and a 2-component TruncatedSVD.
dim_reduce = FeatureUnion(
    transformer_list=[
        ("pca", PCA(n_components=1)),
        ("svd", TruncatedSVD(n_components=2)),
    ]
)

# Full pipeline: preprocessing -> dimensionality reduction -> voting
# classifier. The preprocessor and `vote_clf` are the objects built in
# earlier cells, passed in directly.
pipe4 = Pipeline(
    steps=[
        ('preprocessor', preprcoessor),
        ('dim_reduce', dim_reduce),
        ('clf', vote_clf),
    ]
)

# Bare last expression so the notebook renders the returned diagram.
display_estimator(pipe4)