# Workflow for Visualizing Estimators

We can build a simple pipeline for classification:

In [1]:
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.inspection import display_estimator

In [2]:
# Standalone estimator instances; `lr` is reused by a later pipeline.
pca = PCA()
lr = LogisticRegression()

# Two-step classification pipeline. Note that it is built from fresh
# PCA / LogisticRegression objects, not from `pca` and `lr` above.
pipe1 = Pipeline(
    steps=[
        ("pca", PCA()),
        ("lr", LogisticRegression()),
    ]
)

# Last expression of the cell, so its return value is the cell output.
display_estimator(pipe1)

'<html><head><style>.sk-toggleable {background-color: white;}.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;}.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}div.sk-toggleable__content pre {margin: 0.5rem;margin-bottom: 0;border-radius: 0.25em;}.sk-toggleable__control:checked~.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}.sk-estimator {font-family: monospace;background-color: #f0f8ff;padding: 0.5em;margin: 0.25em 0.25em;border: 1px dotted black;border-radius: 0.25em;text-align: center;}.sk-parallel-item::after {content: "";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}.sk-serial::before {content: "";position: absolute;border-left: 1px solid gray;top: 2em;bottom: 0;left: 50%;}.s

To add a column transformer that handles categorical and numerical features:

In [3]:
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler

In [4]:
# Categorical branch: fill missing entries with the constant 'missing',
# then one-hot encode (unknown categories are ignored, not an error).
cat_pipe = Pipeline(
    steps=[
        ("impute", SimpleImputer(strategy="constant", fill_value="missing")),
        ("onehot", OneHotEncoder(handle_unknown="ignore")),
    ]
)

# Numerical branch: impute with SimpleImputer's default settings, then scale.
num_pipe = Pipeline(
    steps=[
        ("impute", SimpleImputer()),
        ("scale", StandardScaler()),
    ]
)

# Route each group of columns through its own sub-pipeline.
# NOTE: the variable name is misspelled, but other cells reference it
# under this exact spelling, so it is kept as-is here.
preprcoessor = ColumnTransformer(
    transformers=[
        ("cat", cat_pipe, ["gender", "age_bracket"]),
        ("num", num_pipe, ["age", "income"]),
    ]
)

# Last expression of the cell, so its return value is the cell output.
display_estimator(preprcoessor)

'<html><head><style>.sk-toggleable {background-color: white;}.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;}.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}div.sk-toggleable__content pre {margin: 0.5rem;margin-bottom: 0;border-radius: 0.25em;}.sk-toggleable__control:checked~.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}.sk-estimator {font-family: monospace;background-color: #f0f8ff;padding: 0.5em;margin: 0.25em 0.25em;border: 1px dotted black;border-radius: 0.25em;text-align: center;}.sk-parallel-item::after {content: "";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}.sk-serial::before {content: "";position: absolute;border-left: 1px solid gray;top: 2em;bottom: 0;left: 50%;}.s

Next, we can attach a classifier to this preprocessor:

In [5]:
# Chain the column-wise preprocessor with the logistic regression
# instance `lr` created in an earlier cell.
pipe2 = Pipeline(
    steps=[
        ("preprocessor", preprcoessor),
        ("lr", lr),
    ]
)

# Last expression of the cell, so its return value is the cell output.
display_estimator(pipe2)

'<html><head><style>.sk-toggleable {background-color: white;}.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;}.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}div.sk-toggleable__content pre {margin: 0.5rem;margin-bottom: 0;border-radius: 0.25em;}.sk-toggleable__control:checked~.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}.sk-estimator {font-family: monospace;background-color: #f0f8ff;padding: 0.5em;margin: 0.25em 0.25em;border: 1px dotted black;border-radius: 0.25em;text-align: center;}.sk-parallel-item::after {content: "";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}.sk-serial::before {content: "";position: absolute;border-left: 1px solid gray;top: 2em;bottom: 0;left: 50%;}.s

We can build a more complicated classifier at the end:

In [6]:
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

In [7]:
# Ensemble that combines three different classifiers by voting.
vote_clf = VotingClassifier(
    estimators=[
        ("rf", RandomForestClassifier()),
        ("mlp", MLPClassifier()),
        ("lr", LogisticRegression()),
    ]
)

# Same preprocessor as before, now feeding the voting ensemble.
pipe3 = Pipeline(
    steps=[
        ("preprocessor", preprcoessor),
        ("clf", vote_clf),
    ]
)

# Last expression of the cell, so its return value is the cell output.
display_estimator(pipe3)

'<html><head><style>.sk-toggleable {background-color: white;}.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;}.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}div.sk-toggleable__content pre {margin: 0.5rem;margin-bottom: 0;border-radius: 0.25em;}.sk-toggleable__control:checked~.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}.sk-estimator {font-family: monospace;background-color: #f0f8ff;padding: 0.5em;margin: 0.25em 0.25em;border: 1px dotted black;border-radius: 0.25em;text-align: center;}.sk-parallel-item::after {content: "";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}.sk-serial::before {content: "";position: absolute;border-left: 1px solid gray;top: 2em;bottom: 0;left: 50%;}.s

Next, we can do some dimensionality reduction with a PCA and a Truncated SVD:

In [8]:
from sklearn.pipeline import FeatureUnion
from sklearn.decomposition import PCA, TruncatedSVD

# Run PCA (1 component) and TruncatedSVD (2 components) side by side
# and combine their outputs.
dim_reduce = FeatureUnion(
    transformer_list=[
        ("pca", PCA(n_components=1)),
        ("svd", TruncatedSVD(n_components=2)),
    ]
)

# Preprocess -> reduce dimensionality -> vote.
# NOTE(review): the one-hot branch of the preprocessor may produce sparse
# output, which PCA may not accept at fit time depending on the
# scikit-learn version -- confirm before fitting this pipeline.
pipe4 = Pipeline(
    steps=[
        ("preprocessor", preprcoessor),
        ("dim_reduce", dim_reduce),
        ("clf", vote_clf),
    ]
)

# Last expression of the cell, so its return value is the cell output.
display_estimator(pipe4)

'<html><head><style>.sk-toggleable {background-color: white;}.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;}.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}div.sk-toggleable__content pre {margin: 0.5rem;margin-bottom: 0;border-radius: 0.25em;}.sk-toggleable__control:checked~.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}.sk-estimator {font-family: monospace;background-color: #f0f8ff;padding: 0.5em;margin: 0.25em 0.25em;border: 1px dotted black;border-radius: 0.25em;text-align: center;}.sk-parallel-item::after {content: "";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}.sk-serial::before {content: "";position: absolute;border-left: 1px solid gray;top: 2em;bottom: 0;left: 50%;}.s