<div class="alert alert-block alert-success">
    <h1 align="center">Scikit-Learn Tips</h1>
    <h3 align="center">Tip 17 : interactive diagrams of Pipelines</h3>
</div>

In [5]:
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_selection import SelectPercentile, chi2
from sklearn.linear_model import LogisticRegression
from sklearn.compose import make_column_transformer
from sklearn.pipeline import make_pipeline

In [8]:
# Read the raw table from disk.
df = pd.read_csv('data.csv')

# Feature frame: the six columns used as model inputs.
X = df[[
    'parch',
    'fare',
    'embarked',
    'sex',
    'name',
    'age',
]]

# Target series: the column the classifier is trained to predict.
y = df['survived']

In [9]:
# One-hot encoder with default settings.
ohe = OneHotEncoder()

# Imputer configured with the 'constant' strategy.
imp_constant = SimpleImputer(
    strategy='constant',
)

In [10]:
# Vectorizer and imputer, both with default settings.
vect = CountVectorizer()
imp = SimpleImputer()

# Two-step sub-pipeline: constant imputation followed by one-hot encoding.
imp_ohe = make_pipeline(
    imp_constant,
    ohe,
)

In [11]:
# Pipeline step 1: per-column preprocessing.
# Each (transformer, columns) pair routes a subset of columns to one transformer.
ct = make_column_transformer(
    # impute, then one-hot encode these two columns
    (imp_ohe, ['embarked', 'sex']),
    # vectorize the text column (selected by a plain string, not a list)
    (vect, 'name'),
    # impute the numeric columns with the imputer's default settings
    (imp, ['age', 'fare']),
    # forward this column unchanged
    ('passthrough', ['parch']),
)

In [12]:
# Pipeline step 2: feature selection.
# Score features with chi2 and keep the top 50 percent.
selection = SelectPercentile(
    score_func=chi2,
    percentile=50,
)

In [13]:
# Pipeline step 3: the final estimator.
# Logistic regression using the 'liblinear' solver.
logreg = LogisticRegression(
    solver='liblinear',
)

In [14]:
# Switch scikit-learn's global display setting to 'diagram' so that
# estimators shown as a cell's last expression render as diagrams.
from sklearn import set_config

set_config(
    display='diagram',
)

In [15]:
# Assemble the full pipeline: preprocessing -> feature selection -> classifier.
pipe = make_pipeline(
    ct,
    selection,
    logreg,
)

# Bare last expression: the notebook displays the pipeline's rich repr.
pipe

In [17]:
# export the diagram to a standalone HTML file
from sklearn.utils import estimator_html_repr

# Pass the encoding explicitly: without it, open() uses the platform's
# default text encoding, so the write may fail or yield a mis-encoded file
# on non-UTF-8 locales if the generated HTML contains non-ASCII characters.
with open('pipeline.html', 'w', encoding='utf-8') as f:
    f.write(estimator_html_repr(pipe))