In [13]:
import pandas as pd
import numpy as np
import seaborn as sns

import sklearn
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.compose import make_column_transformer
from sklearn.utils._estimator_html_repr import *
from IPython.core.display import display, HTML

### 1. set_config에서 display를 diagram으로 설정

In [14]:
# Switch scikit-learn's global estimator display mode to "diagram".
sklearn.set_config(display="diagram")
# Last expression of the cell: echoes the installed scikit-learn version string.
sklearn.__version__

'0.23.1'

### 2. diamonds 데이터를 통해 파이프라인 만들기

In [15]:
# Load the diamonds dataset through seaborn.
df_diamonds = sns.load_dataset('diamonds')

# Predictors are three selected columns; the target is the `price` column.
feature_columns = ['carat', 'depth', 'cut']
X = df_diamonds[feature_columns]
y = df_diamonds['price']

# Train/test split with the default proportions; the fixed random_state keeps
# the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)

### 3. make_column_transformer를 이용하여 변수별로 다양하게 특성 변환

In [16]:
# Per-column preprocessing: StandardScaler on 'carat' and 'depth',
# OneHotEncoder on 'cut'; any column not listed is passed through unchanged.
scaled_columns = (StandardScaler(), ['carat', 'depth'])
encoded_columns = (OneHotEncoder(), ['cut'])
ct = make_column_transformer(
    scaled_columns,
    encoded_columns,
    remainder='passthrough',
)

### 4. 파이프라인 생성을 통한 로지스틱 회귀분석 수행

In [17]:
# Chain the column transformer with a logistic-regression estimator.
# NOTE(review): LogisticRegression is a classifier and treats every distinct
# value in the target as its own class; y_train is derived from the `price`
# column earlier in the notebook — confirm this is intended for the demo.
pipe = make_pipeline(ct, LogisticRegression())

# Fit on the first 1,000 training rows only. fit() is kept as the last
# expression so the notebook displays the returned (fitted) pipeline.
X_fit_subset = X_train.head(1000)
y_fit_subset = y_train.head(1000)
pipe.fit(X_fit_subset, y_fit_subset)

### 5. 좀 더 복잡한 파이프라인을 생성

In [6]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# Numeric branch: impute with the median strategy, then standardize.
numeric_steps = [
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
]
numeric_transformer = Pipeline(steps=numeric_steps)

# Categorical branch: impute with the constant 'missing', then one-hot encode
# with handle_unknown='ignore'.
categorical_steps = [
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
]
categorical_transformer = Pipeline(steps=categorical_steps)

# Route each group of columns to its own branch.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, ['carat', 'depth']),
        ('cat', categorical_transformer, ["cut"]),
    ]
)

# Full model: preprocessing followed by logistic regression.
clf = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ("logistic", LogisticRegression()),
    ]
)

In [18]:
# Fit the composite pipeline on the first rows of the training split; fit() is
# the cell's last expression, so the notebook displays the returned pipeline.
n_fit_rows = 1000
clf.fit(X_train.head(n_fit_rows), y_train.head(n_fit_rows))

#### 각 단계가 어떤 변수를 사용하는지 알 수 있게 시각화해 줌

### 6. 생성된 시각화를 HTML로 저장해보자
>- sklearn.utils._estimator_html_repr 모듈을 사용해서 HTML 코드 생성

In [19]:
# Import the diagram renderer by name from scikit-learn's public namespace
# instead of star-importing a private (underscore-prefixed) module, whose
# contents and location are not part of the supported API.
from sklearn.utils import estimator_html_repr
# IPython.display is the supported home of display/HTML; importing `display`
# from IPython.core.display is deprecated.
from IPython.display import display, HTML

#### estimator_html_repr() 함수를 통한 파이프라인 객체의 시각화 HTML 코드 생성

In [21]:
# Build the HTML markup for the `clf` pipeline diagram. As the cell's last
# expression, the returned string is echoed as raw text in the output.
estimator_html_repr(clf)

'<style>div.sk-top-container {color: black;background-color: white;}div.sk-toggleable {background-color: white;}label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.2em 0.3em;box-sizing: border-box;text-align: center;}div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}div

### 7. HTML 소스 코드를 파일로 저장

In [22]:
# Render the pipeline diagram to an HTML string, then persist that string to
# "pipeline.txt" (opened in text-write mode, so an existing file is replaced).
html = estimator_html_repr(clf)
with open("pipeline.txt", "w") as out_file:
    out_file.write(html)

### 8. 저장된 파일에서 HTML 소스 코드 읽어오기

In [23]:
# Load the saved markup back as a single string. read() returns the whole file
# at once, which is exactly what joining the readlines() result with "" gave.
with open("pipeline.txt", "r") as saved_file:
    pipeline_html = saved_file.read()

### 9. display()와 HTML() 2가지 함수를 이용하여 HTML로 시각화하기

In [12]:
# Import from IPython.display: pulling `display` out of IPython.core.display is
# deprecated and emits a warning on recent IPython releases.
from IPython.display import display, HTML

# Wrap the markup read back from disk in an HTML object so the notebook renders
# it as a diagram rather than printing the raw string.
display(HTML(pipeline_html))
