# Microsoft Interpret ML によるモデル開発とモデル解釈 (Notebook VM 対応)
製造プロセスから採取されたセンサーデータと検査結果のデータを用いて、品質管理モデルを構築します。

## 1. データ準備

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the factory dataset from the CSV file into a DataFrame; the path is
# relative to the notebook's working directory.
df = pd.read_csv("../data/Factory.csv")

In [None]:
from sklearn.model_selection import train_test_split

# Features: every column except the "Quality" label and the "ID" column.
X = df.drop(columns=["Quality", "ID"])
# Labels: the underlying array of the "Quality" column.
y = df["Quality"].values

# Hold out 10% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both parts; the fixed random_state makes the
# partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=100,
    stratify=y,
)

## 2. データ探索

In [None]:
from interpret import preserve
from interpret import show
from interpret.data import ClassHistogram

# Build a ClassHistogram data explanation of the training split for exploration.
hist = ClassHistogram().explain_data(
    X_train,
    y_train,
    name='Train Data',
)

In [None]:
# First the default view of the explanation ...
preserve(hist)

# ... then one view per entry in the selector's Name column, printing each
# name as a caption before its view.
for feature_name in hist.selector.Name:
    print(feature_name)
    preserve(hist, feature_name)

## 3. Explainable Boosting Machine (EBM) によるモデル学習

In [None]:
from interpret.glassbox import ExplainableBoostingClassifier, LogisticRegression, ClassificationTree, DecisionListClassifier
# Fixed seed; also reused by the later logistic-regression cell.
seed = 1234
ebm = ExplainableBoostingClassifier(random_state=seed)
# Train on the training split. fit() works on DataFrames and on numpy arrays.
ebm.fit(X_train, y_train)

## 4. モデル解釈 (グローバル)

In [None]:
# Global (model-level) explanation of the fitted EBM, labelled 'EBM'.
ebm_global = ebm.explain_global(name='EBM')

In [None]:
# Default view of the global explanation first ...
preserve(ebm_global)

# ... followed by one view per entry in the selector's Name column, each
# preceded by its printed name.
for term_name in ebm_global.selector.Name:
    print(term_name)
    preserve(ebm_global, term_name)

## 5. モデル解釈 (ローカル)
テストデータから算出された予測値の解釈を行います。

In [None]:
# Local (per-prediction) explanations for the first 20 rows of the test split,
# passed together with their true labels.
ebm_local = ebm.explain_local(X_test[:20], y_test[:20], name='EBM')

In [None]:
# Show every local explanation in turn, addressed by its position in the selector.
for row_idx in range(len(ebm_local.selector)):
    preserve(ebm_local, row_idx)

## 6. EBM モデルの精度確認

In [None]:
from interpret.perf import ROC

# ROC performance explanation of the EBM, computed from its predicted
# probabilities on the held-out test split.
ebm_perf = ROC(ebm.predict_proba).explain_perf(
    X_test,
    y_test,
    name='EBM',
)

In [None]:
# Show the EBM's ROC performance explanation.
preserve(ebm_perf)

## 7. ロジスティック回帰、決定木モデルの学習

In [None]:
from interpret.glassbox import LogisticRegression, ClassificationTree

# Logistic regression and the decision tree cannot consume categorical columns
# directly, so one-hot encode them first.
X_enc = pd.get_dummies(X, prefix_sep='.')
feature_names = list(X_enc.columns)

# Reuse the existing train/test partition instead of drawing a new random
# split. A second train_test_split call would overwrite y_train / y_test with
# labels for different rows than X_train / X_test hold, and the models would
# then be scored on different hold-out sets. get_dummies keeps the row index
# of X, so the encoded rows are selected by the index labels of the existing
# splits.
X_train_enc = X_enc.loc[X_train.index]
X_test_enc = X_enc.loc[X_test.index]

# An L1 penalty needs a solver that supports it: scikit-learn's default
# 'lbfgs' solver rejects penalty='l1', so request 'liblinear' explicitly.
lr = LogisticRegression(
    random_state=seed,
    feature_names=feature_names,
    penalty='l1',
    solver='liblinear',
)
lr.fit(X_train_enc, y_train)

tree = ClassificationTree()
tree.fit(X_train_enc, y_train)

## 8. ダッシュボードでのモデル比較

In [None]:
# ROC performance explanations for the two models trained on encoded features,
# evaluated on the encoded test split.
lr_perf = ROC(lr.predict_proba).explain_perf(
    X_test_enc, y_test, name='Logistic Regression'
)
tree_perf = ROC(tree.predict_proba).explain_perf(
    X_test_enc, y_test, name='Classification Tree'
)

# Show the three performance explanations one after another for comparison.
for perf_explanation in (lr_perf, tree_perf, ebm_perf):
    preserve(perf_explanation)

## 9. モデル解釈の比較 (グローバル、ローカル)

### Global Explain (決定木は未対応)

In [None]:
# Global (model-level) explanations of the logistic regression and the tree.
lr_global = lr.explain_global(name='LR')
tree_global = tree.explain_global(name='Tree')

### Local Explain (決定木は未対応)

In [None]:
# Explain the first 20 test rows using the one-hot encoded features
# (X_test_enc) that lr and tree were fitted on. The un-encoded X_test may not
# have the same columns as the training data, and X_test_enc is the split that
# is evaluated against y_test in the performance comparison.
lr_local = lr.explain_local(X_test_enc[:20], y_test[:20], name='LR')
tree_local = tree.explain_local(X_test_enc[:20], y_test[:20], name='Tree')

In [None]:
# Walk through the explained rows: print the selector entry for the row, then
# show the logistic-regression and EBM local explanations at that position.
for row_idx in range(len(lr_local.selector)):
    selector_row = lr_local.selector.iloc[row_idx]
    print(selector_row)
    preserve(lr_local, row_idx)
    preserve(ebm_local, row_idx)

### ダッシュボード (未対応)

In [None]:
# Do everything in one shot with the InterpretML Dashboard by passing a list into show
#show([hist, lr_global, lr_local, lr_perf, tree_global, tree_local, tree_perf, ebm_global, ebm_local, ebm_perf], share_tables=True)