# 3-5. 評価

## Cross Validation

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report, roc_curve, roc_auc_score
import shap
# Initialise SHAP's JavaScript support once for this notebook session;
# presumably needed so the interactive force plot further down can render.
shap.initjs()

In [None]:
# Load the training CSV and separate the target column ("label") from the
# remaining columns, which are used as features.
train = pd.read_csv("data/converted_train.csv")
y_train = train["label"]
X_train = train.drop(columns=["label"])

In [None]:
# LightGBM classifier created with the library's default hyperparameters.
# NOTE(review): no later cell visible in this notebook references `lgb_model`;
# the grid search constructs its own LGBMClassifier instance.
lgb_model = lgb.LGBMClassifier()

In [None]:
# Hyperparameter grid for the search.  The objective and boosting type are
# pinned to a single value each; num_leaves (2 values) and n_estimators
# (4 values) are varied, giving 2 x 4 = 8 candidate settings.
param_grid = dict(
    objective=["binary"],
    boosting_type=["gbdt"],
    num_leaves=[10, 100],
    n_estimators=[50, 100, 500, 1000],
)

In [None]:
# Exhaustive grid search over `param_grid` using 5-fold stratified
# cross-validation.  No `scoring` is given, so the estimator's default scorer
# is used; n_jobs=-1 asks for all available CPU cores.
cv = GridSearchCV(
    estimator=lgb.LGBMClassifier(),
    param_grid=param_grid,
    cv=StratifiedKFold(n_splits=5),
    n_jobs=-1,
    verbose=True,
)

In [None]:
# Run the grid search on the training data (fits every candidate on every
# fold); the fitted search object is the cell's displayed value.
cv.fit(X=X_train, y=y_train)

In [None]:
# Display the estimator selected by the grid search.
cv.best_estimator_

In [None]:
# Display the hyperparameter combination selected by the grid search.
cv.best_params_

In [None]:
# Mean cross-validated test score of the selected candidate.  `best_score_`
# is the mean_test_score entry at `best_index_`.
cv.best_score_

In [None]:
# Standard deviation of the test score across the CV folds for the selected
# candidate (the entry of std_test_score at best_index_).
cv.cv_results_["std_test_score"][cv.best_index_]

## テスト

In [None]:
# Load the test CSV and separate the target column ("label") from the
# remaining columns, mirroring how the training data is prepared.
test = pd.read_csv("data/converted_test.csv")
y_test = test["label"]
X_test = test.drop(columns=["label"])

In [None]:
# Use the fitted grid-search object itself for prediction.
# NOTE(review): this relies on GridSearchCV's default refit behaviour, under
# which prediction calls are delegated to the refitted best estimator --
# confirm if `refit` is ever changed.
model = cv

In [None]:
# Keep only column 1 of predict_proba's output.  Despite the name, `y_pred`
# therefore holds scores rather than hard labels; later cells threshold it
# at 0.5 where labels are needed.
# presumably column 1 is the positive class (label 1) -- TODO confirm
y_pred = model.predict_proba(X_test)[:, 1]

In [None]:
# Display the predicted scores for a quick visual sanity check.
y_pred

In [None]:
# Confusion matrix on the test set, turning the scores into hard predictions
# with a 0.5 threshold.
ConfusionMatrixDisplay(
    confusion_matrix=confusion_matrix(y_true=y_test, y_pred=y_pred > 0.5),
).plot()

In [None]:
# Text report on the test set, computed at the same 0.5 threshold as the
# confusion matrix.
print(classification_report(y_true=y_test, y_pred=y_pred > 0.5))

In [None]:
# ROC curve points computed from the raw scores (no thresholding here).
# `thresholds` is not used by any later cell visible in this notebook.
fpr, tpr, thresholds = roc_curve(y_true=y_test, y_score=y_pred)

In [None]:
# Plot the ROC curve for the test-set scores.  A dashed diagonal marks the
# chance level and the axes are labelled so the figure is readable on its
# own; plt.show() renders the figure without echoing the artist objects.
plt.plot(fpr, tpr, label="ROC curve")
plt.plot([0, 1], [0, 1], linestyle="--", color="gray", label="Chance")
plt.xlabel("False positive rate")
plt.ylabel("True positive rate")
plt.legend()
plt.show()

In [None]:
# Area under the ROC curve on the test set, computed from the raw scores.
roc_auc_score(y_true=y_test, y_score=y_pred)

## SHAPによる予測の説明

In [None]:
# Build a tree explainer around the estimator selected by the grid search and
# compute SHAP values for every row of the test features.
# NOTE(review): later cells index `shap_values[1]` and
# `explainer.expected_value[1]`, which presumes a per-class list output;
# newer SHAP releases may return a single array for binary LightGBM models --
# confirm against the installed SHAP version.
explainer = shap.TreeExplainer(model=cv.best_estimator_)
shap_values = explainer.shap_values(X=X_test)

In [None]:
# Summary plot over the whole test set, using entry 1 of `shap_values`
# (presumably the positive class -- confirm against the SHAP version in use).
shap.summary_plot(
    shap_values[1],
    X_test,
    feature_names=X_test.columns,
)

In [None]:
# Positional index of the single test row explained by the per-sample SHAP
# plots below (used with shap_values[1][n, :] and X_test.iloc[n, :]).
n = 50

In [None]:
# Force plot explaining the prediction for test row n.  The third argument is
# `features`: passing the row itself (a Series) supplies both the feature
# names (from its index) and the feature values, so the plot shows
# "name = value" rather than names only, matching what the waterfall plot
# receives.
shap.force_plot(explainer.expected_value[1], shap_values[1][n, :], X_test.iloc[n, :])

In [None]:
# Waterfall plot for the same test row n: starts from the expected value at
# index 1 and applies that row's SHAP values, with the row's feature values
# shown alongside.
shap.plots._waterfall.waterfall_legacy(
    explainer.expected_value[1], shap_values[1][n], X_test.iloc[n]
)