# 1. Packages

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

import warnings
# Hide FutureWarning messages so they do not clutter the cell outputs.
warnings.simplefilter(action='ignore', category = FutureWarning)

# A value of 0 switches off matplotlib's "too many open figures" warning;
# the EDA loops further down create one figure per predictor.
plt.rcParams.update({'figure.max_open_warning': 0})

In [None]:
from sklearn.model_selection import train_test_split, KFold, cross_val_score, StratifiedKFold
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, recall_score, f1_score 
from sklearn.metrics import plot_confusion_matrix, roc_auc_score, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
import lightgbm as lgb
import xgboost as xgb

# 2. General Infos About Data

In [None]:
# Load the raw table, then discard the two columns that are not used as
# predictors ("id" and "Unnamed: 32").
df = pd.read_csv("../input/breast-cancer-wisconsin-data/data.csv")
df = df.drop(columns = ["id", "Unnamed: 32"])

# Random preview of ten rows.
df.sample(10)

In [None]:
# (rows, columns) of the frame after the two columns were dropped.
df.shape

In [None]:
# Rows that repeat an earlier row; an empty result means there are no duplicates.
df[df.duplicated()]

In [None]:
# Column dtypes and non-null counts (used to check for missing values).
df.info()

In [None]:
# Summary statistics of the numeric columns.
df.describe()

There are no duplicated rows and no missing values.

## 2.1 Features

https://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+%28Diagnostic%29

https://www.kaggle.com/uciml/breast-cancer-wisconsin-data

**1) ID number**

**2) Diagnosis** (M = malignant, B = benign)

Ten real-valued features are computed for each cell nucleus:

**a) radius** (mean of distances from center to points on the perimeter)

**b) texture** (standard deviation of gray-scale values)

**c) perimeter**

**d) area**

**e) smoothness** (local variation in radius lengths)

**f) compactness** (perimeter^2 / area - 1.0)

**g) concavity** (severity of concave portions of the contour)

**h) concave points** (number of concave portions of the contour)

**i) symmetry**

**j) fractal dimension** ("coastline approximation" - 1)


The mean, standard error and "worst" or largest (mean of the three
largest values) of these features were computed for each image,
resulting in 30 features. For instance, field 3 is Mean Radius, field
13 is Radius SE, field 23 is Worst Radius.

All feature values are recoded with four significant digits.

Missing attribute values: none

Class distribution: 357 benign, 212 malignant

# 3. Target

In [None]:
# Number of rows per diagnosis label.
df.diagnosis.value_counts()

In [None]:
# Pie chart of the class balance.
# Wedge labels and colours are looked up from the value_counts() index rather
# than hard-coded by position, so every wedge keeps the right name even if the
# class frequencies change or the cell is re-run after the target has been
# encoded as 0/1.
class_names = {"B": "Benign", "M": "Malignant", 0: "Benign", 1: "Malignant"}
class_colors = {"Benign": "#0EB8F1", "Malignant": "#F1480F"}

diagnosis_counts = df.diagnosis.value_counts()
pie_labels = [class_names[key] for key in diagnosis_counts.index]
pie_colors = [class_colors[name] for name in pie_labels]

fig, ax = plt.subplots(figsize = (8, 8))

ax.pie(diagnosis_counts, labels = pie_labels,
       autopct = '%1.2f%%', startangle = 180, colors = pie_colors)

ax.set_title("Diagnosis")
plt.show()

The target is split roughly 37% malignant / 63% benign. I will stratify both the train/test split and the cross-validation folds, but the imbalance is mild enough that we don't need to resample the data.

In [None]:
# Encode the target as 1 = malignant, 0 = benign.
# Guarded so that re-running the cell is a no-op: pushing the already-numeric
# column through the same dict would replace every value with NaN.
if not pd.api.types.is_numeric_dtype(df["diagnosis"]):
    df["diagnosis"] = df["diagnosis"].map({"M": 1, "B": 0})

All of our features are numerical

In [None]:
# Name of the label column; every other column of df is a predictor.
target = "diagnosis"
predictors = df.columns[df.columns != target].tolist()

# 4. EDA

In [None]:
def feature_dist_clas(df, col, hue):
    """Plot one feature's distribution split by class in four side-by-side views.

    Draws a histogram, a KDE plot, a boxplot and a violin plot of ``col``,
    each coloured by the levels of ``hue``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data holding both ``col`` and ``hue``.
    col : str
        Name of the feature column to plot.
    hue : str
        Name of the class column used for colouring. The palette holds two
        colours, so two class levels are assumed.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    fig, axes = plt.subplots(1, 4, figsize = (25, 5))

    # Sorted class levels, passed as hue_order to every plot so that the first
    # palette colour always belongs to the lowest level and the second to the
    # highest, identically in all four panels.
    order = sorted(df[hue].unique())
    palette = ["#0EB8F1", "#F1480F"]

    sns.histplot(x = col, hue = hue, hue_order = order, data = df, ax = axes[0],
                 palette = palette, edgecolor="black", linewidth=0.5)
    sns.kdeplot(x = col, hue = hue, hue_order = order, data = df, fill = True, ax = axes[1],
                palette = palette, linewidth = 2)

    # The constant x vector puts both class boxes/violins in one shared group.
    sns.boxplot(y = col, hue = hue, hue_order = order, data = df, x = [""] * len(df), ax = axes[2],
                palette = palette, linewidth = 2, flierprops = dict(marker = "x", markersize = 3.5))

    sns.violinplot(y = col, hue = hue, hue_order = order, data = df, x = [""] * len(df), ax = axes[3],
                   palette = palette)

    fig.suptitle("For Feature:  " + col)
    axes[0].set_title("Histogram For Feature " + col)
    axes[1].set_title("KDE Plot For Feature " + col)
    axes[2].set_title("Boxplot For Feature " + col)
    axes[3].set_title("Violinplot For Feature " + col)

    for ax in axes:
        ax.set_facecolor("#C7D3D4FF")
        ax.grid(linewidth = 0.25)

    # Render immediately, as feature_distribution does, instead of leaving the
    # figure open until the calling cell finishes.
    plt.show()

In [None]:
# One four-panel figure per predictor, split by diagnosis class.
for col in predictors:
    feature_dist_clas(df, col, "diagnosis")

### 4.1 Takeaways - Features I

radius_mean --> larger values for malignant class, easily separable

texture_mean --> larger values for malignant class, but benign class has lots of outliers, it could be a problem

perimeter_mean --> larger values for malignant class, easily separable

area_mean --> larger values for malignant class, easily separable

smoothness_mean --> almost similar distributions for both class, a little bit larger median for malignant, hardly separable

compactness_mean --> larger values for malignant class, separable

concavity_mean --> larger values for malignant class, but benign class has lots of outliers, but it seems easily separable

concave points_mean --> larger values for malignant class, easily separable

symmetry_mean --> almost similar distributions for both class

fractal_dimension_mean --> similar, benign class has lots of outliers, malignant class has wider range

Generally, the benign class has a higher kurtosis than the malignant class, and the malignant class covers a wider range.

Also, there is some skewness and kurtosis: the distributions look roughly bell-shaped but are not normal. Transformations will reduce this problem.

In [None]:
def feature_distribution(df, col):
    """Show the overall distribution of one feature in three panels.

    Panels, left to right: KDE plot (titled with the column's skewness and
    kurtosis rounded to three decimals), boxplot, and a probability plot
    produced by ``scipy.stats.probplot``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data holding ``col``.
    col : str
        Name of the feature column to plot.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    values = df[col]
    skew_value = np.round(values.skew(), 3)
    kurt_value = np.round(values.kurtosis(), 3)

    fig, axes = plt.subplots(1, 3, figsize = (18, 6))
    kde_ax, box_ax, prob_ax = axes[0], axes[1], axes[2]

    sns.kdeplot(data = df, x = col, fill = True, ax = kde_ax, color = "#603F83", linewidth=2)
    sns.boxplot(data = df, y = col, ax = box_ax, color = "#603F83",
                linewidth = 2, flierprops = {"marker": "x", "markersize": 3.5})
    stats.probplot(values, plot = prob_ax)

    fig.suptitle(f"For Feature:  {col}")
    kde_ax.set_title(f"Distribution \nSkewness: {skew_value}\nKurtosis: {kurt_value}")
    box_ax.set_title("Boxplot")
    prob_ax.set_title("Probability Plot")

    for panel in axes:
        panel.set_facecolor("#C7D3D4FF")
        panel.grid(linewidth = 0.1)

    # Restyle the two artists probplot drew: line 0 holds the sample markers,
    # line 1 the fitted reference line.
    prob_lines = prob_ax.get_lines()
    sample_points = prob_lines[0]
    reference_line = prob_lines[1]

    sample_points.set_markerfacecolor('#8157AE')
    sample_points.set_markeredgecolor('#603F83')
    sample_points.set_markeredgewidth(0.1)
    reference_line.set_color('#F1480F')
    reference_line.set_linewidth(3)

    sns.despine(top = True, right = True, left = True, bottom = True)
    plt.show()

In [None]:
# One three-panel distribution figure per predictor (classes pooled).
for col in predictors:
    feature_distribution(df, col)

### 4.2 Takeaways - Features II

Many features are skewed and heavy-tailed: in general they are right-skewed with large kurtosis. To reduce this and bring the features closer to a normal distribution, I'll apply transformations.

In [None]:
def heatmap(df):
    """Draw an annotated heatmap of ``df.corr()`` on a 15 x 15 inch figure.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose pairwise column correlations are plotted.
    """
    fig, ax = plt.subplots(figsize = (15, 15))

    correlations = df.corr()
    cell_style = dict(annot = True, fmt = ".2f", annot_kws = {"fontsize": 9},
                      square = True, linewidths = 0.25, linecolor = "black")

    # Colour scale pinned to the full [-1, 1] correlation range.
    sns.heatmap(correlations, ax = ax, cmap = "coolwarm", vmin = -1, vmax = 1,
                cbar = False, **cell_style)

    sns.despine(top = True, right = True, left = True, bottom = True)

heatmap(df)

# 5. Preprocessing

In [None]:
# Stratified 70/30 hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(df[predictors],
                                                    df[target],
                                                    test_size = 0.3,
                                                    random_state = 42,
                                                    stratify = df[target])

# The preprocessing cells below overwrite columns of X_train / X_test in place.
# Work on explicit copies so those assignments target independent frames and
# do not raise pandas' SettingWithCopyWarning on objects derived from df.
X_train, X_test = X_train.copy(), X_test.copy()

### **Handling skewness using the Yeo-Johnson transformation**

In [None]:
# Skewness and kurtosis of every predictor, computed separately per split.
train_skew = [X_train[name].skew() for name in predictors]
test_skew = [X_test[name].skew() for name in predictors]
train_kurtosis = [X_train[name].kurtosis() for name in predictors]
test_kurtosis = [X_test[name].kurtosis() for name in predictors]

skew_df = pd.DataFrame({
    "Feature": predictors,
    "TrainSkewness": train_skew,
    "TestSkewness": test_skew,
    "TrainKurtosis": train_kurtosis,
    "TestKurtosis": test_kurtosis,
})

# Keep the features whose absolute training skewness is at least 0.5.
is_skewed = skew_df["TrainSkewness"].abs() >= 0.5
skewed = skew_df[is_skewed]
skewed

In [None]:
# Yeo-Johnson transform of every feature flagged as skewed, followed by the
# skewness / kurtosis of the transformed columns.
train_skew_yeoj, test_skew_yeoj = [], []
train_kurtosis_yeoj, test_kurtosis_yeoj = [], []

for col in skewed.Feature.tolist():

    # Lambda is estimated on the training column only and then re-used for
    # the test column, so no test information enters the fit.
    X_train[col], fitted_lambda = stats.yeojohnson(X_train[col])
    X_test[col] = stats.yeojohnson(X_test[col], fitted_lambda)

    train_skew_yeoj.append(X_train[col].skew())
    test_skew_yeoj.append(X_test[col].skew())
    train_kurtosis_yeoj.append(X_train[col].kurtosis())
    test_kurtosis_yeoj.append(X_test[col].kurtosis())

# `skewed` is a filtered slice of skew_df. assign() returns a new frame with
# the extra columns instead of writing into that slice through .loc, which
# avoids pandas' SettingWithCopyWarning.
skewed = skewed.assign(
    TrainSkew_Transformed = train_skew_yeoj,
    TestSkew_Transformed = test_skew_yeoj,
    TrainKurtosis_Transformed = train_kurtosis_yeoj,
    TestKurtosis_Transformed = test_kurtosis_yeoj,
)

skewed

**Scaling predictors using Robust Scaler**

In [None]:
# RobustScaler centres and scales every column independently, so fitting one
# scaler on all predictors gives the same values as fitting a fresh scaler per
# column. Unlike the per-column loop, the fitted parameters of every column
# stay available in `scaler` afterwards.
# Fitted on the training split only; the test split is transformed with it.
scaler = RobustScaler()
X_train[predictors] = scaler.fit_transform(X_train[predictors])
X_test[predictors] = scaler.transform(X_test[predictors])

# 6. EDA for Target

##  6.1 Plotting with Two Features

This code helps us visualize the target distribution with respect to two features.

Feel free to change f1 and f2 (the column positions of feature 1 and feature 2) to examine a different pair of features.

In [None]:
# Scatter plot of the training rows in the plane of two chosen predictors,
# coloured by class.
fig, ax1 = plt.subplots(figsize=(15, 8))
ax1.set_facecolor("#393838")

X = X_train.values
y = y_train.values

# Column positions (within X_train) of the two features to plot.
f1 = 17
f2 = 23

ax1.scatter(X[y == 0, f1], X[y == 0, f2], label = "Benign", alpha = 1, linewidth = 0, c = "#0EB8F1")
ax1.scatter(X[y == 1, f1], X[y == 1, f2], label = "Malignant", alpha = 1, linewidth = 0, c = '#F1480F', marker = "X")

# Label the axes with the selected column names so the figure says which
# features f1 / f2 refer to.
ax1.set_xlabel(X_train.columns[f1])
ax1.set_ylabel(X_train.columns[f2])

ax1.legend()
sns.despine(top = True, right = True, left = True, bottom = True)

## 6.2 Plotting with PCA, 2 Component

In [None]:
# Two-component PCA of all predictors, plotted by class.
X_pca = df[predictors].copy()

# PCA follows raw variance, so predictors measured on large numeric scales
# would dominate the components. Standardise every column first so each
# feature contributes on an equal footing.
X_pca_scaled = StandardScaler().fit_transform(X_pca)

pca = PCA(n_components = 2, random_state = 42)
components = pca.fit_transform(X_pca_scaled)
comp_df = pd.DataFrame(components, columns=["X1", "X2"])
# Column-wise concat aligns on the index; comp_df has a fresh 0..n-1 index.
# NOTE(review): assumes df still has its default RangeIndex — confirm.
plot_df = pd.concat([comp_df, df[target]], axis=1)


fig, ax = plt.subplots(figsize=(14, 8))
ax.set_facecolor("#393838")

sns.scatterplot(x = "X1", y = "X2", alpha = 0.8, data = plot_df[plot_df.diagnosis == 0], 
                ax = ax, label = "Benign", linewidth = 0, color = "#0EB8F1")

sns.scatterplot(x = "X1", y = "X2", alpha = 0.8, data = plot_df[plot_df.diagnosis == 1], 
                ax = ax, label = "Malignant", marker="X", linewidth = 0, color = '#F1480F')

ax.set_title("PCA with 2 Component, Explained Variance:  \n" + 
             str(pca.explained_variance_ratio_[0].round(5)) + ", " +
             str(pca.explained_variance_ratio_[1].round(5)))

sns.despine(top = True, right = True, left = True, bottom = True)

For above graphs;

We can see that our target classes are fairly balanced.

For PCA graph, we can detect malignants easily.

# 7. Model Comparison

### 7.1 Metric

The target variable has a 37-63 class distribution, so accuracy is a usable metric. But in this problem we need more than accuracy. **For biostatistical or health-related problems, precision, recall, and F1-score are more important.**

Precision: Ratio of correctly identified malignant observations over all observations predicted as malignant.

Recall : Ratio of successfully detected malignant observations over all malignant observations.

F1 is the harmonic mean of precision and recall.

In [None]:
# Shared 5-fold stratified splitter (shuffled, fixed seed) used for all CV scores.
skf = StratifiedKFold(n_splits = 5, random_state = 42, shuffle = True)

def cv_model(model, X = None, y = None, cv = skf, scoring = "f1"):
    """Return the mean cross-validated score of ``model``.

    Parameters
    ----------
    model : estimator
        Estimator passed to ``cross_val_score``.
    X, y : optional
        Features and labels. When omitted, the notebook-level ``X_train`` /
        ``y_train`` are looked up at call time. Binding them as default
        arguments would freeze whatever objects existed when this cell ran and
        go stale if the split cell is executed again.
    cv : cross-validation splitter, default ``skf``
        Splitting strategy.
    scoring : str, default "f1"
        Scorer name understood by scikit-learn.

    Returns
    -------
    float
        Mean of the per-fold scores.
    """
    if X is None:
        X = X_train
    if y is None:
        y = y_train
    return cross_val_score(model, X, y, scoring = scoring, cv = cv, n_jobs = -1).mean()

In [None]:
# Candidate classifiers with their default hyper-parameters. Estimators that
# take a seed are given random_state = 42.

# Linear / kernel / probabilistic / neighbour-based models
logreg = LogisticRegression(random_state = 42)
svc = SVC(probability = True, random_state = 42)
gnb = GaussianNB()
knnc = KNeighborsClassifier(n_jobs = -1)

# Tree-based models
dtc = DecisionTreeClassifier(random_state = 42)
rfc = RandomForestClassifier(random_state = 42)
lgbc = lgb.LGBMClassifier(random_state = 42, n_jobs = -1)
xgbc = xgb.XGBClassifier(
    random_state = 42,
    n_jobs = -1,
    use_label_encoder = False,
    eval_metric = "logloss",
)

# Order matters: it defines the row order of the comparison table.
models = [logreg, svc, gnb, rfc, knnc, lgbc, dtc, xgbc]

In [None]:
def _positive_class_scores(model, X):
    """Return a continuous score for the positive class of a fitted classifier.

    Uses column 1 of ``predict_proba`` when the model offers it, otherwise
    falls back to ``decision_function``.
    """
    if hasattr(model, "predict_proba"):
        return model.predict_proba(X)[:, 1]
    return model.decision_function(X)


def model_results(model, xtrain = None, ytrain = None, xtest = None, ytest = None):
    """Fit ``model`` and report its performance on the training and test sets.

    Prints a classification report and ROC-AUC for both sets, the mean 5-fold
    CV score (``cv_model`` default scoring) for the training set, and draws
    the two confusion matrices side by side.

    Parameters
    ----------
    model : estimator
        Classifier to fit; it is fitted in place on ``xtrain`` / ``ytrain``.
    xtrain, ytrain, xtest, ytest : optional
        Data splits. When omitted, the notebook-level ``X_train``, ``y_train``,
        ``X_test`` and ``y_test`` are looked up at call time, so a re-run of
        the split cell is picked up instead of using stale defaults.

    Returns
    -------
    None

    Notes
    -----
    NOTE(review): ``plot_confusion_matrix`` was removed in scikit-learn 1.2;
    on newer versions it has to be replaced by
    ``ConfusionMatrixDisplay.from_estimator`` (together with the file-level
    import).
    """
    xtrain = X_train if xtrain is None else xtrain
    ytrain = y_train if ytrain is None else ytrain
    xtest = X_test if xtest is None else xtest
    ytest = y_test if ytest is None else ytest

    sns.set_style("white")
    # plt.rcParams is the same object as matplotlib.rcParams, so no extra
    # function-level import is needed.
    plt.rcParams.update({'font.size': 12})

    model.fit(xtrain, ytrain)
    train_preds = model.predict(xtrain)
    test_preds = model.predict(xtest)

    # ROC-AUC needs a continuous ranking score. Feeding it hard 0/1
    # predictions collapses the curve to a single operating point.
    train_scores = _positive_class_scores(model, xtrain)
    test_scores = _positive_class_scores(model, xtest)

    fig, axes = plt.subplots(1, 2, figsize = (15, 5))

    for ax, features, labels in ((axes[0], xtrain, ytrain), (axes[1], xtest, ytest)):
        plot_confusion_matrix(model, features, labels,
                              cmap = plt.cm.Reds, display_labels = ["Benign", "Malignant"],
                              values_format = "6d", normalize = None, ax = ax, colorbar = False)

    # Builtin round() works whether the metric comes back as a Python float
    # or as a NumPy scalar.
    print("For training set: \n")
    print(classification_report(ytrain, train_preds, target_names = ["Benign", "Malignant"], digits = 3))
    print("Roc-Auc Score: " , round(roc_auc_score(ytrain, train_scores), 3))
    print("5 Fold CV Score (Avg): " , round(cv_model(model, xtrain, ytrain), 3))


    print("\nFor test set: \n")
    print(classification_report(ytest, test_preds, target_names = ["Benign", "Malignant"], digits = 3))
    print("Roc-Auc Score: " , round(roc_auc_score(ytest, test_scores), 3))

    axes[0].set_title("Training Set")
    axes[1].set_title("Test Set")
    fig.suptitle("For model " + type(model).__name__)
    sns.despine(top = True, right = True, left = True, bottom = True)
    plt.show()

In [None]:
# Full train/test report for logistic regression.
model_results(logreg)

In [None]:
# Full train/test report for the support vector classifier.
model_results(svc)

In [None]:
# Full train/test report for the XGBoost classifier.
model_results(xgbc)

In [None]:
# Fit every candidate model and collect train / test / CV metrics, keyed by
# the estimator's class name.
train_accuracy, test_accuracy, train_recall, test_recall = {}, {}, {}, {}
train_f1, test_f1, train_auc, test_auc = {}, {}, {}, {}
cv_score_f1, cv_accuracy, cv_recall = {}, {}, {}

for model in models:

    name = type(model).__name__
    model.fit(X_train, y_train)
    train_preds = model.predict(X_train)
    test_preds = model.predict(X_test)

    # ROC-AUC is computed from a continuous positive-class score, not from
    # the hard 0/1 predictions (which reduce the ROC curve to one point).
    if hasattr(model, "predict_proba"):
        train_scores = model.predict_proba(X_train)[:, 1]
        test_scores = model.predict_proba(X_test)[:, 1]
    else:
        train_scores = model.decision_function(X_train)
        test_scores = model.decision_function(X_test)

    # Builtin round() works whether the metric is a Python float or a NumPy scalar.
    train_accuracy[name] = round(accuracy_score(y_train, train_preds), 4)
    test_accuracy[name] = round(accuracy_score(y_test, test_preds), 4)
    train_recall[name] = round(recall_score(y_train, train_preds), 4)
    test_recall[name] = round(recall_score(y_test, test_preds), 4)
    train_f1[name] = round(f1_score(y_train, train_preds), 4)
    test_f1[name] = round(f1_score(y_test, test_preds), 4)
    train_auc[name] = round(roc_auc_score(y_train, train_scores), 4)
    test_auc[name] = round(roc_auc_score(y_test, test_scores), 4)
    cv_score_f1[name] = cv_model(model, scoring = "f1")
    cv_accuracy[name] = cv_model(model, scoring = "accuracy")
    cv_recall[name] = cv_model(model, scoring = "recall")

# One row per model, one column per metric.
scores = pd.DataFrame(
    [cv_score_f1, cv_accuracy, cv_recall, train_accuracy, test_accuracy, 
     train_recall, test_recall, train_f1, test_f1, train_auc, test_auc], 
     ["CVF1", "CVAccuracy", "CVRecall", "TrainAccuracy", "TestAccuracy", "TrainRecall", 
      "TestRecall", "TrainF1", "TestF1", "TrainAuc", "TestAuc"]).T

In [None]:
# Comparison table: one row per model, one column per metric.
scores

### 7.2 Takeaways - Model Comparison

**LogisticRegression**, **SVC**, **KNeighborsClassifier** and **LGBMClassifier** have best cv scores for f1-score. (over 95%)

They also have best accuracy and recall scores on cv.

**RandomForestClassifier** has nearly 96% accuracy on test data, but its recall score on test is just 89%. In this problem, recall basically measures the power to detect malignancy. So, RandomForestClassifier is not a good model for this problem.

**KNeighborsClassifier and LGBMClassifier** have good scores on cv but their performances on test data are not very good.

**XGBClassifier** is also good model. It has great cv scores and it performs well on test data.

**LogisticRegression** has great cv scores and it also performs well on test data.

**DecisionTreeClassifier is worst model for this problem**. Smallest cv scores and overfitting problem.

**SVC is best model for this problem**. It has best cv scores (nearly 95%, min.) and great results on test data. SVC model has nearly **99% accuracy**, **97% recall** and nearly **98% f1 score** **on test data**.

# 8. Model Explainability

Inspiration: 

https://www.kaggle.com/tentotheminus9/what-causes-heart-disease-explaining-the-model

https://www.kaggle.com/learn/machine-learning-explainability

## 8.1 Permutation Importance

https://www.kaggle.com/dansbecker/permutation-importance

https://scikit-learn.org/stable/modules/permutation_importance.html

https://eli5.readthedocs.io/en/latest/blackbox/permutation_importance.html

In [None]:
import eli5
from eli5.sklearn import PermutationImportance

# Permutation importance of the already-fitted SVC, evaluated on the test split.
perm = PermutationImportance(svc, random_state = 42).fit(X_test, y_test)
# Show up to 30 features, labelled with the test-frame column names.
eli5.show_weights(perm, feature_names = X_test.columns.tolist(), top = 30)

For permutation importance;

Features on top like texture_worst, smoothness_worst et cetera, are most important features. Features on bottom like compactness_se or fractal_dimension_se have least importance

## 8.2 Feature Importances

### 8.2.1 For Lightgbm

In [None]:
# LightGBM's built-in importance bar chart for the fitted lgbc (up to 30 features).
ax = lgb.plot_importance(lgbc, max_num_features = 30, grid = False, figsize = (12, 8), 
                         edgecolor = "#603F83", color = "#603F83",
                         title = "Feature Importances for Lightgbm Classifier")

ax.set_facecolor("#C7D3D4")
plt.show()

**texture_mean**, **concave_points_worst**, **texture_worst**, **concave_points_mean**, and **perimeter_worst** are top five most important features for lightgbm classifier model.

Test Accuracy: 0.9649

Test F1: 0.9508

Test Recall: 0.9062

for lgbmclassifier model with default parameters.

In [None]:
# Raw importance values behind the chart above.
# NOTE(review): presumably ordered like the training columns — confirm.
lgbc.feature_importances_

### 8.2.2 For XGBoost

In [None]:
# XGBoost's built-in importance bar chart for the fitted xgbc, drawn on our own axes.
fig, ax = plt.subplots(figsize = (12, 8))
ax.set_facecolor("#C7D3D4")

xgb.plot_importance(xgbc, ax = ax, grid = False, max_num_features = 30, edgecolor = "#603F83", color = "#603F83",
                    title = "Feature Importances for XGBoost Classifier")

**texture_worst**, **area_se**, **texture_mean**, **concave_points_mean**, and **compactness_se** are top five most important features for XGBoost classifier. 

Test Accuracy: 0.9825

Test F1: 0.9760

Test Recall: 0.9531

for xgbclassifier model with default parameters.

## 8.3 Visualizing Trees

### 8.3.1 For Decision Tree

In [None]:
from sklearn import tree
import graphviz

# Export the fitted decision tree as DOT text (out_file = None returns a string)
# and render it inline with graphviz.
tree_graph = tree.export_graphviz(dtc, out_file = None, feature_names = predictors)
graphviz.Source(tree_graph)

### 8.3.2. For Lightgbm

In [None]:
# Render a tree of the fitted LightGBM model, annotated with split gain and data percentage.
# NOTE(review): no tree index is passed, so the library default tree is drawn — confirm which one.
lgb.create_tree_digraph(lgbc, show_info = ["split_gain", "data_percentage"])

### 8.3.3. For XGBoost

In [None]:
# Draw a tree of the fitted XGBoost model on a large canvas.
# NOTE(review): no tree index is passed, so the library default tree is drawn — confirm which one.
fig, ax = plt.subplots(figsize = (20, 12))

xgb.plot_tree(xgbc, ax = ax)

## 8.4 Partial Dependence Plot (PDP)

https://www.kaggle.com/dansbecker/partial-plots

https://pdpbox.readthedocs.io/en/latest/pdp_isolate.html

**y-axis** - change in prediction

**blue shaded area** - level of confidence

In [None]:
from pdpbox import pdp, get_dataset, info_plots

# Partial dependence of the SVC on texture_mean, computed over the (scaled) test split.
pdp_ = pdp.pdp_isolate(model = svc, dataset = X_test, model_features = predictors, feature = 'texture_mean')

pdp.pdp_plot(pdp_, 'texture_mean')
plt.show()

In [None]:
# Same feature, this time for the LightGBM classifier.
pdp_ = pdp.pdp_isolate(model = lgbc, dataset = X_test, model_features = predictors, feature = 'texture_mean')

pdp.pdp_plot(pdp_, 'texture_mean')
plt.show()

For above graphs;

First graph is partial dependence plot of **texture_mean** for model Support Vector Classifier (svc)

Second graph is partial dependence plot of **texture_mean** for model LGBClassifier (lgbc)

Remember, **texture_mean** variable is the most important feature for lgbc feature importances, and third most important feature for model svc with respect to permutation importance.

If we look at the first plot, we can say that the malignant probability increases consistently.

If we look at the second plot, for model lgbc, the malignant probability generally increases when texture_mean increases. But below -0.6 this feature has no effect on our predictions. Also, above 0.5, texture_mean has no significant effect on being malignant.

In [None]:
# Partial dependence of the SVC on concavity_se, computed over the (scaled) test split.
pdp_ = pdp.pdp_isolate(model = svc, dataset = X_test, model_features = predictors, feature = 'concavity_se')

pdp.pdp_plot(pdp_, 'concavity_se')
plt.show()

In [None]:
# Same feature, this time for the XGBoost classifier.
pdp_ = pdp.pdp_isolate(model = xgbc, dataset = X_test, model_features = predictors, feature = 'concavity_se')

pdp.pdp_plot(pdp_, 'concavity_se')
plt.show()

For above graphs;

First graph is partial dependence plot of **concavity_se** for model Support Vector Classifier (svc)

Second graph is partial dependence plot of **concavity_se** for model XGBClassifier (xgbc)

Remember, **concavity_se** variable is one of the least important features for svc permutation importance and least important feature for xgbc.

If we look at the first plot, we can say that the malignant probability increases consistently (by about 0.025).

If we look at the second plot, for xgbc, the **concavity_se** feature has no impact on detecting malignancy until concavity_se reaches 0.1. After that, the probability of malignancy decreases slightly as concavity_se increases, but the decrease is tiny: about 0.0001.


Finally, I just want to say that variables can have a different impact in different models. A feature can be the most important one for some model and have a significant impact on its predictions, while being the least important feature for another model, with very little impact on its predictions.

## 8.5 Shap Package

https://www.kaggle.com/dansbecker/shap-values

https://shap-lrjball.readthedocs.io/en/docs_update/examples.html

https://www.kaggle.com/dansbecker/advanced-uses-of-shap-values

shap_values[1] --> for Malignant Class

**Note:** I am not yet comfortable with the code below; I still need to spend more time on this subject.

In [None]:
import shap

# Single test row (positional index 111) reshaped to a 1 x n_features array,
# used as the example observation in the force plots below.
data_point = X_test.iloc[111].values.reshape(1, -1)

In [None]:
# Tree SHAP explanation of the LightGBM prediction for the example row.
explainer = shap.TreeExplainer(lgbc)

shap_values = explainer.shap_values(data_point)

# Predicted class probabilities for the same row, for comparison with the plot.
print(lgbc.predict_proba(data_point).round(3))

shap.initjs()

# NOTE(review): indexing expected_value[1] / shap_values[1] assumes the installed
# shap version returns one entry per class, with index 1 = malignant — confirm.
shap.force_plot(explainer.expected_value[1], shap_values[1], data_point, feature_names = X_test.columns, link = "logit")

In [None]:
# Tree SHAP explanation of the random-forest prediction for the example row.
explainer = shap.TreeExplainer(rfc)

shap_values = explainer.shap_values(data_point)

# Predicted class probabilities for the same row, for comparison with the plot.
print(rfc.predict_proba(data_point).round(3))

shap.initjs()

# NOTE(review): index 1 is taken to be the malignant class — confirm for the installed shap version.
shap.force_plot(explainer.expected_value[1], shap_values[1], data_point, feature_names = X_test.columns)

In [None]:
# Kernel SHAP for the SVC, with the whole training split as background data.
# NOTE(review): a background set this large can make KernelExplainer slow — confirm the runtime is acceptable.
k_explainer = shap.KernelExplainer(svc.predict_proba, X_train)

# Predicted class probabilities for the example row, for comparison with the plot.
print(svc.predict_proba(data_point).round(3))

k_shap_values = k_explainer.shap_values(data_point)

# NOTE(review): index 1 is taken to be the malignant class — confirm for the installed shap version.
shap.force_plot(k_explainer.expected_value[1], k_shap_values[1], data_point, feature_names = X_test.columns)

In [None]:
# Kernel SHAP for the random forest on the same example row.
k_explainer = shap.KernelExplainer(rfc.predict_proba, X_train)
k_shap_values = k_explainer.shap_values(data_point)
# NOTE(review): index 1 is taken to be the malignant class — confirm for the installed shap version.
shap.force_plot(k_explainer.expected_value[1], k_shap_values[1], data_point, feature_names = X_test.columns)

In [None]:
# SHAP values of the LightGBM model for every test row, shown as a summary plot.
explainer = shap.TreeExplainer(lgbc)

shap_values = explainer.shap_values(X_test)

# NOTE(review): index 1 is taken to be the malignant class — confirm for the installed shap version.
shap.summary_plot(shap_values[1], X_test)

Color represents feature's value; blue for low, purple for high values

Horizontal line represents impact on model.

For example;
If we look at the upper right part of the graph, the **perimeter_worst** and **concave_points_worst** variables have a great impact on predicting malignancy when their values are high.

In [None]:
# Refit on the training split so the explainer wraps a fitted model even when
# this cell is run on its own.
svc.fit(X_train, y_train)

# Alternative background set (not used): X_train_summary = shap.kmeans(X_train, 25)

# Background data and explained rows are both limited to the first 100 rows,
# presumably to keep KernelExplainer's runtime manageable.
explainer = shap.KernelExplainer(svc.predict_proba, X_train[:100])
shap_values = explainer.shap_values(X_test[:100])

# Index 1 (malignant) is used here, like in every other SHAP cell of this
# section; this plot previously used index 0.
# NOTE(review): assumes the installed shap version returns one entry per class — confirm.
shap.force_plot(explainer.expected_value[1], shap_values[1], X_test[:100])

In [None]:
# Recompute the LightGBM SHAP values on the test split (shap_values was
# overwritten by the SVC cell above) and plot the dependence for perimeter_worst.
explainer = shap.TreeExplainer(lgbc)

shap_values = explainer.shap_values(X_test)

# NOTE(review): index 1 is taken to be the malignant class — confirm for the installed shap version.
shap.dependence_plot('perimeter_worst', shap_values[1], X_test)

In [None]:
# Dependence plot for texture_mean; reuses shap_values from the previous cell.
shap.dependence_plot('texture_mean', shap_values[1], X_test)