In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import sklearn 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the churn dataset from the Kaggle input directory and preview the first rows.
csv_path = '../input/customer-churn-with-explainable-ai/Churn_Modelling.csv'
data = pd.read_csv(csv_path)
data.head()

Let's explore the data! 

In [None]:
# Summarize the raw frame: column names, dtypes and non-null counts.
data.info()

In [None]:
# Count distinct values per column to spot low-cardinality (categorical) features.
data.nunique()

There are 10000 entries, with 14 columns.
There are 3 text fields, Surname, Geography & Gender.
The rest of the features have number entries, and there are no empty fields.

There are several columns with categorical data:
Geography, Gender, HasCrCard, IsActiveMember.
The column Exited has the label to be predicted. 

In [None]:
# Inspect the target distribution (number of rows per value of 'Exited').
# The column is passed by keyword: in recent seaborn releases the first
# positional parameter of countplot is `data`, not `x`.
sns.countplot(x='Exited', data=data)

There is a class imbalance: the number of customers who exited is roughly one quarter of the number who stayed!

In [None]:
# One histogram of CreditScore per value of 'Exited' (one facet column per class).
g = sns.FacetGrid(data,col='Exited')
g.map(plt.hist,'CreditScore')

Credit score looks approximately normally distributed, with the center between 600 and 700 for both classes.

In [None]:
# Inspect how customers are distributed across countries.
# The column is passed by keyword: in recent seaborn releases the first
# positional parameter of countplot is `data`, not `x`.
sns.countplot(x='Geography', data=data)

All the customers come from Europe. Most are from France (~50%) while Germany is ~25% and Spain 25%

In [None]:
# One histogram of Age per value of 'Exited' (one facet column per class).
g = sns.FacetGrid(data,col='Exited')
g.map(plt.hist,'Age')

Looks like a majority of the loyal customers are between 20-40 years old

In [None]:
# One histogram of Balance per value of 'Exited' (one facet column per class).
g = sns.FacetGrid(data, col='Exited')
g.map(plt.hist, 'Balance')
# Rotate the x tick labels only after the histograms are drawn, and do it via
# tick_params so the setting applies to whichever ticks end up being rendered
# (large balance values would otherwise overlap).
for ax in g.axes.flat:
    ax.tick_params(axis='x', labelrotation=45)

Interesting: there are a lot of customers with a balance of zero!

Start Data Processing

Start separating out the labels column

In [None]:
from sklearn.preprocessing import LabelEncoder

# Encode the target column as integer class ids and remember the original
# class values (indexed by encoded id) for display and for LIME.
le = LabelEncoder()
labels = le.fit_transform(data['Exited'])
class_names = le.classes_

Drop the columns RowNumber, CustomerId, Surname as they are not predictive features, and remove the labels column

In [None]:
# Remove the identifier columns and the target so that only model features remain.
non_feature_columns = ['RowNumber', 'CustomerId', 'Surname', 'Exited']
data = data.drop(columns=non_feature_columns)
data.head()

In [None]:
# Feature names in column order; LIME uses these to label its explanations.
feature_names = list(data.columns)

Explicitly define the categorical features, we need this for LIME.
Categorical data: Geography, Gender, HasCrCard, IsActiveMember.

In [None]:
# Categorical columns are identified by name; LIME needs their positional
# indices plus, for each index, the category values indexed by encoded id.
categorical_columns = ['Geography', 'Gender', 'HasCrCard', 'IsActiveMember']
categorical_features = [data.columns.get_loc(name) for name in categorical_columns]
categorical_names = {}
for feature, name in zip(categorical_features, categorical_columns):
    le = LabelEncoder()
    # Assign by column label so the column is replaced outright. Writing through
    # `data.iloc[:, feature] = ...` sets values in place on recent pandas, which
    # leaves the text columns with object dtype instead of integers.
    data[name] = le.fit_transform(data[name].values)
    categorical_names[feature] = le.classes_

In [None]:
# Preview the first 10 rows after label-encoding the categorical columns.
data.head(10)

In [None]:

# Re-check the column dtypes after encoding.
data.info()

Ok..now the dataframe has all data as integers/floats, so we are ready to start training the XGBoost classifier!

In [None]:
seed = 101 # fix random seed for reproducibility
np.random.seed(seed)

# Split Train Test sets
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; stratify on the labels so that both
# splits keep the same class ratio.
train, test, labels_train, labels_test = train_test_split(data, labels,
                                                    test_size=0.2,
                                                    stratify=labels,
                                                    random_state=seed)
# Report the shape of each of the four splits (the test labels, not the full label array).
print(train.shape, test.shape, labels_train.shape, labels_test.shape)

In [None]:
import xgboost

# Gradient-boosted tree classifier. scale_pos_weight=4 presumably compensates
# for the roughly 1:4 class imbalance seen earlier (confirm against the data).
xgb_params = {
    'n_estimators': 300,
    'seed': seed,
    'scale_pos_weight': 4,
    'eval_metric': "aucpr",
}
gbtree = xgboost.XGBClassifier(**xgb_params)
gbtree.fit(train, labels_train)

In [None]:
# Hard class predictions for the held-out test set (used by model_evaluate).
y_preds = gbtree.predict(test)

In [None]:
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
def model_evaluate(true_labels=None, predicted_labels=None):
    """Print accuracy and a classification report, and draw the confusion matrix.

    Parameters
    ----------
    true_labels : array-like, optional
        Ground-truth class labels. Defaults to the notebook-level ``labels_test``.
    predicted_labels : array-like, optional
        Predicted class labels. Defaults to the notebook-level ``y_preds``.

    Returns
    -------
    None
        Results are printed and plotted as side effects.
    """
    # Fall back to the notebook globals so the existing no-argument call keeps working.
    if true_labels is None:
        true_labels = labels_test
    if predicted_labels is None:
        predicted_labels = y_preds

    print('Test Accuracy:\t{:0.1f}%'.format(accuracy_score(true_labels, predicted_labels)*100))

    #classification report
    print('\n')
    print(classification_report(true_labels, predicted_labels))

    #confusion matrix
    confmat = confusion_matrix(true_labels, predicted_labels)

    fig, ax = plt.subplots(figsize=(4, 4))
    ax.matshow(confmat, cmap=plt.cm.Blues, alpha=0.3)
    # Annotate every cell with its count (rows = true class, columns = predicted class).
    for i in range(confmat.shape[0]):
        for j in range(confmat.shape[1]):
            ax.text(x=j, y=i, s=confmat[i, j], va='center', ha='center')
    ax.set_xlabel('Predicted label')
    ax.set_ylabel('True label')
    fig.tight_layout()

In [None]:
# Evaluate the model on the held-out test set: prints accuracy and the
# classification report, then draws the confusion matrix.
model_evaluate()

In [None]:
# Plot PR-curve
from sklearn.metrics import precision_recall_curve
# Class probabilities; column 1 is the probability of the positive class.
y_pred = gbtree.predict_proba(test)
precision, recall, thresholds = precision_recall_curve(labels_test, y_pred[:,1])

# precision/recall have one more entry than thresholds; pad so that they align.
thresholds = np.append(thresholds, 1)
# F1 is undefined where precision + recall == 0; report 0 there instead of
# producing NaN and a divide-by-zero RuntimeWarning.
pr_sum = precision + recall
f1_scores = np.divide(2 * (precision * recall), pr_sum,
                      out=np.zeros_like(pr_sum), where=pr_sum > 0)
plt.step(recall, precision, color='b', alpha=0.4, where='post')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.ylim([0.0, 1.05])
plt.xlim([0.0, 1.0])
plt.title('Precision-Recall curve')
plt.show()

This is an imbalanced dataset in which the positive cases are far fewer than the negative ones. For the positive cases, the XGB model gets 50+% of the positive classifications correct, but there are still a lot of false negatives and false positives.

Let's look at the XGB model global importance features, we'll contrast with LIME local importance later.

In [None]:
# Feature importance
# Global (model-wide) importance of each feature for the fitted booster,
# using plot_importance's default importance type.
from xgboost import plot_importance
plot_importance(gbtree)

From a global importance level, it seems the top 3 features are EstimatedSalary, CreditScore & Balance.

Now, let's use LIME!!

In [None]:
import lime.lime_tabular
# Build the tabular explainer from the training data. The categorical feature
# indices/names tell LIME which columns to treat as categories and how to label
# them. random_state is fixed so the perturbation sampling, and therefore the
# explanations, are reproducible across runs.
explainer = lime.lime_tabular.LimeTabularExplainer(train.values ,feature_names = feature_names,class_names=class_names,
                                                   categorical_features=categorical_features,
                                                   categorical_names=categorical_names, kernel_width=3,
                                                   random_state=seed)

Lime tabular needs a predict function

In [None]:
def predict_fn(x):
    """Return the model's class probabilities for the rows in ``x`` as floats."""
    return gbtree.predict_proba(x).astype(float)

In [None]:
#Choose a local instance
# Take an explicit copy: this row is edited in a later cell, and modifying a
# row taken straight from `test` triggers pandas' SettingWithCopyWarning.
chosen = test.iloc[111].copy()
print(chosen)

In [None]:
# Look up the ground-truth class of the chosen test row (same positional index, 111).
print('True class: %s' % class_names[labels_test[111]])

In [None]:
# Explain the model's prediction for the chosen row using its top 5 features.
# LIME documents `data_row` as a 1-d numpy array, so pass the values rather than
# the label-indexed pandas Series (which would rely on positional integer lookups).
exp = explainer.explain_instance(chosen.to_numpy(dtype=float), predict_fn, num_features=5)
exp.show_in_notebook(show_all=False)

For this local instance, LIME indicates that the 2 features that contribute to the classification are Age and IsActiveMember, rather than the 3 globally important features reported by XGBoost. Let's test this out by modifying the 2 features that LIME chose.

In [None]:
# Overwrite the two features LIME highlighted, in place on `chosen`, to see how
# the explanation and predicted probability change. Note: the original values of
# `chosen` are lost after this cell runs.
chosen['Age'] = 40
chosen['IsActiveMember'] = 0
print(chosen)

In [None]:
# Re-run the explanation on the current (modified) values of `chosen`.
# LIME documents `data_row` as a 1-d numpy array, so pass the values rather than
# the label-indexed pandas Series (which would rely on positional integer lookups).
exp = explainer.explain_instance(chosen.to_numpy(dtype=float), predict_fn, num_features=5)
exp.show_in_notebook(show_all=False)

The predicted probability of the class has now decreased!