In [28]:
import pandas as pd
import numpy as np
import interpret

In [2]:
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

In [5]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, plot_confusion_matrix

In [6]:
# Load the bank-marketing table; comma is read_csv's default delimiter.
df = pd.read_csv("bank-full.csv")

In [7]:
# Preview the first four rows to sanity-check how the columns were parsed.
df.head(4)

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,Target
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown,no
1,44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,-1,0,unknown,no
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,-1,0,unknown,no
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,may,92,1,-1,0,unknown,no


In [8]:
# Per-column dtypes: shows which columns were read as text (object) and which as int64.
df.dtypes

age           int64
job          object
marital      object
education    object
default      object
balance       int64
housing      object
loan         object
contact      object
day           int64
month        object
duration      int64
campaign      int64
pdays         int64
previous      int64
poutcome     object
Target       object
dtype: object

In [9]:
# Column labels as a plain Python list.
list(df.columns)

['age',
 'job',
 'marital',
 'education',
 'default',
 'balance',
 'housing',
 'loan',
 'contact',
 'day',
 'month',
 'duration',
 'campaign',
 'pdays',
 'previous',
 'poutcome',
 'Target']

In [10]:
# Integer-encode every column by treating its values as categories and keeping the codes.
# NOTE: this applies to the numeric columns as well, so e.g. `age` and `balance` in df1
# hold category codes (ranks among the distinct values), not the original numbers.
df1 = pd.DataFrame(
    {name: pd.Categorical(df[name]).codes for name in df.columns},
    index=df.index,
)

In [11]:
# Spot-check the integer-coded frame (first three rows).
df1.head(3)

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,Target
0,40,4,1,2,0,3036,1,0,2,4,8,261,0,0,0,3,0
1,26,9,2,1,0,945,1,0,2,4,8,151,0,0,0,3,0
2,15,2,1,1,0,918,1,1,2,4,8,76,0,0,0,3,0


In [23]:
# Reverse lookup for the encoding: for each column, map integer code -> original value.
label_dict = {
    name: dict(enumerate(df[name].astype('category').cat.categories))
    for name in df.columns
}

In [24]:
# Predictor matrix: the sixteen encoded input columns, in their original order.
Xfeatures = df1.loc[:, [
    'age', 'job', 'marital', 'education',
    'default', 'balance', 'housing', 'loan',
    'contact', 'day', 'month', 'duration',
    'campaign', 'pdays', 'previous', 'poutcome',
]]

# Encoded label column.
ylabels = df1.loc[:, 'Target']

In [14]:
# Confirm the predictor frame no longer carries the Target column (first two rows).
Xfeatures.head(2)

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome
0,40,4,1,2,0,3036,1,0,2,4,8,261,0,0,0,3
1,26,9,2,1,0,945,1,0,2,4,8,151,0,0,0,3


In [15]:
# dtypes after encoding: the category codes come back as small signed integers.
Xfeatures.dtypes

age           int8
job           int8
marital       int8
education     int8
default       int8
balance      int16
housing       int8
loan          int8
contact       int8
day           int8
month         int8
duration     int16
campaign      int8
pdays        int16
previous      int8
poutcome      int8
dtype: object

In [16]:
# dtype of the encoded target Series.
ylabels.dtypes

dtype('int8')

In [17]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    Xfeatures,
    ylabels,
    random_state=7,
    test_size=0.3,
)

In [29]:
# Logistic-regression baseline, currently disabled: while these lines stay commented
# out, this cell does not create `lr_model`, so later cells must not rely on it.
# lr_model = LogisticRegression()
# lr_model.fit(x_train, y_train)

In [31]:
from interpret.glassbox import ExplainableBoostingClassifier

In [32]:
# Explainable Boosting Machine with library defaults (no hyperparameters overridden).
ebm = ExplainableBoostingClassifier()

In [34]:
# Fit on the training split. The estimator repr shown below lists its feature_names,
# which include pairwise 'a x b' entries in addition to the sixteen input columns.
ebm.fit(x_train, y_train)

ExplainableBoostingClassifier(feature_names=['age', 'job', 'marital',
                                             'education', 'default', 'balance',
                                             'housing', 'loan', 'contact',
                                             'day', 'month', 'duration',
                                             'campaign', 'pdays', 'previous',
                                             'poutcome', 'housing x month',
                                             'housing x duration',
                                             'day x month', 'month x duration',
                                             'contact x duration',
                                             'duration x poutcome',
                                             'age x duration',
                                             'balance x duration',
                                             'duration x pdays',
                                             'dur...
                  

In [35]:
# Evaluate on the held-out split.
# NOTE(review): presumably the scikit-learn-style `score` (mean accuracy) — confirm.
ebm.score(x_test, y_test)

0.9081391919787674

In [36]:
# One held-out example (positional row 8) and its true label, used for single-row checks.
ex1, act1 = x_test.iloc[8], y_test.iloc[8]

In [38]:
# Predicted class, then class probabilities, for the single example (one per line).
print(ebm.predict([ex1]), ebm.predict_proba([ex1]), sep="\n")

[0]
[[0.93202639 0.06797361]]


In [39]:
from interpret import show

In [41]:
# Debug aid: list the fitted estimator's attributes to discover what it exposes
# (the output includes explain_global and explain_local, used below).
dir(ebm)

['__abstractmethods__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_check_n_features',
 '_class_idx_',
 '_estimator_type',
 '_get_param_names',
 '_get_tags',
 '_merged_pair_score_fn',
 '_more_tags',
 '_original_term_means_',
 '_repr_html_',
 '_repr_html_inner',
 '_repr_mimebundle_',
 '_validate_data',
 'additive_terms_',
 'available_explanations',
 'bagged_models_',
 'bin_budget_frac',
 'binning',
 'breakpoint_iteration_',
 'classes_',
 'composition',
 'decision_function',
 'delta',
 'early_stopping_rounds',
 'early_stopping_tolerance',
 'epsilon',
 'explain_global',
 'explain_local',
 'explainer_type'

In [40]:
# Build the model-level (global) explanation object from the fitted EBM.
ebm_global = ebm.explain_global()

In [42]:
# Render the global explanation. The text captured below is only deprecation
# notices from the dash packages, not model output.
show(ebm_global)

The dash_html_components package is deprecated. Please replace
`import dash_html_components as html` with `from dash import html`
  import dash_html_components as html
The dash_core_components package is deprecated. Please replace
`import dash_core_components as dcc` with `from dash import dcc`
  import dash_core_components as dcc
The dash_table package is deprecated. Please replace
`import dash_table` with `from dash import dash_table`

Also, if you're using any of the table format helpers (e.g. Group), replace 
`from dash_table.Format import Group` with 
`from dash.dash_table.Format import Group`
  import dash_table as dt


In [43]:
# Same explanation passed as a one-element list.
# NOTE(review): per the InterpretML docs a list opens the dashboard view — confirm for the installed version.
show([ebm_global])

In [44]:
# Per-row (local) explanations for the test split, with the true labels passed alongside.
# NOTE(review): this explains every row of x_test; a small slice may be enough if rendering is slow.
ebm_local = ebm.explain_local(x_test, y_test)

In [45]:
# Render the local explanations.
show(ebm_local)

In [46]:
# Same local explanation passed as a one-element list.
# NOTE(review): per the InterpretML docs a list opens the dashboard view — confirm for the installed version.
show([ebm_local])

In [47]:
import lime
import lime.lime_tabular

In [48]:
# Column names of the predictor frame, in column order.
feature_names = Xfeatures.columns.tolist()

# Human-readable class names for LIME, ordered by integer target code.
# `ylabels.unique()` returned the raw codes in order of first appearance, which is not
# guaranteed to match the order of the classifier's predict_proba columns
# (sorted class labels for scikit-learn-style estimators). label_dict['Target'] maps
# each code back to its original label, so walking the codes in sorted order yields
# names aligned with those columns.
class_name = [str(label_dict['Target'][code]) for code in sorted(label_dict['Target'])]

In [None]:
# LIME tabular explainer built from the training matrix. A fixed random_state makes the
# perturbation sampling (and therefore the explanation) repeatable across runs; 7 is
# the same seed the train/test split uses.
explainer = lime.lime_tabular.LimeTabularExplainer(
    x_train.values,
    feature_names = feature_names,
    class_names = class_name,
    random_state = 7,
)

# Explain the fitted EBM. The original call referenced `lr_model`, which is not defined
# in the cells shown (the logistic-regression cell is commented out) and would raise
# NameError. The row is passed as a 1-D ndarray, the input type LIME documents for
# `data_row`, rather than as a pandas Series.
exp = explainer.explain_instance(
    x_test.iloc[8].values,
    ebm.predict_proba,
    num_features = 4,
    top_labels = 1,
)