In [2]:
import pandas as pd

In [3]:
data = pd.read_csv('./data/DAT_EURUSD_2018.csv', sep=',', header=0, index_col=0 )
data.head(2)

Unnamed: 0_level_0,Buying_Rate,Cash_Buying_Rate,Selling_Rate,Cash_Selling_Rate
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018-01-01 17:00,1.20037,1.201,1.20037,1.201
2018-01-01 17:01,1.20083,1.20095,1.20017,1.2003


In [21]:
from sklearn.model_selection import train_test_split

target = 'Buying_Rate'
features = [i for i in data.columns if i!= target]

X_train, X_test, y_train, y_test = train_test_split(data.loc[:, features], data.loc[:, target], test_size=0.33, random_state=42)

## Standard Modelling

In [12]:
from interpret.glassbox import ExplainableBoostingRegressor, LinearRegression, RegressionTree

ebr = ExplainableBoostingRegressor(random_state=20200114, n_jobs=4)
ebr.fit(X_train, y_train) 

lr = LinearRegression(random_state=20200114)
lr.fit(X_train, y_train) 

tr = RegressionTree(random_state=20200114)
tr.fit(X_train, y_train)


<interpret.glassbox.decisiontree.RegressionTree at 0x2f78c5d64c8>

### Explanation of Features

In [22]:
from interpret import show
from interpret.data import Marginal

marginal_train = Marginal().explain_data(X_train, y_train, name = 'Train Data')
marginal_test = Marginal().explain_data(X_test, y_test, name = 'Test Data')

ebr_global = ebr.explain_global(name='EBR')
lr_global = lr.explain_global(name='LR')
tr_global = tr.explain_global(name='TR')

show([marginal_train, marginal_test, ebr_global, lr_global, tr_global])

### Explanation of prediction individus

In [24]:
ebr_local = ebr.explain_local(X_test[:5], y_test[:5], name='EBR')
lr_local = lr.explain_local(X_test[:5], y_test[:5], name='LR')
tr_local = tr.explain_local(X_test[:5], y_test[:5], name='TR')

In [25]:
show([ebr_local, lr_local, tr_local])

In [26]:
from interpret.perf import RegressionPerf

ebr_perf = RegressionPerf(ebr.predict).explain_perf(X_test, y_test, name='Emr')
lr_perf = RegressionPerf(lr.predict).explain_perf(X_test, y_test, name='Lr')
tr_perf = RegressionPerf(tr.predict).explain_perf(X_test, y_test, name='Tr')

print('MAE ebr : ', ebr_perf._internal_obj['overall']['mae'])
print('MAE lr : ', lr_perf._internal_obj['overall']['mae'])
print('MAE tr : ', tr_perf._internal_obj['overall']['mae'])

show([ebr_perf, lr_perf, tr_perf])

MAE ebr :  0.0006664150665749629
MAE lr :  0.03237018995366083
MAE tr :  0.0034873319539358116


## Blackbox Customized Modelling

In [29]:
from sklearn.ensemble import StackingRegressor
from sklearn.linear_model import RidgeCV, LinearRegression

from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import PowerTransformer, StandardScaler
from sklearn.feature_selection import SelectKBest
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.decomposition import PCA
from interpret.glassbox import ExplainableBoostingRegressor

estimators = [
    ('ridge', RidgeCV()),
    ('lr', LinearRegression()),
    ('rf', RandomForestRegressor())
]

stacker = StackingRegressor(
    estimators=estimators,
    final_estimator=ExplainableBoostingRegressor(random_state=20200114, n_jobs=4)
    )

black_model_emr = Pipeline([
    ('e1', PowerTransformer()),
    ('e2', StandardScaler()),
    ('e3', SelectKBest(k='all')),
    ('m', stacker)
])

black_model_emr.fit(X_train, y_train)


Pipeline(memory=None,
         steps=[('e1',
                 PowerTransformer(copy=True, method='yeo-johnson',
                                  standardize=True)),
                ('e2',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('e3',
                 SelectKBest(k='all',
                             score_func=<function f_classif at 0x000002F79CE9DAF8>)),
                ('m',
                 StackingRegressor(cv=None,
                                   estimators=[('ridge',
                                                RidgeCV(alphas=array([ 0.1,  1. , 10. ]),
                                                        cv=None,
                                                        fit_intercept...
                                                                                early_stopping_tolerance=1e-05,
                                                                                feature_names=None,
                         

### Explanation of Features

In [30]:
from interpret.blackbox import MorrisSensitivity

marginal_train = Marginal().explain_data(X_train , y_train , name = 'Train Data')
marginal_test = Marginal().explain_data(X_test , y_test , name = 'Test Data')

sensitivity_emr = MorrisSensitivity(predict_fn=black_model_emr.predict, data=X_train )
sensitivity_emr_global = sensitivity_emr.explain_global(name="Global Emr Sensitivity")


show([marginal_train, marginal_test, sensitivity_emr_global])

In [33]:
from interpret.blackbox import PartialDependence

pdp = PartialDependence(predict_fn=black_model_emr.predict, data=X_train)
pdp_global = pdp.explain_global(name='Partial Dependence')

show(pdp_global)

### Explanation of prediction individus

In [37]:
from interpret.blackbox import LimeTabular
from interpret import show

#Blackbox explainers need a predict function, and optionally a dataset
lime = LimeTabular(predict_fn=black_model_emr.predict, data=X_train, random_state=20200114)

#Pick the instances to explain, optionally pass in labels if you have them
lime_local = lime.explain_local(X_test[:5], y_test[:5], name='LIME')

show(lime_local)