## FLAML for hyperparameter optimisation and model selection
We use FLAML twice: first to find the best component model for each estimator, and then to optimise the estimators themselves and choose the best one. This notebook shows how it's done.

In [1]:
%load_ext autoreload
%autoreload 2
import os, sys
import warnings
warnings.filterwarnings('ignore') # suppress sklearn deprecation warnings for now.. 

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Support running dowhy and auto-causality from source: if either package is
# not importable, add its checkout directory (located two levels above the
# current working directory) to sys.path so that later imports can find it.
root_path = os.path.realpath('../..')
try:
    import auto_causality
except ModuleNotFoundError:
    sys.path.append(os.path.join(root_path, "auto-causality"))

try:
    import dowhy
except ModuleNotFoundError:
    sys.path.append(os.path.join(root_path, "dowhy"))

In [2]:
# Make the notebook expand to the full width of the browser window by
# injecting a CSS override for the `.container` class.
# `display` and `HTML` come from the public `IPython.display` module; importing
# `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [3]:
%%javascript

// Turn off scrollable windows for large output: override the output area's
// `_should_scroll` hook so that it answers `false` for any line count.
IPython.OutputArea.prototype._should_scroll = (lines) => false;

<IPython.core.display.Javascript object>

In [5]:
from auto_causality import AutoCausality
from auto_causality.datasets import synth_ihdp
from auto_causality.data_utils import preprocess_dataset
from auto_causality.scoring import ate, group_ate

### Model fitting & scoring
Here we fit a (selection of) model(s) to the data and score them with the ERUPT metric on held-out data.

In [7]:
# Load the dataset returned by `synth_ihdp()` and pre-process it.
# Pre-processing one-hot-encodes all categorical and int variables,
# because otherwise DoWhy/EconML will do it for us in hard-to-reproduce ways.
# If you don't want an int variable to be one-hot-encoded,
# please cast it to float before preprocessing.

data_df = synth_ihdp()
treatment = 'treatment'
targets = ['y_factual']
data_df, features_X, features_W = preprocess_dataset(data_df, treatment, targets)
outcome = targets[0]

# Hold out 20% of the rows for final scoring. The split is seeded so that
# re-running the notebook produces the same train/test partition.
train_df, test_df = train_test_split(data_df, test_size=0.2, random_state=42)

In [8]:

# Settings for the AutoCausality search.
# To restrict the search to specific estimators, uncomment `estimator_list`
# below: any estimator whose full name contains one of the listed strings is
# included. The other allowed values are 'all' and 'auto', the default is 'auto'.
search_settings = dict(
    time_budget=120,
    # estimator_list=[
    #     "Dummy",
    #     "SparseLinearDML",
    #     "ForestDRLearner",
    #     "TransformedOutcome",
    #     "CausalForestDML",
    #     ".LinearDML",
    #     "DomainAdaptationLearner",
    #     "SLearner",
    #     "XLearner",
    #     "TLearner",
    #     "Ortho",
    # ],
    metric="norm_erupt",
    verbose=3,
    components_verbose=2,
    components_time_budget=10,
)
ac = AutoCausality(**search_settings)

# run autocausality on the training split
ac.fit(train_df, treatment, outcome, features_W, features_X)

# report the outcome of the search: best estimator, its config and its score
print(f"Best estimator: {ac.best_estimator}")
print(f"best config: {ac.best_config}")
print(f"best score: {ac.best_score}")


[flaml.tune.tune: 05-02 14:44:33] {456} INFO - trial 1 config: {'estimator': {'estimator_name': 'backdoor.auto_causality.models.Dummy'}}


Initial configs: [{'estimator': {'estimator_name': 'backdoor.auto_causality.models.Dummy'}}, {'estimator': {'estimator_name': 'backdoor.econml.metalearners.SLearner'}}, {'estimator': {'estimator_name': 'backdoor.econml.metalearners.DomainAdaptationLearner'}}, {'estimator': {'estimator_name': 'backdoor.econml.dr.ForestDRLearner', 'min_propensity': 1e-06, 'n_estimators': 1000, 'min_samples_split': 5, 'min_samples_leaf': 5, 'min_weight_fraction_leaf': 0.0, 'max_features': 'auto', 'min_impurity_decrease': 0.0, 'max_samples': 0.45, 'min_balancedness_tol': 0.45, 'honest': True, 'subforest_size': 4}}, {'estimator': {'estimator_name': 'backdoor.econml.dml.CausalForestDML', 'drate': True, 'n_estimators': 100, 'criterion': 'mse', 'min_samples_split': 10, 'min_samples_leaf': 5, 'min_weight_fraction_leaf': 0.0, 'max_features': 'auto', 'min_impurity_decrease': 0.0, 'max_samples': 0.45, 'min_balancedness_tol': 0.45, 'honest': True, 'fit_intercept': True, 'subforest_size': 4}}, {'estimator': {'estima

[flaml.tune.tune: 05-02 14:45:31] {110} INFO - result: {'norm_erupt': 2.643584116404753, 'estimator_name': 'backdoor.auto_causality.models.Dummy', 'scores': {'train': {'erupt': 6.389034563236032, 'norm_erupt': 2.1791099314293527, 'qini': -0.06419486160233057, 'auc': 0.47135030949161466, 'ate': 4.0245515465994, 'intrp': <econml.cate_interpreter._interpreters.SingleTreeCateInterpreter object at 0x000001B42B62E3D0>, 'values':      treated  y_factual         p  policy  norm_policy   weights
0        0.0   2.704396  0.186583    True         True  0.000000
1        1.0   6.205641  0.186583    True        False  5.359551
2        0.0   4.498734  0.186583    True         True  0.000000
3        0.0   4.596620  0.186583    True        False  0.000000
4        0.0   3.399457  0.186583    True         True  0.000000
..       ...        ...       ...     ...          ...       ...
472      0.0   1.052633  0.186583    True        False  0.000000
473      1.0   6.295170  0.186583    True         Tru

{'estimator_name': 'backdoor.econml.metalearners.SLearner'}


[flaml.tune.tune: 05-02 14:56:24] {110} INFO - result: {'norm_erupt': 2.7397746717607614, 'estimator_name': 'backdoor.econml.metalearners.SLearner', 'scores': {'train': {'erupt': 6.389034563236032, 'norm_erupt': 2.7055959460175822, 'qini': 0.031078713616757892, 'auc': 0.5541243514884452, 'ate': 3.8101140380418803, 'intrp': <econml.cate_interpreter._interpreters.SingleTreeCateInterpreter object at 0x000001B42C66ED00>, 'values':      treated  y_factual         p  policy  norm_policy   weights
0        0.0   2.704396  0.186583    True         True  0.000000
1        1.0   6.205641  0.186583    True        False  5.359551
2        0.0   4.498734  0.186583    True        False  0.000000
3        0.0   4.596620  0.186583    True        False  0.000000
4        0.0   3.399457  0.186583    True        False  0.000000
..       ...        ...       ...     ...          ...       ...
472      0.0   1.052633  0.186583    True         True  0.000000
473      1.0   6.295170  0.186583    True        

{'estimator_name': 'backdoor.econml.metalearners.DomainAdaptationLearner'}


[flaml.tune.tune: 05-02 15:26:25] {110} INFO - result: {'norm_erupt': 2.91780471657086, 'estimator_name': 'backdoor.econml.metalearners.DomainAdaptationLearner', 'scores': {'train': {'erupt': 6.394325091946862, 'norm_erupt': 2.7071098041415813, 'qini': 0.04340815024564181, 'auc': 0.5826228329619279, 'ate': 3.979624915471649, 'intrp': <econml.cate_interpreter._interpreters.SingleTreeCateInterpreter object at 0x000001B42FC2FCD0>, 'values':      treated  y_factual         p  policy  norm_policy   weights
0        0.0   2.704396  0.186583    True         True  0.000000
1        1.0   6.205641  0.186583    True         True  5.378396
2        0.0   4.498734  0.186583    True        False  0.000000
3        0.0   4.596620  0.186583    True        False  0.000000
4        0.0   3.399457  0.186583    True         True  0.000000
..       ...        ...       ...     ...          ...       ...
472      0.0   1.052633  0.186583    True         True  0.000000
473      1.0   6.295170  0.186583    T

{'estimator_name': 'backdoor.econml.dr.ForestDRLearner', 'min_propensity': 1e-06, 'n_estimators': 1000, 'min_samples_split': 5, 'min_samples_leaf': 5, 'min_weight_fraction_leaf': 0.0, 'max_features': 'auto', 'min_impurity_decrease': 0.0, 'max_samples': 0.45, 'min_balancedness_tol': 0.45, 'honest': 1, 'subforest_size': 4}


[flaml.tune.tune: 05-02 15:46:31] {110} INFO - result: {'norm_erupt': 2.8042173510373987, 'estimator_name': 'backdoor.econml.dr.ForestDRLearner', 'scores': {'train': {'erupt': 6.389034563236032, 'norm_erupt': 2.8036503553956784, 'qini': 0.09733005414638386, 'auc': 0.5844271968848646, 'ate': 3.997875584461242, 'intrp': <econml.cate_interpreter._interpreters.SingleTreeCateInterpreter object at 0x000001B4304120A0>, 'values':      treated  y_factual         p  policy  norm_policy   weights
0        0.0   2.704396  0.186583    True         True  0.000000
1        1.0   6.205641  0.186583    True         True  5.359551
2        0.0   4.498734  0.186583    True        False  0.000000
3        0.0   4.596620  0.186583    True        False  0.000000
4        0.0   3.399457  0.186583    True         True  0.000000
..       ...        ...       ...     ...          ...       ...
472      0.0   1.052633  0.186583    True         True  0.000000
473      1.0   6.295170  0.186583    True        False

{'estimator_name': 'backdoor.econml.dml.CausalForestDML', 'drate': 1, 'n_estimators': 100, 'criterion': 'mse', 'min_samples_split': 10, 'min_samples_leaf': 5, 'min_weight_fraction_leaf': 0.0, 'max_features': 'auto', 'min_impurity_decrease': 0.0, 'max_samples': 0.45, 'min_balancedness_tol': 0.45, 'honest': 1, 'fit_intercept': 1, 'subforest_size': 4}


[flaml.tune.tune: 05-02 16:06:34] {110} INFO - result: {'norm_erupt': 2.733966063748121, 'estimator_name': 'backdoor.econml.dml.CausalForestDML', 'scores': {'train': {'erupt': 6.389034563236032, 'norm_erupt': 2.8805595215654543, 'qini': 0.08455963281044819, 'auc': 0.5790754447142378, 'ate': 3.8521904721553697, 'intrp': <econml.cate_interpreter._interpreters.SingleTreeCateInterpreter object at 0x000001B439D53100>, 'values':      treated  y_factual         p  policy  norm_policy   weights
0        0.0   2.704396  0.186583    True         True  0.000000
1        1.0   6.205641  0.186583    True        False  5.359551
2        0.0   4.498734  0.186583    True        False  0.000000
3        0.0   4.596620  0.186583    True        False  0.000000
4        0.0   3.399457  0.186583    True        False  0.000000
..       ...        ...       ...     ...          ...       ...
472      0.0   1.052633  0.186583    True         True  0.000000
473      1.0   6.295170  0.186583    True        Fals

{'estimator_name': 'backdoor.auto_causality.models.TransformedOutcome'}


[flaml.tune.tune: 05-02 16:16:37] {110} INFO - result: {'norm_erupt': 2.636768127582508, 'estimator_name': 'backdoor.auto_causality.models.TransformedOutcome', 'scores': {'train': {'erupt': 5.678211234630098, 'norm_erupt': 2.5606562772524453, 'qini': 0.15615764965781648, 'auc': 0.4908930744818894, 'ate': 4.532105, 'intrp': <econml.cate_interpreter._interpreters.SingleTreeCateInterpreter object at 0x000001B439C0D880>, 'values':      treated  y_factual         p  policy  norm_policy   weights
0        0.0   2.704396  0.186583    True        False  0.000000
1        1.0   6.205641  0.186583    True         True  4.677653
2        0.0   4.498734  0.186583    True         True  0.000000
3        0.0   4.596620  0.186583    True        False  0.000000
4        0.0   3.399457  0.186583    True         True  0.000000
..       ...        ...       ...     ...          ...       ...
472      0.0   1.052633  0.186583    True        False  0.000000
473      1.0   6.295170  0.186583    True        

{'estimator_name': 'backdoor.econml.dr.ForestDRLearner', 'min_propensity': 0.0017231790528715186, 'n_estimators': 21, 'min_samples_split': 2, 'min_samples_leaf': 9, 'min_weight_fraction_leaf': 0.18432972513405987, 'max_features': 'log2', 'min_impurity_decrease': 2.6639242043080236, 'max_samples': 0.378105341302808, 'min_balancedness_tol': 0.006805783323944936, 'honest': 1, 'subforest_size': 8}


[flaml.tune.tune: 05-02 16:36:40] {110} INFO - result: {'norm_erupt': 2.558943963050842, 'estimator': <dowhy.causal_estimator.CausalEstimate object at 0x000001B42B5DFAC0>, 'estimator_name': 'backdoor.econml.dr.ForestDRLearner', 'scores': {'train': {'erupt': 6.389034563236032, 'norm_erupt': 2.363611513309274, 'qini': -0.004328450015501336, 'auc': 0.5065125457617966, 'ate': 4.225177882411375, 'intrp': <econml.cate_interpreter._interpreters.SingleTreeCateInterpreter object at 0x000001B43B50F100>, 'values':      treated  y_factual         p  policy  norm_policy   weights
0        0.0   2.704396  0.186583    True         True  0.000000
1        1.0   6.205641  0.186583    True         True  5.359551
2        0.0   4.498734  0.186583    True         True  0.000000
3        0.0   4.596620  0.186583    True         True  0.000000
4        0.0   3.399457  0.186583    True         True  0.000000
..       ...        ...       ...     ...          ...       ...
472      0.0   1.052633  0.186583   

{'estimator_name': 'backdoor.econml.dr.ForestDRLearner', 'min_propensity': 0.00151444794946084, 'n_estimators': 2, 'min_samples_split': 4, 'min_samples_leaf': 8, 'min_weight_fraction_leaf': 0.15437800297405868, 'max_features': 'auto', 'min_impurity_decrease': 3.784618715866711, 'max_samples': 0.3107998617035387, 'min_balancedness_tol': 0.04501275691617817, 'honest': 1, 'subforest_size': 8}


[flaml.tune.tune: 05-02 16:56:43] {110} INFO - result: {'norm_erupt': 2.558943963050842, 'estimator': <dowhy.causal_estimator.CausalEstimate object at 0x000001B43C045040>, 'estimator_name': 'backdoor.econml.dr.ForestDRLearner', 'scores': {'train': {'erupt': 6.389034563236032, 'norm_erupt': 2.363611513309274, 'qini': -0.004328450015501336, 'auc': 0.5065125457617966, 'ate': 4.143188683765622, 'intrp': <econml.cate_interpreter._interpreters.SingleTreeCateInterpreter object at 0x000001B43BEB7730>, 'values':      treated  y_factual         p  policy  norm_policy   weights
0        0.0   2.704396  0.186583    True         True  0.000000
1        1.0   6.205641  0.186583    True         True  5.359551
2        0.0   4.498734  0.186583    True         True  0.000000
3        0.0   4.596620  0.186583    True         True  0.000000
4        0.0   3.399457  0.186583    True         True  0.000000
..       ...        ...       ...     ...          ...       ...
472      0.0   1.052633  0.186583   

Best estimator: backdoor.econml.metalearners.DomainAdaptationLearner
best config: {'estimator': {'estimator_name': 'backdoor.econml.metalearners.DomainAdaptationLearner'}}
best score: 2.91780471657086


In [None]:
# we can now resume the fit to continue with the init_cfgs which we haven't tried yet
# ac.fit(train_df, treatment, outcome, features_W, features_X,resume=True)
# # return best estimator
# print(f"Best estimator: {ac.best_estimator}")
# # config of best estimator:
# print(f"best config: {ac.best_config}")
# # best score:
# print(f"best score: {ac.best_score}")

In [None]:
# Show the raw results stored on the fitted AutoCausality object; as the last
# expression of the cell, the value is rendered as the cell's output.
# NOTE(review): presumably the per-trial FLAML tune results — confirm.
ac.results.results

In [None]:
# Score every estimator on the test set, which has been kept aside until now,
# and store the result next to its existing scores under the 'test' key.
from auto_causality.scoring import make_scores
for est_name in ac.scores:
    scr = ac.scores[est_name]
    scr['scores']['test'] = make_scores(scr['estimator'], test_df)

In [None]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick

# Colour palette: black first, then the Tableau colours, then a few CSS4 extras.
colors = ([matplotlib.colors.CSS4_COLORS['black']] +
    list(matplotlib.colors.TABLEAU_COLORS) + [
    matplotlib.colors.CSS4_COLORS['lime'],
    matplotlib.colors.CSS4_COLORS['yellow'],
    matplotlib.colors.CSS4_COLORS['pink']
])


plt.figure(figsize=(7, 5))
plt.title(outcome)

m1 = "erupt"       # metric shown on the x axis
m2 = "norm_erupt"  # metric shown on the y axis
splits = ("train", "validation", "test")

# One line per estimator, tracing (m1, m2) from train -> validation -> test.
for idx, (est, scr) in enumerate(ac.scores.items()):
    # Cycle through the palette so that estimators beyond len(colors) are still
    # drawn instead of being silently dropped.
    col = colors[idx % len(colors)]
    try:
        sc = [scr["scores"][split][m1] for split in splits]
        crv = [scr["scores"][split][m2] for split in splits]
        plt.plot(sc, crv, color=col, marker="o", label=est)
        # emphasise the validation point (medium marker) and the test point (large marker)
        plt.scatter(sc[1:2], crv[1:2], c=col, s=70, label="_nolegend_")
        plt.scatter(sc[2:], crv[2:], c=col, s=120, label="_nolegend_")
    except Exception as err:
        # Best effort: an estimator with incomplete scores is skipped, but the
        # reason is reported rather than swallowed silently.
        print(f"Skipping {est}: {type(err).__name__}: {err}")
plt.xlabel(m1)
plt.ylabel(m2)

plt.legend(bbox_to_anchor=(1.04, 1), borderaxespad=0)

plt.grid()
plt.show()


In [None]:
# Plot the interpreter stored with the best estimator's validation scores.
best_name = ac.best_estimator
scr = ac.scores[best_name]
validation_scores = scr["scores"]['validation']
intrp = validation_scores['intrp']

plt.figure(figsize=(15, 7))
intrp.plot(
    feature_names=intrp.feature_names,
    fontsize=10,
)
plt.title(f"{ac.best_estimator}_{outcome}")
plt.show()


In [None]:
# Visualise feature importances of the best estimator with SHAP.
import matplotlib.pyplot as plt
import shap

from auto_causality.shap import shap_values

# Shapley values calculation can be slow, so subsample the test set.
# The sample size is capped at the number of available rows (sampling without
# replacement raises when asked for more rows than exist) and the draw is
# seeded so that the plot is reproducible.
this_df = test_df.sample(min(100, len(test_df)), random_state=42)


print(outcome, ac.best_estimator)
est = ac.model
shaps = shap_values(est, this_df)

plt.title(outcome + '_' + ac.best_estimator.split('.')[-1])
# plot only the columns listed as the estimator's effect modifiers
shap.summary_plot(shaps, this_df[est.estimator._effect_modifier_names])
plt.show()


In [None]:
from auto_causality.scoring import  group_ate
# Plot the out-of-sample difference of outcomes between treated and untreated
# for the points where a model predicts positive vs negative impact.
my_est = ac.best_estimator

v = ac.scores[my_est]['scores']['test']['values']

sts = group_ate(v['treated'], v[outcome], v['norm_policy'])

display(sts)


# Kept under its own name so the palette list `colors` built for the
# erupt/norm_erupt plot is not overwritten by a different kind of object.
group_colors = (matplotlib.colors.CSS4_COLORS['black'],
    matplotlib.colors.CSS4_COLORS['red'],
    matplotlib.colors.CSS4_COLORS['blue'])

grp = sts["policy"].unique()

for i, (p, c) in enumerate(zip(grp, group_colors)):
    st = sts[sts["policy"] == p]
    # shift each group by 0.1*i along x so the error bars do not sit on top of each other
    plt.errorbar(np.arange(len(st)) + 0.1*i, st["mean"].values[0], yerr=st["std"].values[0], color=c)
plt.legend(grp)
plt.grid(True)
plt.title(my_est.split('.')[-1])
plt.show()