## FLAML for hp optimisation and model selection
We use FLAML twice: first to find the best component model for each estimator, and then to optimise the estimators themselves and choose the best one. This notebook shows how it's done.

In [1]:
%load_ext autoreload
%autoreload 2
import os, sys
import warnings
warnings.filterwarnings('ignore') # suppress sklearn deprecation warnings for now.. 

# the below checks for whether we run dowhy and auto-causality from source
root_path = root_path = os.path.realpath('../..')
try: 
    import auto_causality
except ModuleNotFoundError:
    sys.path.append(os.path.join(root_path, "auto-causality"))
    
try:
    import dowhy
except ModuleNotFoundError:
    sys.path.append(os.path.join(root_path, "dowhy"))

In [2]:
from auto_causality import AutoCausality
from auto_causality.datasets import synth_ihdp, preprocess_dataset
from auto_causality.scoring import ate

### Model fitting & scoring
Here we fit a selection of models to the data and score them with the ERUPT metric on held-out data.

In [None]:
# Load the synthetic IHDP frame; preprocess_dataset returns the processed frame
# together with the column groups unpacked below.
data_df = synth_ihdp()
data_df, features_X, features_W, targets, treatment = preprocess_dataset(data_df)
outcome = targets[0]  # only the first target is modelled in this notebook

# Estimators that AutoCausality is allowed to try
estimator_list = [
    "LinearDML",
    "SLearner",
    "TransformedOutcome",
    "ForestDRLearner",
]

# Configure the search: "erupt" is the metric being optimised; the
# components_* arguments configure the component-model fitting.
ac = AutoCausality(
    estimator_list=estimator_list,
    metric="erupt",
    time_budget=180,
    components_time_budget=30,
    verbose=3,
    components_verbose=2,
    use_ray=False,
)

# Run the search
myresults = ac.fit(data_df, treatment, outcome, features_W, features_X)

# Report the winning estimator, its configuration and its score
print(f"Best estimator: {ac.best_estimator}")
print(f"best config: {ac.best_config}")
print(f"best score: {ac.best_score}")


[flaml.tune.tune: 03-14 14:29:27] {447} INFO - trial 1 config: {'fit_cate_intercept': 1, 'mc_iters': 8}
[flaml.tune.tune: 03-14 14:38:18] {108} INFO - result: {'erupt': 6.584295597076416, 'qini': -0.049453029342096826, 'auc': 0.5369621767422776, 'ate': 3.658294605930205, 'r_score': 0.12867089289821954, 'estimator': <dowhy.causal_estimator.CausalEstimate object at 0x000002440D372BE0>, 'scores': {'estimator_name': 'backdoor.econml.dml.LinearDML', 'train': {'erupt': 6.3989696876413635, 'qini': 0.04337000423617723, 'auc': 0.5505174778691536, 'r_score': 0.06628259380834034, 'ate': 3.730222743597676, 'intrp': <econml.cate_interpreter._interpreters.SingleTreeCateInterpreter object at 0x000002440F2673A0>, 'values':      treated  y_factual         p  policy   weights
0        0.0   4.592250  0.190955    True  5.226022
1        0.0   1.665200  0.190955    True  0.000000
2        0.0   6.420538  0.190955    True       NaN
3        0.0   3.573685  0.190955    True  0.000000
4        1.0   5.650913

... Estimator: backdoor.econml.dml.LinearDML
 erupt (validation): 6.584296
 qini (validation): -0.049453
 auc (validation): 0.536962
 ate (validation): 3.658295
 r_score (validation): 0.128671


[flaml.tune.tune: 03-14 14:46:20] {108} INFO - result: {'erupt': 6.584295597076416, 'qini': -0.06667256513466772, 'auc': 0.5370085583714438, 'ate': 3.540848670550587, 'r_score': 0.11658919330570794, 'estimator': <dowhy.causal_estimator.CausalEstimate object at 0x000002440D6039D0>, 'scores': {'estimator_name': 'backdoor.econml.dml.SparseLinearDML', 'train': {'erupt': 6.399111252082019, 'qini': 0.032526931855987026, 'auc': 0.5513643707984703, 'r_score': 0.06253219070216098, 'ate': 3.5911303555805953, 'intrp': <econml.cate_interpreter._interpreters.SingleTreeCateInterpreter object at 0x000002440FC4B880>, 'values':      treated  y_factual         p  policy   weights
0        0.0   4.592250  0.190955    True  5.236842
1        0.0   1.665200  0.190955    True  0.000000
2        0.0   6.420538  0.190955    True       NaN
3        0.0   3.573685  0.190955    True  0.000000
4        1.0   5.650913  0.190955    True  0.000000
..       ...        ...       ...     ...       ...
592      0.0   3.

... Estimator: backdoor.econml.dml.SparseLinearDML
 erupt (validation): 6.584296
 qini (validation): -0.066673
 auc (validation): 0.537009
 ate (validation): 3.540849
 r_score (validation): 0.116589
config: {'overall_model': AutoML(append_log=False, auto_augment=True, early_stop=False, ensemble=False,
       estimator_list='auto', eval_method='auto', gpu_per_trial=0,
       hpo_method='auto', keep_search_state=False, learner_selector='sample',
       log_file_name='', log_training_metric=False, log_type='better',
       max_iter=1000000, mem_thres=4294967296, metric='auto',
       min_sample_size=10000, model_history=False, n_concurrent_trials=1,
       n_jobs=-1, n_splits=5, pred_time_limit=1e-05, retrain_full=True,
       sample=True, split_ratio=0.1, split_type='auto', starting_points={},
       task='regression', time_budget=30, train_time_limit=inf, ...)}




... Estimator: backdoor.econml.metalearners.SLearner
 erupt (validation): 6.584296
 qini (validation): -0.045372
 auc (validation): 0.547148
 ate (validation): 3.748026
 r_score (validation): 0.114682
config: {'propensity_model': DummyClassifier(), 'outcome_model': AutoML(append_log=False, auto_augment=True, early_stop=False, ensemble=False,
       estimator_list='auto', eval_method='auto', gpu_per_trial=0,
       hpo_method='auto', keep_search_state=False, learner_selector='sample',
       log_file_name='', log_training_metric=False, log_type='better',
       max_iter=1000000, mem_thres=4294967296, metric='auto',
       min_sample_size=10000, model_history=False, n_concurrent_trials=1,
       n_jobs=-1, n_splits=5, pred_time_limit=1e-05, retrain_full=True,
       sample=True, split_ratio=0.1, split_type='auto', starting_points={},
       task='regression', time_budget=30, train_time_limit=inf, ...)}


[flaml.tune.tune: 03-14 14:47:21] {447} INFO - trial 1 config: {'min_propensity': 0.00039392081127942284, 'mc_iters': 5, 'n_estimators': 413, 'max_depth': 630, 'min_samples_split': 29, 'min_samples_leaf': 12, 'min_weight_fraction_leaf': 0.07449592798509735, 'max_features': 'sqrt', 'min_impurity_decrease': 5.266237972689596, 'max_samples': 0.12303482939899912, 'min_balancedness_tol': 0.23834773752057958, 'honest': 1, 'subforest_size': 7}


... Estimator: backdoor.auto_causality.models.TransformedOutcome
 erupt (validation): 6.028015
 qini (validation): -0.024295
 auc (validation): 0.510177
 ate (validation): 3.535790
 r_score (validation): -0.637251


In [None]:
# Show only the first rows of the training frame instead of rendering all of it.
ac.train_df.head()

In [None]:
outcome = targets[0]

# Add a "baseline" entry to ac.full_scores, laid out like the estimator entries
# that the plotting code reads ("train" / "validation" dicts holding "erupt"
# and "ate"). Its "erupt" is the plain mean outcome of the split, and its
# "ate" is the first element returned by ate() on that split.
splits = {"train": ac.train_df, "validation": ac.test_df}

baseline_scores = {"estimator": "baseline", "outcome": outcome}
for split_name, split_df in splits.items():
    baseline_scores[split_name] = {
        "erupt": split_df[outcome].mean(),
        "ate": ate(split_df[treatment], split_df[outcome])[0],
    }

ac.full_scores["baseline"] = baseline_scores

In [None]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick

colors = ([matplotlib.colors.CSS4_COLORS['black']] +
    list(matplotlib.colors.TABLEAU_COLORS) + [
    matplotlib.colors.CSS4_COLORS['lime'],
    matplotlib.colors.CSS4_COLORS['yellow'],
    matplotlib.colors.CSS4_COLORS['pink']
])

v = ac.full_scores
plt.figure(figsize = (7,5))
plt.title(outcome)
for (est, scr),col in zip(v.items(),colors):
    sc = [scr['train']['erupt'], scr['validation']['erupt']]
    crv = [scr['train']['ate'], scr['validation']['ate']]
    plt.plot(sc, crv, color=col, marker="o")
    plt.scatter(sc[1:],crv[1:], c=col, s=120 )
    plt.grid()
    plt.xlabel("ERUPT score")
    plt.ylabel("ATE")
    plt.legend(v.keys(),bbox_to_anchor=(1.04,1), borderaxespad=0)

plt.grid()
plt.show()


In [None]:
# Plot the tree interpreter stored with the best estimator's validation scores.
scr = ac.full_scores[ac.best_estimator]
intrp = scr['validation']['intrp']
plt.figure(figsize=(15, 7))
try:
    feature_names = intrp.feature_names
except AttributeError:
    # The interpreter does not expose feature names: fall back to the X
    # features followed by any W features that are not already among them.
    feature_names = features_X + [w for w in features_W if w not in features_X]
# Pass the resolved local; re-reading intrp.feature_names here would raise
# again on exactly the path the fallback above is meant to cover.
intrp.plot(feature_names=feature_names, fontsize=10)
plt.title(f"{ac.best_estimator}_{outcome}")
plt.show()


In [None]:
# SHAP summary plot for the best estimator.
import matplotlib.pyplot as plt
import shap

from auto_causality.shap import shap_values

# Shapley value calculation can be slow, so work on a subsample of the test
# frame. Cap the sample size at the number of available rows (sampling without
# replacement raises otherwise) and fix the seed so the plot is reproducible.
shap_sample_size = min(100, len(ac.test_df))
this_df = ac.test_df.sample(n=shap_sample_size, random_state=0)

print(outcome, ac.best_estimator)
est = ac.estimates[ac.best_estimator]
shaps = shap_values(est, this_df)

plt.title(outcome + '_' + ac.best_estimator.split('.')[-1])
# Restrict the plotted columns to the estimator's effect modifiers.
shap.summary_plot(shaps, this_df[est.estimator._effect_modifier_names])
plt.show()
