# (5.1) Solve AFAPE for dataset completed with multiple imputation
Use an estimator to evaluate $E[C \mid do(\bar{R} = 1)]$, and report valid confidence intervals (obtained by estimating the mean and variance).

In [1]:
# Enable IPython's autoreload extension (mode 2) so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Define paths

Paths for data

In [3]:
from afa.configurations.utils_ts import specify_default_paths_ts
# Dataset to work on
dataset_name = "synthetic_1"

# Name of the missingness scenario
miss_scenario = 'MCAR_1'

# Default path locations for this dataset / scenario (edit manually if needed)
paths = specify_default_paths_ts(dataset_name=dataset_name, miss_scenario=miss_scenario)

# Agent name and the directory derived from it
# NOTE(review): the path is built by plain string concatenation, so
# paths['data_dir'] is assumed to end with a path separator -- confirm.
agent_name = 'DQN'
agent_dir = f"{paths['data_dir']}afa_agents/{agent_name}/"

# Multiple-imputation model name and directory; the afa_dataset reuses the name
mi_model_name = 'mi_simple'
mi_model_dir = f"{paths['data_dir']}mi_models/{mi_model_name}/"
afa_dataset_name = mi_model_name

## Load afa dataset

In [4]:
from afa.data_modelling.datasets.data_loader.data_loader_ts import DataLoader_ts
from afa.data_modelling.missingness.multiple_imputation.multiple_imputed_data_loader.multiple_imputed_data_loader_ts import MultipleImputedDataLoader_ts
from afa.afa_datasets.afa_data_loader.afa_data_loader_ts import AFADataLoader_ts

2023-02-27 19:21:28.390018: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-02-27 19:21:28.520990: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-02-27 19:21:28.521020: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-02-27 19:21:29.236518: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-

In [5]:
# Load the base dataset from the file locations collected in `paths`
data_loader = DataLoader_ts(
    data_file=paths['data_file'],
    temporal_data_file=paths['temporal_data_file'],
    superfeature_mapping_file=paths['superfeature_mapping_file'],
    problem_file=paths['problem_file'],
    afa_problem_files=paths['afa_problem_files'],
    folds_file=paths['folds_file'],
)
dataset = data_loader.load()

100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 825.54it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 741.72it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 744.05it/s]


In [6]:
# Load the multiple-imputation (MI) dataset on top of the base dataset,
# reading the MI results file stored in the MI model directory
augmented_data_file = f"{mi_model_dir}results.hkl"
mi_model_params = None  # no model parameters are passed to the loader
mi_data_loader = MultipleImputedDataLoader_ts(
    augmented_data_file=augmented_data_file,
    dataset=dataset,
    model_params=mi_model_params,
)
mi_dataset = mi_data_loader.load()

In [10]:
# Load the afa_dataset on top of the MI dataset, reading the agent's
# results file for this afa_dataset name from the agent directory
augmented_data_file = f"{agent_dir}{afa_dataset_name}_results.hkl"
afa_agent_params = None  # no agent parameters are passed to the loader
afa_data_loader = AFADataLoader_ts(
    augmented_data_file=augmented_data_file,
    dataset=mi_dataset,
    model_params=afa_agent_params,
)
afa_dataset = afa_data_loader.load()

## Compute estimates 

In [12]:
# Bootstrapped counterfactual-cost estimates on the validation split of fold 0
# using the MI model (10 bootstraps).
# NOTE(review): n_samples=None presumably means "use all available samples" -- confirm.
J_bootstraps = afa_dataset.estimate_counterfactual_cost_mi(
    mi_model_name=mi_model_name,
    fold=0,
    split="val",
    n_samples=None,
    n_bootstraps=10,
)

In [13]:
# Display the bootstrap estimates computed above
J_bootstraps 

{'mi_simple': array([3.51765217, 3.36538975, 3.40711848, 3.61929933, 3.69092298,
        3.18728554, 3.71480621, 3.6586901 , 3.51640446, 3.49749789])}

In [14]:
# Save the bootstrap estimates into the agent directory
from afa.afa_models.afa_estimators.utils import save_results_bootstrapping

save_results_bootstrapping(
    J_bootstraps,
    agent_dir,
    afa_dataset_name=afa_dataset_name,
)

## Compute estimates for convergence
If we know the ground truth, we may be interested in plotting convergence. For this, we compute estimates of J for different numbers of available datapoints.

In [15]:
from afa.afa_models.afa_estimators.utils_ts import define_afa_estimator_ts
from afa.afa_models.afa_estimators.utils import compute_counterfactual_cost_convergence

In [17]:
# Initialise the estimator(s) used for the convergence analysis
estimators = [
    define_afa_estimator_ts(
        estimator_name=mi_model_name,
        estimator_type='simple_blocking',
        estimator_params=None,
    ),
]

In [18]:
# Compute bootstrapped counterfactual-cost estimates for the convergence
# analysis (validation split of fold 0, 10 bootstraps); also returns the
# convergence steps the estimates belong to.
(J_bootstraps_convergence, convergence_steps) = compute_counterfactual_cost_convergence(
    afa_dataset=afa_dataset,
    estimators=estimators,
    fold=0,
    split="val",
    n_samples=1,
    n_bootstraps=10,
)

Estimate counterfactual average cost
  - x-axis (number of datapoints) =  [ 10  37 140]
  - number of bootstraps for estimation: 10


In [19]:
# Save the convergence estimates together with their convergence steps
save_results_bootstrapping(
    J_bootstraps_convergence,
    agent_dir,
    convergence_steps=convergence_steps,
    afa_dataset_name=afa_dataset_name,
)