# (5.0) Solve AFAPE for dataset created from blocking
Use different estimators to compute $E[C \mid do(\bar{R} = 1)]$, and report valid confidence intervals for the estimates.

In [1]:
# Enable IPython's autoreload extension; mode 2 reloads all imported modules
# before each cell is executed.
%load_ext autoreload
%autoreload 2

## Define paths

Paths for data

In [2]:
from afa.configurations.utils_static import specify_default_paths_static
# --- experiment configuration -------------------------------------------
dataset_name = "synthetic_1"      # which dataset to work on
miss_scenario = "MCAR_1"          # name of the missingness scenario
agent_name = "DQN"                # name of the agent
afa_dataset_name = "blocking"     # how to name the afa_dataset

# Automatically specify some path locations (change `paths` manually if needed).
paths = specify_default_paths_static(
    dataset_name=dataset_name,
    miss_scenario=miss_scenario,
)

# Directory of the agent: <data_dir>afa_agents/<agent_name>/
agent_dir = paths["data_dir"] + "afa_agents/" + agent_name + "/"

Define estimators

In [3]:
# Estimator specifications, one dict per estimator. Every entry carries a
# 'name' and an 'estimator_type'; 'ps_model_name' (where present) names the
# propensity score model that the estimator requires.
estimator_params_list = [
    dict(name='Blocking', estimator_type='simple_blocking'),
    dict(name='CC', estimator_type='cc'),
    dict(name='IPW,miss', estimator_type='ipw_miss', ps_model_name='ps_lr', normalize=True),
    # Disabled variant that uses the 'ps_model_gt' propensity score model:
    # dict(name='IPW,miss_gt', estimator_type='ipw_miss', ps_model_name='ps_model_gt', normalize=True),
]

In [4]:
# Collect the propensity score (PS) models required by the estimators.
# Each model is listed once, in order of first appearance, so that a PS model
# shared by several estimators does not produce duplicate directory entries.
ps_model_names = list(dict.fromkeys(
    params['ps_model_name']
    for params in estimator_params_list
    if 'ps_model_name' in params
))

# Register the directories holding the PS values of those models; this entry
# of `paths` is later handed to the data loader via `miss_model_files`.
paths['miss_model_files']['ps_values_dirs'] = [
    paths['data_dir'] + 'ps_models/' + ps_model_name + '/' + 'ps_values/'
    for ps_model_name in ps_model_names
]

## Load afa dataset

In [5]:
from afa.data_modelling.datasets.data_loader.data_loader_static import DataLoader_static
from afa.afa_datasets.afa_data_loader.afa_data_loader_static import AFADataLoader_static

2023-02-20 18:02:11.075638: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-02-20 18:02:11.195927: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-02-20 18:02:11.195945: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-02-20 18:02:11.769213: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-

In [6]:
# Load the dataset; every file location is looked up in `paths` under the
# same key as the loader's keyword argument.
data_loader = DataLoader_static(**{
    arg_name: paths[arg_name]
    for arg_name in (
        'data_file',
        'superfeature_mapping_file',
        'problem_file',
        'afa_problem_files',
        'miss_model_files',
        'folds_file',
    )
})
dataset = data_loader.load()

In [7]:
# Load the afa_dataset from <agent_dir><afa_dataset_name>_results.hkl
afa_agent_params = None
augmented_data_file = agent_dir + afa_dataset_name + '_results.hkl'
afa_data_loader = AFADataLoader_static(
    augmented_data_file=augmented_data_file,
    dataset=dataset,
    model_params=afa_agent_params,
)
afa_dataset = afa_data_loader.load()

2023-02-20 18:02:16.512497: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-02-20 18:02:16.512654: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-02-20 18:02:16.512728: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory
2023-02-20 18:02:16.512789: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; dlerror: libcublasLt.so.11: cannot open shared object file: No such file or directory
2023-02-20 18:02:16.512850: W tensorf

## Initialize the estimators

In [8]:
from afa.afa_models.afa_estimators.utils_static import define_afa_estimator_static

In [9]:
# Build one estimator per specification, in the order of
# `estimator_params_list`. The full specification dict is forwarded as
# `estimator_params` in addition to the name and type.
estimators = [
    define_afa_estimator_static(
        estimator_name=params['name'],
        estimator_type=params['estimator_type'],
        estimator_params=params,
    )
    for params in estimator_params_list
]

## Compute estimates 

In [10]:
# Bootstrapped estimates of the counterfactual cost for all estimators,
# computed on fold 0 of the "val" split with 10 bootstraps.
J_bootstraps = afa_dataset.estimate_counterfactual_cost(
    estimators=estimators,
    fold=0,
    split="val",
    n_samples=1,
    n_bootstraps=10,
    n_max=None,
)

In [11]:
# display the bootstrap estimates (one array per estimator name)
J_bootstraps 

{'Blocking': array([1.85714286, 0.95652174, 1.90740741, 1.39130435, 1.68      ,
        0.95652174, 0.89583333, 1.68      , 1.75      , 1.24      ]),
 'CC': array([1.85714286, 0.95652174, 1.90740741, 1.39130435, 1.68      ,
        0.95652174, 0.89583333, 1.68      , 1.75      , 1.24      ]),
 'IPW,miss': array([3.9375    , 1.4375    , 3.5       , 1.64285714, 2.625     ,
        3.5       , 1.57142857, 3.58333333, 3.07142857, 2.625     ])}

In [12]:
# save the bootstrap estimates for this afa_dataset
from afa.afa_models.afa_estimators.utils import save_results_bootstrapping
save_results_bootstrapping(
    J_bootstraps,
    agent_dir,
    afa_dataset_name=afa_dataset_name,
)

## Compute estimates for convergence
If we know the ground truth, we might be interested in plotting convergence. For this, we compute the estimates J for different numbers of available datapoints.

In [13]:
from afa.afa_models.afa_estimators.utils import compute_counterfactual_cost_convergence

In [14]:
# Bootstrapped estimates for several numbers of datapoints; the second return
# value holds the evaluated steps.
J_bootstraps_convergence, convergence_steps = compute_counterfactual_cost_convergence(
    afa_dataset=afa_dataset,
    estimators=estimators,
    fold=0,
    split="val",
    n_samples=1,
    n_bootstraps=10,
)

Estimate counterfactual average cost
  - x-axis (number of datapoints) =  [10 17 28]
  - number of bootstraps for estimation: 10


In [15]:
# save the convergence estimates together with their convergence steps
save_results_bootstrapping(
    J_bootstraps_convergence,
    agent_dir,
    convergence_steps=convergence_steps,
    afa_dataset_name=afa_dataset_name,
)