# (4.1) Train a state-action value function (Q-function) on an AFA dataset
In this notebook we train a Q-function that estimates the expected cost to go if we were in a fully observed environment. 

In [1]:
%load_ext autoreload
%autoreload 2

## Define paths

Paths for data

In [2]:
from afa.configurations.utils_ts import specify_default_paths_ts
# which dataset to work on 
dataset_name   = "synthetic_1"

# name for of missingness scenario 
miss_scenario  = 'MCAR_1'

# automatically specify some path locations (change paths manually if needed) 
paths = specify_default_paths_ts(dataset_name = dataset_name , miss_scenario = miss_scenario) 

# name for agent 
agent_name            = 'DQN' #'Random 10%' # 'DQN' #'Random 10%' # 'DQN' ##'Random 10%'
agent_dir = paths['data_dir'] + 'afa_agents' + '/' + agent_name + '/'

# how to name the afa_dataset
afa_dataset_name = 'blocking'

In [3]:
# name for predictor 
q_model_name  = 'q_model' #'logistic_regression'

# new (where to save the model) 
q_model_dir = paths['data_dir'] + 'q_models' + '/' + q_model_name + '/'

# reporting
explanation_file = q_model_dir +  'reports/' + 'model_report' 

## Load afa dataset

In [5]:
from afa.data_modelling.datasets.data_loader.data_loader_ts import DataLoader_ts
from afa.afa_datasets.afa_data_loader.afa_data_loader_ts import AFADataLoader_ts

In [6]:
# load dataset 
data_loader = DataLoader_ts(     data_file                  = paths['data_file'],
                                 temporal_data_file         = paths['temporal_data_file'],
                                 superfeature_mapping_file  = paths['superfeature_mapping_file'],
                                 problem_file               = paths['problem_file'],
                                 afa_problem_files          = paths['afa_problem_files'], 
                                 miss_model_files           = paths['miss_model_files'], 
                                 folds_file                 = paths['folds_file'] )
dataset = data_loader.load() 

Padding sequences: 100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 1160.29it/s]
Padding sequences: 100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 1266.74it/s]
Padding sequences: 100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 1213.50it/s]


In [7]:
# load afa_dataset
augmented_data_file = agent_dir + afa_dataset_name + '_' + 'results.hkl'
afa_agent_params = None
afa_data_loader = AFADataLoader_ts(                   
                    augmented_data_file = augmented_data_file,
                    dataset             = dataset,
                    model_params        = afa_agent_params) 
afa_dataset = afa_data_loader.load() 

2023-04-27 16:42:19.783182: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-27 16:42:19.783620: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcufft.so.10'; dlerror: libcufft.so.10: cannot open shared object file: No such file or directory
2023-04-27 16:42:19.783759: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcurand.so.10'; dlerror: libcurand.so.10: cannot open shared object file: No such file or directory
2023-04-27 16:42:19.783863: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcusolver.so.11'; dlerror: libcusolver.so.11: cannot open shared object file: No such file or directory
2023-04-27 16:42:19.783960: W tensorflow/co

## Initialize Q function 

In [8]:
from afa.afa_models.afa_nuisance_models.afa_q_models.afa_q_model_ts import AFAQModel_ts

In [9]:
q_model_params = {
    'name' : q_model_name, 
    'directory': q_model_dir,
    'base_model_params' : {   
        'hidden_size': 16,
        'epochs' :   10,
        'batch_size' : 32
    }
}

In [24]:
afa_q_model = AFAQModel_ts( name            = q_model_params['name'], 
                            model_params    = q_model_params, 
                            afa_dataset     = afa_dataset, 
                            directory       = q_model_params['directory'])   

In [28]:
afa_q_model.fit(  afa_dataset, 
                  fold = 0, 
                  train_split = 'train_afa_nuisance', 
                  valid_split = 'test', 
                  fit_again = False)

Convert superR to R: 100%|██████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 2529.74it/s]
Train Q-function: 100%|█████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:03<00:00,  3.32it/s]
Convert superR to R: 100%|██████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 1657.50it/s]


In [30]:
q_values, v_values  = afa_q_model.predict(   afa_dataset, 
                                              fold = None, 
                                              split = None)

Convert superR to R: 100%|██████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 4796.23it/s]


> [0;32m/mnt/c/Users/henrik.vonkleist/Nextcloud/PhD/Code/Active Feature Acquisition/afa_ts/afa/afa_models/afa_nuisance_models/afa_nuisance_dataset_ts.py[0m(82)[0;36mcompute_afa_trajectories[0;34m()[0m
[0;32m     80 [0;31m        [0mpdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     81 [0;31m        [0;31m# fill feature_ordered with the acquired features[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 82 [0;31m        [0mtarget_temporal_feature_acquired[0m [0;34m=[0m [0mself[0m[0;34m.[0m[0mtemporal_feature[0m[0;34m[[0m[0;34m:[0m[0;34m,[0m[0;34m:[0m[0;34m,[0m[0mself[0m[0;34m.[0m[0mtarget_feature_inds[0m[0;34m][0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     83 [0;31m        [0mtarget_temporal_feature_acquired[0m[0;34m[[0m[0;34m~[0m[0mtarget_R_hat[0m[0;34m][0m [0;34m=[0m [0;36m0[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     84 [0;31m[0;34m[0m[0m
[0m


ipdb>  n


> [0;32m/mnt/c/Users/henrik.vonkleist/Nextcloud/PhD/Code/Active Feature Acquisition/afa_ts/afa/afa_models/afa_nuisance_models/afa_nuisance_dataset_ts.py[0m(83)[0;36mcompute_afa_trajectories[0;34m()[0m
[0;32m     81 [0;31m        [0;31m# fill feature_ordered with the acquired features[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     82 [0;31m        [0mtarget_temporal_feature_acquired[0m [0;34m=[0m [0mself[0m[0;34m.[0m[0mtemporal_feature[0m[0;34m[[0m[0;34m:[0m[0;34m,[0m[0;34m:[0m[0;34m,[0m[0mself[0m[0;34m.[0m[0mtarget_feature_inds[0m[0;34m][0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 83 [0;31m        [0mtarget_temporal_feature_acquired[0m[0;34m[[0m[0;34m~[0m[0mtarget_R_hat[0m[0;34m][0m [0;34m=[0m [0;36m0[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     84 [0;31m[0;34m[0m[0m
[0m[0;32m     85 [0;31m        [0;32mfor[0m [0mk[0m [0;32min[0m [0mrange[0m[0;34m([0m[0;36m1[0m[0;34m,[0m [0mn_steps[0m[0;34m)[0m[0;34m:[0m[0;

ipdb>  target_temporal_feature_acquired


array([[[-1.38872265, -0.37464611,  1.0090345 ],
        [-0.38636235,  1.29420088, -0.18079483],
        [-1.66834288,  1.19377219, -0.46709523],
        [-1.06652083, -0.13601669,  0.1591873 ],
        [ 0.2531797 , -0.18362632,  0.926272  ]],

       [[ 0.1137695 ,  0.57393379, -0.12709364],
        [-0.26760753,  0.22835893,  0.85295297],
        [-1.06599963, -0.2256145 , -0.08851245],
        [ 0.47434833,         nan,         nan],
        [-0.58221457, -0.02412887,  0.53016757]],

       [[-0.29713627,         nan,         nan],
        [ 0.99672297, -1.52954639,  0.1291835 ],
        [ 0.00502995, -0.04963145,  0.7019105 ],
        [-1.27682788, -1.10937407,  0.38046483],
        [        nan, -0.85908411,  0.24009836]],

       ...,

       [[-0.45269892, -0.5074756 , -0.0347233 ],
        [-0.53828857, -1.5893523 ,  0.0510782 ],
        [-0.9836936 ,  0.04067825, -0.6014946 ],
        [-0.14542348,  0.0747913 , -1.40108765],
        [-0.59675417,  1.3119413 , -0.09357712]],


ipdb>  target_temporal_feature_acquired.shape


(100, 5, 3)


ipdb>  target_R_hat.shape


(100, 5, 3)


ipdb>  target_R_bar = np.isnan(target_R_hat)
ipdb>  target_R_bar .shape


(100, 5, 3)


ipdb>  target_R_bar 


array([[[False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False]],

       [[False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False]],

       [[False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False]],

       ...,

       [[False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False]],

       [[False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False]],

       [[False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False],
        [False, False, False]]])


ipdb>  (target_R_bar - target_R_hat).max()


*** TypeError: numpy boolean subtract, the `-` operator, is not supported, use the bitwise_xor, the `^` operator, or the logical_xor function instead.


ipdb>  (target_R_bar *1- target_R_hat*1).max()


0


ipdb>  (target_R_bar *1- target_R_hat*1).min()


-1


ipdb>  target_temporal_feature_acquired[~target_R_hat] = 0
ipdb>  np.isnan(target_R_hat).sum()


0


ipdb>  target_temporal_feature_acquired = self.temporal_feature[:,:,self.target_feature_inds]
ipdb>  target_temporal_feature_acquired[~target_R_hat] = 0
ipdb>  np.isnan(target_temporal_feature_acquired).sum()


36


ipdb>  target_R_bar = np.isnan(self.temporal_feature)
ipdb>  (target_R_bar - target_R_hat).max()


*** TypeError: numpy boolean subtract, the `-` operator, is not supported, use the bitwise_xor, the `^` operator, or the logical_xor function instead.


ipdb>  (target_R_bar *1- target_R_hat*1).min()


*** ValueError: operands could not be broadcast together with shapes (100,5,4) (100,5,3)


ipdb>  (target_R_bar[:,:,self.target_feature_inds] *1- target_R_hat*1).min()


-1


ipdb>  (target_R_bar[:,:,self.target_feature_inds] *1- target_R_hat*1).max()


1


ipdb>  self.target_superR_hat


array([[[0, 1],
        [0, 1],
        [0, 0],
        [0, 1],
        [0, 1]],

       [[0, 1],
        [1, 0],
        [0, 1],
        [1, 0],
        [0, 0]],

       [[0, 0],
        [1, 0],
        [0, 1],
        [0, 1],
        [1, 0]],

       [[0, 1],
        [0, 1],
        [0, 1],
        [0, 1],
        [1, 0]],

       [[0, 1],
        [0, 1],
        [0, 1],
        [0, 0],
        [0, 1]],

       [[0, 1],
        [1, 0],
        [0, 1],
        [0, 1],
        [0, 1]],

       [[0, 1],
        [0, 1],
        [1, 0],
        [0, 1],
        [0, 1]],

       [[0, 1],
        [0, 1],
        [0, 1],
        [0, 1],
        [1, 0]],

       [[0, 1],
        [0, 1],
        [1, 0],
        [0, 1],
        [1, 0]],

       [[0, 1],
        [0, 1],
        [0, 1],
        [0, 0],
        [0, 1]],

       [[0, 1],
        [1, 0],
        [0, 1],
        [0, 1],
        [0, 1]],

       [[0, 1],
        [0, 0],
        [0, 1],
        [0, 1],
        [0, 1]],

       [[0, 1],


ipdb>  exit


In [16]:
# add to afa_dataset 
results = {}
results['q_values_' + q_model_name] = q_values
results['v_values_' + q_model_name] = v_values

In [17]:
afa_dataset.set_augmented_data( 
                            results,
                            set_nuisances = True, 
                            )

In [18]:
afa_dataset.results['nuisances'].keys()

dict_keys(['q_values', 'actions', 'q_values_q_model', 'v_values_q_model'])

In [19]:
data, results_new = afa_dataset.get_augmented_data(get_nuisances = True)
results_new['nuisances'].keys()

dict_keys(['q_values', 'actions', 'q_values_q_model', 'v_values_q_model'])

## Save afa_dataset with Q and V values

In [20]:
# save afa_dataset
afa_dataset.save( directory = agent_dir, afa_dataset_name = afa_dataset_name ) 

## Test loading afa dataset with Q and V values

In [21]:
from afa.afa_datasets.afa_data_loader.afa_data_loader_static import AFADataLoader_static

In [22]:
augmented_data_file = agent_dir + afa_dataset_name + '_' + 'results.hkl'
afa_data_loader = AFADataLoader_static(                   
                    augmented_data_file = augmented_data_file,
                    dataset  = dataset,
                    model_params = afa_agent_params) 
afa_dataset = afa_data_loader.load() 

AttributeError: 'numpy.ndarray' object has no attribute 'numpy'

In [None]:
data, results_new = afa_dataset.get_augmented_data(get_nuisances = True)
results_new['nuisances'].keys()

### Write report

In [None]:
# report 
# explanation_file = q_model_dir + afa_dataset_name + '_' + 'q_values_report.md'  # +  'reports/' + 'model_report' 
# afa_dataset.explain(file= explanation_file, format = 'markdown')