# (3.0) Create AFA dataset by blocking actions 
In this notebook we run the agent on a dataset with missingness and block feature acquisition actions that are not available. The resulting AFA dataset is then saved.

Note: The AFA agents must already be trained.

In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are reloaded
# before each cell executes, so edits to project source files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Define paths

Paths for data

In [6]:
from afa.configurations.utils_ts import specify_default_paths_ts
# which dataset to work on
dataset_name = "synthetic_2"

# name of the missingness scenario
miss_scenario = 'MCAR_1'

# automatically specify default path locations (change paths manually if needed)
paths = specify_default_paths_ts(dataset_name=dataset_name, miss_scenario=miss_scenario)

# Names of the ps models whose stored ps values should be loaded
# ("ps" presumably stands for propensity score — TODO confirm).
# The list is currently empty, so no ps value directories are configured.
# To load ps values, list the model names instead, e.g. [ps_model_gt_name, 'ps_lr'].
ps_model_gt_name = 'ps_model_gt'
ps_model_names = []

# one directory per selected ps model: <data_dir>ps_models/<ps_model_name>/ps_values/
paths['miss_model_files']['ps_values_dirs'] = [
    f"{paths['data_dir']}ps_models/{ps_model_name}/ps_values/"
    for ps_model_name in ps_model_names
]

Paths for model

In [7]:
# names of the agent and of the predictor it relies on
agent_name = 'DQN'
predictor_model_name = 'logistic_regression'

# model directories, both located below the dataset's data directory
agent_dir = f"{paths['data_dir']}afa_agents/{agent_name}/"
predictor_model_dir = f"{paths['data_dir']}predictor_models/{predictor_model_name}/"

# name under which the generated afa_dataset is stored
afa_dataset_name = 'blocking'

## Load dataset with missingness 
First, we load the dataset.

Includes loading: 
- superfeature mapping
- problem
- afa_problem 
- missingness_model

In [8]:
from afa.data_modelling.datasets.data_loader.data_loader_ts import DataLoader_ts

In [9]:
# Each DataLoader_ts keyword argument has the same name as the `paths` entry it is
# read from, so the arguments are assembled from one list of keys.
loader_path_keys = [
    'data_file',
    'temporal_data_file',
    'superfeature_mapping_file',
    'problem_file',
    'afa_problem_files',
    'miss_model_files',
    'folds_file',
]
data_loader = DataLoader_ts(**{key: paths[key] for key in loader_path_keys})

# load the dataset from the files configured above
dataset = data_loader.load()

Padding sequences: 100%|██████████████████████████████████████████████████████████████████████████| 100000/100000 [00:57<00:00, 1742.36it/s]
Padding sequences: 100%|██████████████████████████████████████████████████████████████████████████| 100000/100000 [01:04<00:00, 1557.82it/s]
Padding sequences: 100%|██████████████████████████████████████████████████████████████████████████| 100000/100000 [00:58<00:00, 1704.22it/s]


In [10]:
# define predictor (has to be pretrained) 
predictor_model_params = {
    'name' : predictor_model_name, 
    'directory': predictor_model_dir,
    'base_model_params' : {   
        'model_type': 'ImputeThenRegress',
        'imputer_params' : {
              'model_type': 'simple_imputer',
              'mode' : 'imputation', 
            },
        'predictor_params' : {
              'model_type': 'LogisticRegression',
              'mode' : 'classification'
          }
    }
}

In [None]:
# define agent
agent_model_params =  {                       
                    None
                      }             

In [None]:
# define afa_agent
afa_agent_params = {
    'name' : agent_name,
    'agent_type' : 'mcar_mechanism',
    'directory' : agent_dir, 
    'predictor_params' : predictor_model_params, 
    'agent_params' : agent_model_params,
    'probability' : 0.1
}

## Initialize Agent 

In [None]:
from afa.afa_models.afa_agents.utils_ts import define_afa_agent_ts

In [None]:
# Build the afa_agent from the configuration dict. Name, type and directory are
# passed explicitly in addition to the full parameter dict.
afa_agent = define_afa_agent_ts(
    name=afa_agent_params['name'],
    agent_type=afa_agent_params['agent_type'],
    afa_agent_params=afa_agent_params,
    agent_directory=afa_agent_params['directory'],
)

## Create afa dataset 

In [None]:
from afa.afa_datasets.afa_dataset_ts import AFADataset_ts

In [None]:
# run the agent on the loaded dataset (n_samples = 2 is forwarded to predict)
afa_results = afa_agent.predict(dataset, n_samples=2)

# wrap the dataset, the agent and the agent's results into an afa_dataset
afa_dataset = AFADataset_ts(
    dataset=dataset,
    model=afa_agent,
    results=afa_results,
)

### Save afa dataset

In [None]:
# store the afa_dataset in the agent directory under the chosen dataset name
afa_dataset.save(
    directory=agent_dir,
    afa_dataset_name=afa_dataset_name,
)

## Test loading afa dataset 

In [None]:
from afa.afa_datasets.afa_data_loader.afa_data_loader_ts import AFADataLoader_ts

In [None]:
# results file: <agent_dir><afa_dataset_name>_results.hkl
# (presumably the file written by afa_dataset.save — TODO confirm)
augmented_data_file = f"{agent_dir}{afa_dataset_name}_results.hkl"

# reload the afa_dataset from that file, reusing the already loaded dataset
# and the agent configuration
afa_data_loader = AFADataLoader_ts(
    augmented_data_file=augmented_data_file,
    dataset=dataset,
    model_params=afa_agent_params,
)
afa_dataset = afa_data_loader.load()

In [None]:
# show which entries the results of the reloaded afa_dataset contain
afa_dataset.results.keys()

In [None]:
# Fetch data from the reloaded afa_dataset with n_samples = 1. fold and split are
# left as None (presumably meaning no restriction to a fold/split — TODO confirm).
data = afa_dataset.get_data(fold = None, split = None, n_samples = 1) 
# Alternative call that requests split 'val' of fold 0 instead:
# data = afa_dataset.get_data(fold = 0, split = 'val', n_samples = 1) 

### Write report

In [None]:
# write a markdown report for the afa_dataset into the agent directory
explanation_file = f"{agent_dir}{afa_dataset_name}_afa_dataset_report.md"
afa_dataset.explain(file=explanation_file, format='markdown')