# (0.1) Train multiple-imputation (MI) models
One option for resolving missingness in AFA is multiple imputation (MI).
This notebook trains an MI model and saves the resulting multiply imputed dataset.

In [None]:
# Load IPython's autoreload extension and switch it to mode 2, so that edited
# modules are reloaded automatically before code is executed.
%load_ext autoreload
%autoreload 2

## Define paths

In [None]:
import os

from afa.configurations.utils_ts import specify_default_paths_ts

# Dataset to work on.
dataset_name = "synthetic_1"

# Name of the missingness scenario.
miss_scenario = 'MCAR_1'

# Automatically derive the default path locations for this dataset and
# missingness scenario (change the paths manually if needed).
paths = specify_default_paths_ts(dataset_name=dataset_name, miss_scenario=miss_scenario)

# Name of the MI model.
mi_model_name = 'mi_simple'

# Directory where the MI model is saved: <data_dir>/mi_models/<mi_model_name>/.
# os.path.join inserts a separator only where one is missing, so the result is
# correct whether or not paths['data_dir'] ends with one; the trailing ''
# keeps the trailing separator that plain string concatenation produced.
mi_model_dir = os.path.join(paths['data_dir'], 'mi_models', mi_model_name, '')


### Define model specifications

In [None]:
# Specification of the MI model: its name, the directory it is saved in, and
# the parameters of the underlying base model.
mi_model_params = dict(
    name=mi_model_name,
    directory=mi_model_dir,
    base_model_params=dict(model_type='simple_imputer'),
)

## Load dataset with missingness 
First, we load the dataset.

This includes loading:
- superfeature mapping
- problem
- afa_problem 
- missingness_model

In [None]:
from afa.data_modelling.datasets.data_loader.data_loader_ts import DataLoader_ts

In [None]:
# Build the data loader from the file locations collected in `paths`: each
# keyword argument of DataLoader_ts receives the `paths` entry of the same name.
data_loader = DataLoader_ts(**{
    path_key: paths[path_key]
    for path_key in (
        'data_file',
        'temporal_data_file',
        'superfeature_mapping_file',
        'problem_file',
        'afa_problem_files',
        'miss_model_files',
        'folds_file',
    )
})

# Load the dataset through the configured loader.
dataset = data_loader.load()

## Define MI model

In [None]:
from afa.data_modelling.missingness.multiple_imputation.multiple_imputation_model_ts import MultipleImputationModel_ts

# Instantiate the MI model from the loaded dataset's missingness graph,
# superfeature mapping and target superfeature names, together with the
# `mi_model_params` specification (name, directory, base model parameters).
mi_model = MultipleImputationModel_ts(
    name=mi_model_params['name'],
    m_graph=dataset.miss_model.m_graph,
    superfeature_mapping=dataset.superfeature_mapping,
    target_superfeature_names=dataset.afa_problem.target_superfeature_names,
    model_params=mi_model_params,
    directory=mi_model_params['directory'],
)

## Train MI model 

In [None]:
# Fit the MI model on fold 0, using the 'train' split for training and the
# 'val' split for validation.
# NOTE(review): fit_again=False presumably skips refitting when a fitted model
# already exists - confirm against MultipleImputationModel_ts.fit.
mi_model.fit(
    dataset,
    fold=0,
    train_split='train',
    valid_split='val',
    fit_again=False,
)

## Create multiple imputed dataset

In [None]:
from afa.data_modelling.missingness.multiple_imputation.multiple_imputed_dataset_ts import MultipleImputedDataset_ts

In [None]:
# Run the MI model on the dataset, requesting n_samples=5.
mi_results = mi_model.predict(dataset, n_samples=5)

# Bundle the dataset, the model and the generated imputations into an MI dataset.
mi_dataset = MultipleImputedDataset_ts(
    dataset=dataset,
    model=mi_model,
    results=mi_results,
)

## Evaluate imputation model on ground truth dataset

In [None]:
# TODO: evaluate the imputation model on the ground truth dataset

## Save MI dataset

In [None]:
# Save the MI dataset, passing the MI model directory as the target location.
mi_dataset.save(model_dir=mi_model_dir)