In [None]:
import json
import os
from ovejero import model_trainer

# Generating a Configuration File for Training a Model

__Author:__ Sebastian Wagner-Carena

__Last Run:__ 08/04/2020

__Goals:__ Learn how to use Python's `json` module to write out a configuration file for model training.

There are five dictionaries associated with training the bnn models in ovejero. 

1) **training_params**: The parameters used for the optimization and training of the model. 

2) **validation_params**: The parameters used to process the validation set.

3) **dataset_params**: The parameters used to process the dataset and generate the TFRecord file.

4) **inference_params**: The parameters used at inference time (mainly plotting names)

5) **forward_mod_params**: The parameters used to compare to the forward modeling outputs

We will start by setting the training parameters

In [None]:
# Build the training configuration. Everything that does not depend on another
# entry is declared up front in a single literal.
training_params = {
    # Probably the most important parameters - the type of BNN we want to train
    'bnn_type': 'gmm',
    'dropout_type': 'standard',

    # The optimization specific parameters
    'batch_size': 10,
    'n_epochs': 10,
    'learning_rate': 1e-4,
    'decay': 0.000003,
    'kernel_regularizer': 1e-5,
    'dropout_rate': 0.1,

    # The dropout regularizer still has to be populated in the dictionary,
    # although it will not be used for standard dropout.
    'dropout_regularizer': 1e-6,

    # The parameters that point to the tf_record. For this example the paths
    # are set to the test files.
    'root_path': './test_data/',
    'tf_record_path': 'tf_record_test',

    # The final list of parameters we want to use for analysis
    'final_params': [
        'external_shear_g1', 'external_shear_g2',
        'lens_mass_center_x', 'lens_mass_center_y',
        'lens_mass_e1', 'lens_mass_e2',
        'lens_mass_gamma', 'lens_mass_theta_E_log',
    ],
    # Using the same ordering as final_params, the flip_pairs. If using the
    # lenstronomy convention this does not need to be used, so an empty list
    # is specified.
    'flip_pairs': [],
    # The dimensions of the images
    'img_dim': 128,
}

# The remaining entries are added afterwards because several of them are built
# from the root_path entry set above.
training_params.update({
    # The path to load/save the model weights to for this configuration.
    'model_weights': training_params['root_path'] + 'test_model.h5',
    # Where to save the tensorboard logs to
    'tensorboard_log_dir': training_params['root_path'] + 'test.log',
    # The random seed at train time for reproducibility!
    'random_seed': 1138,

    # The augmentation parameters.
    # A boolean that dictates whether or not the images should be normalized
    # to have standard deviation 1
    'norm_images': True,
    # The number of pixels to uniformly shift the images by. If set to 0 no
    # shifting will occur
    'shift_pixels': 2,
    # A tuple of lists that contain the x and y parameters that need to be
    # rescaled to account for this shift
    'shift_params': (['lens_mass_center_x'], ['lens_mass_center_y']),
    # What the pixel_scale of the images is.
    'pixel_scale': 0.051,

    # The path to the baobab configuration file used to generate the training
    # data. Noise will be added on the fly using the noise parameters in this
    # file.
    'baobab_config_path': training_params['root_path'] + 'test_baobab_cfg.py',
})

# Make sure we've set the right number of training parameters (not a
# foolproof check: it only counts the keys).
if len(training_params) == 22:
    print('All set!')
elif len(training_params) < 22:
    raise RuntimeError('Missing something in training_params!')
else:
    raise RuntimeError('Too much stuff in training_params!')

Now we want to set up the **val_params**

In [None]:
# Start the base dictionary for the validation parameters
val_params = {}

# All we really need to set here is the path to the validation data and the
# desired output path to the TFRecord. The rest can just be pulled from the
# training_params.
val_params['root_path'] = './test_data/'
val_params['tf_record_path'] = 'tf_record_test_val'

# Make sure we've set the right number of validation parameters (not a
# foolproof check: it only counts the keys). The messages name val_params so
# that a failure here points at this dictionary rather than training_params.
if len(val_params) < 2:
    raise RuntimeError('Missing something in val_params!')
elif len(val_params) > 2:
    raise RuntimeError('Too much stuff in val_params!')
else:
    print('All set!')

Now we want to repeat the same for the **dataset_params**

In [None]:
# Parameters used to process the dataset, declared in a single literal.
dataset_params = {
    # Paths for the old and new lensing parameters. If you don't mind
    # rewriting the old file, these can be the same.
    'lens_params_path': 'metadata.csv',
    'new_param_path': 'new_metadata.csv',
    # Where to save the normalization weights to
    'normalization_constants_path': 'norms.csv',

    # The lens parameters to be pulled from metadata.csv.
    'lens_params': [
        'external_shear_gamma_ext', 'external_shear_psi_ext',
        'lens_mass_center_x', 'lens_mass_center_y',
        'lens_mass_e1', 'lens_mass_e2',
        'lens_mass_gamma', 'lens_mass_theta_E',
    ],

    # List of parameters that need to be converted to log space
    'lens_params_log': ['lens_mass_theta_E'],

    # Parameters that need to be converted from radius and angle to cartesian
    # coordinates
    'gampsi': {
        'gampsi_parameter_prefixes': ['external_shear'],
        'gampsi_params_rat': ['external_shear_gamma_ext'],
        'gampsi_params_ang': ['external_shear_psi_ext'],
    },
}

# Make sure we've set the right number of dataset parameters (not a foolproof
# check: it only counts the keys). Each failed check raises, so reaching the
# final print means every check passed.
if len(dataset_params) < 6:
    raise RuntimeError('Missing something in dataset_params!')
if len(dataset_params) > 6:
    raise RuntimeError('Too much stuff in dataset_params!')
if len(dataset_params['gampsi']) != 3:
    raise RuntimeError('Gamma and angle parameters are off!')
print('All set!')

There are also some parameters we use to make our plots at inference time in **inference_params**

In [None]:
# Start the base dictionary for the inference parameters
inference_params = {}

# The pyplot names that will be used for our final parameters. Raw strings are
# used so the LaTeX backslashes are taken literally; sequences such as \g, \m
# and \l are not valid Python escapes and trigger a warning in a normal string.
inference_params['final_params_print_names'] = [
    r'$\gamma_1$', r'$\gamma_2$', r'$x_\mathrm{lens}$',
    r'$y_\mathrm{lens}$', r'$e_1$', r'$e_2$', r'$\gamma_\mathrm{lens}$',
    r'$\log(\theta_E)$']

# Make sure we've set the right number of inference parameters (not a
# foolproof check: it only counts the keys). The messages name
# inference_params so that a failure here points at this dictionary.
if len(inference_params) < 1:
    raise RuntimeError('Missing something in inference_params!')
elif len(inference_params) > 1:
    raise RuntimeError('Too much stuff in inference_params!')
else:
    print('All set!')

You may be interested in doing forward modeling on a few lenses in your validation set to compare the BNN outputs to those of a more traditional pipeline. Ovejero uses Lenstronomy and Baobab for this, but a few extra user-level parameters need to be specified in __forward_mod_params__. If you're not interested in this functionality, feel free to set these parameters to None.

In [None]:
# Start the base dictionary for the forward modeling parameters
forward_mod_params = {}

# The list of lens models. Check forward_modeling.py to see what is supported.
forward_mod_params['lens_model_list'] = ['PEMD','SHEAR_GAMMA_PSI']

# The list of source models.
forward_mod_params['source_model_list'] = ['SERSIC_ELLIPSE']

# Make sure we've set the right number of forward modeling parameters (not a
# foolproof check: it only counts the keys). The messages name
# forward_mod_params so that a failure here points at this dictionary.
if len(forward_mod_params) < 2:
    raise RuntimeError('Missing something in forward_mod_params!')
elif len(forward_mod_params) > 2:
    raise RuntimeError('Too much stuff in forward_mod_params!')
else:
    print('All set!')

Finally, we can combine this into our configuration dictionary and write it to a json file!

In [None]:
# Destination of the json configuration file.
json_path = '../test/test_data/test.json'

# Gather the five parameter dictionaries under the section names used in the
# configuration file.
config_dict = {
    'training_params': training_params,
    'validation_params': val_params,
    'dataset_params': dataset_params,
    'inference_params': inference_params,
    'forward_mod_params': forward_mod_params,
}

# Final check that the config file meets requirements. This runs before the
# file is opened, so nothing is written if the checker raises.
model_trainer.config_checker(config_dict)

# Write the configuration out with a 4-space indent.
with open(json_path, 'w') as json_f:
    json.dump(config_dict, json_f, indent=4)