This notebook provides details on how we generated training sets and trained neural networks with paltas. 
For an introduction to the paltas code base, see the Notebooks folder in the paltas main repo.

Additional scripts we used for training can be found in the training_scripts folder.

### Configuration for generating the NPE training set: ###

In [1]:
# Configuration for training of CNN for STRIDES30 test

import os

import numpy as np
from scipy.stats import norm, truncnorm
import paltas.Sampling.distributions as dist
from paltas.MainDeflector.simple_deflectors import PEMDShear
from paltas.Sources.sersic import SingleSersicSource
from paltas.PointSource.single_point_source import SinglePointSource

# Zero point used for every 'output_ab_zeropoint' / 'magnitude_zero_point'
# entry below (calculated using the .fits header)
output_ab_zeropoint = 25.1152

# Numerics options: supersampling factor of 1
kwargs_numerics = dict(supersampling_factor=1)

# Size of the cutout
numpix = 80

# Optional settings, left disabled for this training set:
# quads_only
#doubles_quads_only = True
# point source magnification cut
#ps_magnification_cut = 2

# load in a PSF kernel
from astropy.io import fits
from lenstronomy.Util import kernel_util

# Location of the focus-diverse PSF library. The default is the original local
# path; set the PALTAS_PSF_FITS environment variable to use a copy stored
# elsewhere instead of editing this cell, e.g. on the cluster:
# '/home/users/sydney3/paltas/datasets/hst_psf/STDPBF_WFC3UV_F814W.fits'
psf_fits_file = os.environ.get(
    'PALTAS_PSF_FITS',
    '/Users/smericks/Desktop/StrongLensing/paltas/datasets/hst_psf/STDPBF_WFC3UV_F814W.fits')

# load in focus diverse PSF maps
with fits.open(psf_fits_file) as hdu:
    psf_kernels = hdu[0].data
# collapse all leading axes into a single stack of 101x101 kernels
psf_kernels = psf_kernels.reshape(-1,101,101)
# negative pixel values are set to zero before normalizing
psf_kernels[psf_kernels<0] = 0

# normalize psf_kernels so that each kernel sums to 1
psf_sums = np.sum(psf_kernels,axis=(1,2))
# reshape to (n_kernels,1,1) so the division broadcasts over each kernel
psf_sums = psf_sums.reshape(-1,1,1)
normalized_psfs = psf_kernels/psf_sums

# pick random weights to create PSF
def draw_psf_kernel():
	"""Draw a random PSF kernel as a weighted mix of the normalized PSFs.

	One weight per kernel in normalized_psfs is drawn uniformly at random and
	the weights are rescaled to sum to 1.

	Returns:
		The weighted sum of the kernels, passed through
		kernel_util.degrade_kernel with a factor of 4.
	"""
	n_kernels = len(normalized_psfs)
	# random mixing weights, rescaled so they sum to 1
	mix = np.random.uniform(size=n_kernels)
	mix = mix/mix.sum()
	# broadcast one weight per kernel, then collapse the kernel axis
	blended = (mix[:,np.newaxis,np.newaxis]*normalized_psfs).sum(axis=0)
	return kernel_util.degrade_kernel(blended,4)

# Sampling configuration for the training set. Entries that are callables
# (e.g. the .rvs method of a frozen scipy distribution) are distributions;
# plain values are fixed. Note that scipy's truncnorm takes its two bounds in
# units of `scale` relative to `loc`.
config_dict = {
    'main_deflector': {
        'class': PEMDShear,
        'parameters': {
            # NOTE(review): z_lens is also listed in the cross_object entry
            # below; presumably that joint draw takes precedence - confirm
            'z_lens': truncnorm(-2.5, np.inf, loc=0.5, scale=0.2).rvs,
            'gamma': truncnorm(-(2./.2), np.inf, loc=2.0, scale=0.2).rvs,
            'theta_E': truncnorm(-(.8/.15), np.inf, loc=0.8, scale=0.15).rvs,
            'e1': norm(loc=0, scale=0.2).rvs,
            'e2': norm(loc=0, scale=0.2).rvs,
            # see cross_object below
            'center_x': None,
            'center_y': None,
            'gamma1': norm(loc=0, scale=0.12).rvs,
            'gamma2': norm(loc=0, scale=0.12).rvs,
            'ra_0': 0.0,
            'dec_0': 0.0,
        },
    },
    'source': {
        'class': SingleSersicSource,
        'parameters': {
            # NOTE(review): z_source is also listed in the cross_object entry
            # below; presumably that joint draw takes precedence - confirm
            'z_source': truncnorm(-5, np.inf, loc=2., scale=0.4).rvs,
            # range: 20 to 27, centered at 23.5
            'mag_app': truncnorm(-3./2., 3./2., loc=23.5, scale=7./3.).rvs,
            'output_ab_zeropoint': output_ab_zeropoint,
            'R_sersic': truncnorm(-(.5/.5), np.inf, loc=0.5, scale=0.5).rvs,
            'n_sersic': truncnorm(-1.25, np.inf, loc=3., scale=1.).rvs,
            'e1': truncnorm(-2.5, 2.5, loc=0, scale=0.2).rvs,
            'e2': truncnorm(-2.5, 2.5, loc=0, scale=0.2).rvs,
            # see cross_object below
            'center_x': None,
            'center_y': None,
        },
    },
    'lens_light': {
        'class': SingleSersicSource,
        'parameters': {
            # see cross_object below
            'z_source': None,
            # range: 17 to 23
            'mag_app': truncnorm(-3./2., 3./2., loc=20, scale=2.).rvs,
            'output_ab_zeropoint': output_ab_zeropoint,
            'R_sersic': truncnorm(-(1./.8), np.inf, loc=1.0, scale=0.8).rvs,
            'n_sersic': truncnorm(-1.25, np.inf, loc=3., scale=2.).rvs,
            'e1': truncnorm(-2.5, 2.5, loc=0, scale=0.2).rvs,
            'e2': truncnorm(-2.5, 2.5, loc=0, scale=0.2).rvs,
            # see cross_object below
            'center_x': None,
            'center_y': None,
        },
    },
    'point_source': {
        'class': SinglePointSource,
        'parameters': {
            # see cross_object below for z,x,y
            'z_point_source': None,
            'x_point_source': None,
            'y_point_source': None,
            # range: 19 to 25
            'mag_app': truncnorm(-3./2., 3./2., loc=22., scale=2.).rvs,
            'output_ab_zeropoint': output_ab_zeropoint,
            'mag_pert': dist.MultipleValues(
                dist=truncnorm(-1/0.3, np.inf, loc=1, scale=0.3).rvs,
                num=10),
            'compute_time_delays': False,
        },
    },
    'cosmology': {
        'parameters': {
            'cosmology_name': 'planck18',
        },
    },
    'psf': {
        'parameters': {
            'psf_type': 'PIXEL',
            # callable: a new kernel is produced each time it is called
            'kernel_point_source': draw_psf_kernel,
            'point_source_supersampling_factor': 1,
        },
    },
    'detector': {
        'parameters': {
            'pixel_scale': 0.04,
            'ccd_gain': 1.5,
            'read_noise': 3.0,
            'magnitude_zero_point': output_ab_zeropoint,
            'exposure_time': 1400.,
            'sky_brightness': 21.9,
            'num_exposures': 1,
            'background_noise': None,
        },
    },
    'drizzle': {
        'parameters': {
            'supersample_pixel_scale': 0.040,
            'output_pixel_scale': 0.040,
            'wcs_distortion': None,
            'offset_pattern': [(0, 0), (0.5, 0.5)],
            'psf_supersample_factor': 1,
        },
    },
    # Parameters that are drawn together across objects. Each key is a
    # comma-separated list of 'object:parameter' names; the matching entries
    # in the objects above are set to None (except z_lens / z_source, see the
    # review notes above).
    'cross_object': {
        'parameters': {
            'main_deflector:center_x,lens_light:center_x':
                dist.DuplicateScatter(
                    dist=norm(loc=0, scale=0.07).rvs, scatter=0.005),
            'main_deflector:center_y,lens_light:center_y':
                dist.DuplicateScatter(
                    dist=norm(loc=0, scale=0.07).rvs, scatter=0.005),
            ('source:center_x,source:center_y,'
             'point_source:x_point_source,point_source:y_point_source'):
                dist.DuplicateXY(
                    x_dist=norm(loc=0.0, scale=0.1).rvs,
                    y_dist=norm(loc=0.0, scale=0.1).rvs),
            ('main_deflector:z_lens,lens_light:z_source,source:z_source,'
             'point_source:z_point_source'):
                dist.RedshiftsPointSource(
                    z_lens_min=0, z_lens_mean=0.5, z_lens_std=0.2,
                    z_source_min=0, z_source_mean=2, z_source_std=0.4),
        },
    },
}

### Configuration for training the NPE network ###

In [2]:
import os
import random

####################
# Things that change
####################

# Folder containing the train_<i>/ and validate/ sets
training_folder = '/scratch/users/sydney3/paper_results/broad_training/'
# Folder that receives the outputs of this run (norms, weights, log)
write_folder = '/scratch/users/sydney3/paper_results/broad_training/diag_no_R_src/'
# Number of train_<i>/ folders to use
num_training_folders = 10

# Path of the input normalization .csv
input_norm_path = write_folder + 'norms.csv'

# Number of loops through the training set
n_epochs = 100
# Steps per decay
steps_per_decay = 1e3

# A string with which loss function to use.
loss_function = 'diag'

# Whether or not to normalize the images by the standard deviation
norm_images = False
log_norm_images = True

# Where to save the model weights ({epoch}/{val_loss} are left as
# placeholders in the file name)
model_weights = write_folder + 'xresnet34_{epoch:03d}-{val_loss:.2f}.h5'

# Initial weights; None here. Example of pointing to a saved checkpoint:
#model_weights_init = (write_folder + 'xresnet34_165--19.18_last.h5')
model_weights_init = None

# The learning rate for the model. Alternative used alongside the
# checkpoint example above:
#learning_rate = 5e-3*(0.98**(165*5e5/(512*1e3)))
learning_rate = 5e-4

# Whether or not to use random rotation of the input images
random_rotation = True

# Save training results to .csv file
csv_path = write_folder + 'log.csv'

##########################
# Things that don't change
##########################
batch_size = 512
img_size = (80,80,1)
# Parameters the network learns (no source R_sersic in this run)
learning_params = [
    'main_deflector_parameters_theta_E',
    'main_deflector_parameters_gamma1',
    'main_deflector_parameters_gamma2',
    'main_deflector_parameters_gamma',
    'main_deflector_parameters_e1',
    'main_deflector_parameters_e2',
    'main_deflector_parameters_center_x',
    'main_deflector_parameters_center_y',
    'source_parameters_center_x',
    'source_parameters_center_y',
]
flip_pairs = None
weight_terms = None

# prep training / validation paths
folder_indices = range(num_training_folders)
npy_folders_train = [
    '%strain_%d/' % (training_folder, idx) for idx in folder_indices]
npy_folder_val = training_folder + 'validate/'
# one tfrecord and one metadata file per folder
tfr_train_paths = [
    os.path.join(folder, 'data.tfrecord') for folder in npy_folders_train]
tfr_val_path = os.path.join(npy_folder_val, 'data.tfrecord')
metadata_paths_train = [
    os.path.join(folder, 'metadata.csv') for folder in npy_folders_train]
metadata_path_val = os.path.join(npy_folder_val, 'metadata.csv')

# The detector kwargs to use for on-the-fly noise generation
kwargs_detector = None
# A string specifying which model to use
model_type = 'xresnet34'
# A string specifying which optimizer to use
optimizer = 'Adam'

### Generating SNPE Training Sets ###

See making_predictions.ipynb, which shows how the configuration files for the
SNPE training sets are generated from NPE predictions.

### Example configuration for training an SNPE network ###

In [None]:
import os
import random
import numpy as np

####################
# Things that change
####################

# Folder containing the train_<i>/ and validate/ sets; outputs of this run
# are written to the same folder
training_folder = '/scratch/users/sydney3/paper_results/sequential_training/shifted_test_set/narrow000/'
write_folder = '/scratch/users/sydney3/paper_results/sequential_training/shifted_test_set/narrow000/'
# Number of train_<i>/ folders to use
num_training_folders = 1

# APT proposal: means and a diagonal precision matrix (inverse of the
# diagonal matrix of squared standard deviations).
# Presumably ordered like learning_params below (11 entries) - TODO confirm
proposal_means = np.asarray([
    0.567, 0.027, 0.167, 1.911, 0.173, 0.037,
    -0.018, 0.034, -0.035, 0.011, 0.728])
proposal_stds = np.asarray([
    0.023, 0.039, 0.054, 0.11, 0.093, 0.121,
    0.009, 0.008, 0.007, 0.007, 0.284])
proposal_prec = np.linalg.inv(np.diag(proposal_stds**2))
# APT prior: same layout as the proposal
prior_means = np.asarray([0.8, 0., 0., 2.0, 0., 0., 0., 0., 0., 0., 0.5])
prior_stds = np.asarray([
    0.15, 0.12, 0.12, 0.2, 0.2, 0.2, 0.07, 0.07, 0.1, 0.1, 0.5])
prior_prec = np.linalg.inv(np.diag(prior_stds**2))

# Path of the input normalization .csv (taken from the broad training folder)
input_norm_path = '/scratch/users/sydney3/paper_results/broad_training/' + 'norms.csv'

# Number of loops through the training set
n_epochs = 50
# Steps per decay
steps_per_decay = 100

# A string with which loss function to use.
loss_function = 'diagapt'

# Whether or not to normalize the images by the standard deviation
norm_images = False
log_norm_images = True

# Where to save the model weights ({epoch}/{val_loss} are left as
# placeholders in the file name)
model_weights = write_folder + 'xresnet34_{epoch:03d}-{val_loss:.2f}.h5'

# Initial weights. Example of pointing to a broad-training checkpoint:
#model_weights_init = ('/scratch/users/sydney3/paper_results/broad_training/'+
#                      'xresnet34_053--14.10_best.h5')
# NOTE(review): this is None although the learning rate below is pre-decayed
# as if continuing from epoch 53 - confirm which setting is intended
model_weights_init = None

# The learning rate for the model picks up where it left off!!
# (the un-decayed value would be: learning_rate = 5e-4)
learning_rate = 5e-4*(0.98**(53*5e5/(512*1e3)))

# Whether or not to use random rotation of the input images
#  NEEDS TO BE TURNED OFF FOR SEQUENTIAL
random_rotation = False

# Save training results to .csv file
csv_path = write_folder + 'log.csv'

##########################
# Things that don't change
##########################
batch_size = 512
img_size = (80,80,1)
# Parameters the network learns (includes source R_sersic)
learning_params = [
    'main_deflector_parameters_theta_E',
    'main_deflector_parameters_gamma1',
    'main_deflector_parameters_gamma2',
    'main_deflector_parameters_gamma',
    'main_deflector_parameters_e1',
    'main_deflector_parameters_e2',
    'main_deflector_parameters_center_x',
    'main_deflector_parameters_center_y',
    'source_parameters_center_x',
    'source_parameters_center_y',
    'source_parameters_R_sersic',
]
flip_pairs = None
weight_terms = None

# prep training / validation paths
folder_indices = range(num_training_folders)
npy_folders_train = [
    '%strain_%d/' % (training_folder, idx) for idx in folder_indices]
npy_folder_val = training_folder + 'validate/'
# one tfrecord and one metadata file per folder
tfr_train_paths = [
    os.path.join(folder, 'data.tfrecord') for folder in npy_folders_train]
tfr_val_path = os.path.join(npy_folder_val, 'data.tfrecord')
metadata_paths_train = [
    os.path.join(folder, 'metadata.csv') for folder in npy_folders_train]
metadata_path_val = os.path.join(npy_folder_val, 'metadata.csv')

# The detector kwargs to use for on-the-fly noise generation
kwargs_detector = None
# A string specifying which model to use
model_type = 'xresnet34'
# A string specifying which optimizer to use
optimizer = 'Adam'