In [None]:
%load_ext autoreload
%autoreload 2

import numpy as np
from manada import substructure
from lenstronomy.LensModel.lens_model import LensModel
from lenstronomy.LightModel.light_model import LightModel
from lenstronomy.ImSim.image_model import ImageModel
from lenstronomy.SimulationAPI.data_api import DataAPI
from lenstronomy.Workflow.fitting_sequence import FittingSequence
from lenstronomy.Data.psf import PSF
from lenstronomy.SimulationAPI.observation_api import SingleBand
import lenstronomy.Util.util as util
import lenstronomy.Util.image_util as image_util
import matplotlib.pyplot as plt
import os, corner

# Understanding Main Deflector Modeling Residuals

__Author:__ Sebastian Wagner-Carena

__Goals:__ Understand what the residuals look like after attempting to fit a smooth lens model to a deflector that includes substructure contributions. 

One powerful data vector we have in the detection of substructure in strong lensing is the residual left after modeling away the smooth lens potential of the main deflector. In theory, the remaining signal can only be described by small scale, localized variations in the deflection field - a sign that there is substructure in our model. It is therefore tempting to use these residual maps as inputs to a machine learning based approach to substructure detection; in theory, this can even make your training agnostic to the exact lens model being subtracted. 

However, modeling and subtracting the main deflector can take dozens of CPU hours even with well optimized code. A tempting workaround is to produce two images: one that includes only the main deflector and one that includes the main deflector with its substructure. One can then subtract the main-deflector-only image from the substructure image and be left with what appears to be a reasonable residual. However, as we will see in these examples, this type of residual can produce a substantial training bias. With substructure, __the true input lens is not the lens that would be returned by forward modeling__. The presence of substructure creates an overall increase in the total mass of the main deflector which is __degenerate with the Einstein radius__ of the smooth lens model. 

In this notebook we will explore that degeneracy and show how one can create more realistic residual maps for the purposes of training.

### Generating a Lens Image with substructure

The first step is to generate a lensing image with substructure. In order to best illustrate the dangers of the erroneous approach to residual maps outlined above, we will use a fairly flexible lens model - the __power law elliptical mass distribution (PEMD)__. We will also include external shear.

For the substructure, we will (for now) use a very simple model. The substructure will all be __truncated NFW__ profiles with the parameters drawn either normally (in the case of mass and concentration parameters) or uniformly (in the case of parameters relating to the substructure's position in the halo). The number of substructure objects is fixed. 

In a similar vein, for the source we will use a simple __Sersic__ light profile. 

In [None]:
# Configure the three ingredients of the simulation: lens, source, and detector.

# Seed numpy's global RNG so that the notebook always returns the same outputs, and define the numerics
# kwargs that both image models share.
seed = 864
np.random.seed(seed)
kwargs_numerics = {'supersampling_factor': 1}

# Smooth main deflector: a PEMD profile plus external shear.
main_lens_model_list = ['PEMD', 'SHEAR']
kwargs_spemd = {
    'gamma': 1.9, 'theta_E': 1., 'e1': 0.1, 'e2': 0, 'center_x': 0.1, 'center_y': 0,
}
kwargs_shear = {'gamma1': 0.05, 'gamma2': 0.02}
main_lens_kwargs_list = [kwargs_spemd, kwargs_shear]

# Substructure realization drawn with the manada package, centered on the main deflector.
n_subs = 20
r_min, r_max = -3, 2
alpha_rs_mean, alpha_rs_std = 0.05, 0.05
rs_mean, rs_std = 0.05, 0.05
# NOTE(review): r_max is passed ahead of r_min below; confirm this matches the argument order expected by
# substructure.substructure_realization.
sub_model_list, sub_kwargs_list = substructure.substructure_realization(
    n_subs, r_max, r_min, rs_mean, rs_std, alpha_rs_mean, alpha_rs_std,
    kwargs_spemd['center_x'], kwargs_spemd['center_y'])

# Full lens: the main deflector profiles followed by the substructure profiles.
complete_lens_model_list = main_lens_model_list + sub_model_list
complete_lens_model_kwargs = main_lens_kwargs_list + sub_kwargs_list
complete_lens_model = LensModel(complete_lens_model_list)

# Reference lens made of the main deflector alone (no substructure).
simple_lens_model = LensModel(main_lens_model_list)

# Source: a single elliptical Sersic light profile.
source_model_list = ['SERSIC_ELLIPSE']
source_kwargs_list = [{
    'amp': 5.0, 'R_sersic': 0.2, 'n_sersic': 1, 'e1': 0.2, 'e2': 0.1, 'center_x': 0., 'center_y': 0,
}]
source_light_model = LightModel(source_model_list)

# Detector: Gaussian PSF plus the observation settings. The forward modeling done later needs some
# realistic noise to work, which is why the detector is specified here.
kwargs_psf = {'psf_type': 'GAUSSIAN', 'fwhm': 0.1}
psf_model = PSF(**kwargs_psf)
kwargs_detector = {
    'pixel_scale': 0.08, 'ccd_gain': 2.5, 'read_noise': 4.0, 'magnitude_zero_point': 25.9463,
    'exposure_time': 5400.0, 'sky_brightness': 22, 'num_exposures': 1, 'background_noise': None,
}
numpix = 100
data_api = DataAPI(numpix=numpix, **kwargs_detector)

# Image models for the two lensing systems (with and without substructure). The two None arguments are
# positional slots that this notebook leaves unset.
complete_image_model = ImageModel(
    data_api.data_class, psf_model, complete_lens_model, source_light_model, None, None,
    kwargs_numerics=kwargs_numerics)
simple_image_model = ImageModel(
    data_api.data_class, psf_model, simple_lens_model, source_light_model, None, None,
    kwargs_numerics=kwargs_numerics)
single_band = SingleBand(**kwargs_detector)

In [None]:
# Produce the lensed images of the source for both lens configurations.

# Image for the lens with substructure. A noisy copy is kept alongside it, using the noise returned by
# single_band.noise_for_model for this image.
source_lensed_sub = complete_image_model.image(
    complete_lens_model_kwargs, source_kwargs_list, None, None)
noise_realization = single_band.noise_for_model(source_lensed_sub)
source_lensed_sub_noise = source_lensed_sub + noise_realization

# Image for the smooth lens alone. No noise is added here because we want to understand the difference
# from a perfect signal.
source_lensed_simple = simple_image_model.image(
    main_lens_kwargs_list, source_kwargs_list, None, None)

### Simple Residual Estimate

Now we have one model with substructure and one without. A simple approximation would be to compare the lensed light from both images and call that our residual. Let's see what that looks like.

In [None]:
# Visualize the lensed source for both lens configurations, side by side.
f, ax = plt.subplots(1, 2, figsize=(16, 5), sharex=False, sharey=False)
image_panels = (
    (source_lensed_sub, "Lensed Source With Substructure"),
    (source_lensed_simple, "Lensed Source Without Substructure (No Noise)"),
)
for axis, (panel_data, panel_title) in zip(ax, image_panels):
    im = axis.matshow(panel_data, origin='lower')
    axis.set_title(panel_title)
    # Hide the pixel ticks and freeze the axis limits.
    axis.get_xaxis().set_visible(False)
    axis.get_yaxis().set_visible(False)
    axis.autoscale(False)
plt.show()

# Residuals between the substructure image (with and without noise) and the smooth-lens image.
f, ax = plt.subplots(1, 2, figsize=(16, 5), sharex=False, sharey=False)
residual_panels = (
    (source_lensed_sub_noise - source_lensed_simple, "Simplistic Residual Estimate (Noise)"),
    (source_lensed_sub - source_lensed_simple, "Simplistic Residual Estimate (No Noise)"),
)
for panel_index, (axis, (panel_data, panel_title)) in enumerate(zip(ax, residual_panels)):
    im = axis.matshow(panel_data, origin='lower')
    if panel_index == 0:
        # Only the noisy residual gets a colorbar.
        f.colorbar(im)
    axis.set_title(panel_title)
    axis.get_xaxis().set_visible(False)
    axis.get_yaxis().set_visible(False)
    axis.autoscale(False)
plt.show()

That looks like a big difference! There are some spatially concentrated residuals that clearly correspond to specific substructure, but it seems that on the larger scales our ring has also changed substantially between the two images. What's driving this change? 

Adding our 20 subhalos has also added mass to our lens, which means we will see changes in the __lensing at large and small scales__. In fact, most of our subhalos appear to only be impacting the lensing image through the large scale change, and only a few are leaving a visible small-scale impact. The issue is that __large scale changes to our lensing potential from substructure are degenerate with the parameters of our main deflector__.

To illustrate this, let's go ahead and fit a lens model to the lensing image with substructure.

### Remodeling the Main Deflector

Here we use the lenstronomy fitting sequence functions to forward model the PEMD parameters for the image of our lens with substructure.

In [None]:
# Configuration for the lenstronomy fit: model lists plus, for every model component, the initial values,
# proposal widths (sigma), fixed parameters, and lower/upper bounds. Within each list the entries are
# ordered like the corresponding model list.

# Lens components: a PEMD main deflector followed by external shear.
ls_lens_model_list = ['PEMD', 'SHEAR']
# No PEMD parameter is held fixed. For the shear, ra_0 and dec_0 are fixed for simplicity (i.e. the
# coordinate system is not allowed to move).
fixed_lens = [{}, {'ra_0': 0, 'dec_0': 0}]
kwargs_lens_init = [
    {'theta_E': 1.1, 'e1': 0.1, 'e2': 0., 'center_x': 0.1, 'center_y': 0., 'gamma': 1.9},
    {'gamma1': 0.05, 'gamma2': 0.02},
]
kwargs_lens_sigma = [
    {'theta_E': .2, 'e1': 0.05, 'e2': 0.05, 'center_x': 0.05, 'center_y': 0.05, 'gamma': 0.2},
    {'gamma1': 0.05, 'gamma2': 0.05},
]
kwargs_lower_lens = [
    {'theta_E': 0.01, 'e1': -0.5, 'e2': -0.5, 'center_x': -10, 'center_y': -10, 'gamma': 0.01},
    {'gamma1': -10, 'gamma2': -10},
]
kwargs_upper_lens = [
    {'theta_E': 10., 'e1': 0.5, 'e2': 0.5, 'center_x': 10, 'center_y': 10, 'gamma': 10},
    {'gamma1': 10, 'gamma2': 10},
]

# Source component: a single elliptical Sersic profile with no fixed parameters.
# NOTE(review): no 'amp' entry is supplied for the source; presumably lenstronomy determines the
# amplitude itself - confirm.
ls_source_model_list = ['SERSIC_ELLIPSE']
fixed_source = [{}]
kwargs_source_init = [
    {'R_sersic': 0.2, 'n_sersic': 1, 'e1': 0.2, 'e2': 0.1, 'center_x': 0., 'center_y': 0},
]
kwargs_source_sigma = [
    {'n_sersic': 0.5, 'R_sersic': 0.1, 'e1': 0.05, 'e2': 0.05, 'center_x': 0.2, 'center_y': 0.2},
]
kwargs_lower_source = [
    {'e1': -0.5, 'e2': -0.5, 'R_sersic': 0.001, 'n_sersic': .5, 'center_x': -10, 'center_y': -10},
]
kwargs_upper_source = [
    {'e1': 0.5, 'e2': 0.5, 'R_sersic': 10, 'n_sersic': 5., 'center_x': 10, 'center_y': 10},
]

# Bundle everything in the list/dict layout that lenstronomy expects.
ls_lens_params = [kwargs_lens_init, kwargs_lens_sigma, fixed_lens, kwargs_lower_lens, kwargs_upper_lens]
ls_source_params = [kwargs_source_init, kwargs_source_sigma, fixed_source, kwargs_lower_source,
                    kwargs_upper_source]
ls_kwargs_params = {'lens_model': ls_lens_params, 'source_model': ls_source_params}
ls_kwargs_model = {'lens_model_list': ls_lens_model_list, 'source_light_model_list': ls_source_model_list}

# Likelihood and numerics options. Note that this mutates the kwargs_numerics dict created in the model
# setup cell rather than making a copy.
ls_kwargs_likelihood = {'source_marg': False}
kwargs_numerics['supersampling_convolution'] = False

# Coordinate information for the image grid; only the origin coordinates and the pixel-to-angle
# transform are kept from the returned tuple.
_, _, ra_0, dec_0, _, _, Mpix2coord, _ = util.make_grid_with_coordtransform(
    numPix=numpix, deltapix=kwargs_detector['pixel_scale'], center_ra=0, center_dec=0, subgrid_res=1,
    inverse=False)

# This is where we specify that the image we want to conduct inference on is the lens WITH substructure.
# NOTE(review): the noise-free image (source_lensed_sub) is used here even though a noisy version
# (source_lensed_sub_noise) is also available; confirm that this is intended.
ls_kwargs_data = {
    'background_rms': single_band.background_noise,
    'exposure_time': single_band.exposure_time,
    'ra_at_xy_0': ra_0,
    'dec_at_xy_0': dec_0,
    'transform_pix2angle': Mpix2coord,
    'image_data': source_lensed_sub,
}

# A single band, wrapped in the multi-band structure that lenstronomy takes as input.
ls_multi_band_list = [[ls_kwargs_data, kwargs_psf, kwargs_numerics]]
ls_kwargs_data_joint = {'multi_band_list': ls_multi_band_list, 'multi_band_type': 'multi-linear'}

In [None]:
# Initialize the fitting sequence from the data, model, constraint, likelihood, and parameter
# configuration (per the original author's note, this uses emcee with parallelization behind the scenes).
ls_kwargs_constraints = {}
fitting_seq = FittingSequence(ls_kwargs_data_joint, ls_kwargs_model, ls_kwargs_constraints,
                              ls_kwargs_likelihood, ls_kwargs_params)

# Sampler settings. If a chain file already exists on disk, the MCMC step is told to start from that
# backup; otherwise the step is given the same filename as its backup location.
walker_ratio = 10
n_samps = 3000
chains_save_path = 'Main_Deflector_Modeling_Residuals_chains_2.hd5'
start_from_backup = os.path.isfile(chains_save_path)
if start_from_backup:
    print('Using chains found at %s'%(chains_save_path))
else:
    print('No chains found at %s'%(chains_save_path))
fitting_kwargs_list = [['MCMC', {'n_burn': 0, 'n_run': n_samps, 'walkerRatio': walker_ratio,
                                 'sigma_scale': 0.1, 'backup_filename': chains_save_path,
                                 'start_from_backup': start_from_backup}]]

# And finally we can run our fitting sequence.
chain_list = fitting_seq.fit_sequence(fitting_kwargs_list)

# Display the best-fit parameters. This must come after fitting_seq has been created and run; calling it
# in an earlier cell raises a NameError when the notebook is executed from top to bottom.
fitting_seq.best_fit()

In [None]:
# Inspect the chains and compare the posterior against the input parameters.
chain_params = chain_list[0][2]

# Map each sampled parameter back to the value used to generate the image. A sampled name is the
# parameter name followed by a component suffix ('_lens0', '_lens1' or '_source_light0'); stripping the
# suffix gives the key into the matching input kwargs dict.
truth_lookup = (
    ('lens0', 6, kwargs_spemd),
    ('lens1', 6, kwargs_shear),
    ('source_light0', 14, source_kwargs_list[0]),
)
true_values = []
for param in chain_params:
    for component_tag, suffix_len, truth_kwargs in truth_lookup:
        if component_tag in param:
            true_values.append(truth_kwargs[param[:-suffix_len]])

# Reshape the flat samples to (step, walker, parameter); the number of walkers is the number of
# parameters times walker_ratio.
n_params = len(chain_params)
chains = chain_list[0][1].reshape((-1, n_params * walker_ratio, n_params))

# Don't forget to include some burnin! Drop the first steps and flatten the walkers again.
burnin = 2000
chains = chains[burnin:].reshape((-1, n_params))

# Plot styling.
plot_limits = None
color_contour = '#1b9e77'
truth_color = '#d95f02'
hist_kwargs = {'density': True, 'color': color_contour}
fontsize = 13
dpi = 200

# Corner plot of the posterior with the input values overlaid as truths.
corner_kwargs = dict(
    bins=20,
    labels=chain_params,
    show_titles=False,
    plot_datapoints=False,
    label_kwargs=dict(fontsize=fontsize),
    truths=true_values,
    levels=[0.68, 0.95],
    dpi=dpi,
    color=color_contour,
    fill_contours=True,
    range=plot_limits,
    truth_color=truth_color,
    hist_kwargs=hist_kwargs,
)
fig = corner.corner(chains, **corner_kwargs)

It's clear that our emcee posterior has not converged to the correct values, but we expect this: the substructure is degenerate with parameters of our main deflector model. For example, the Einstein radius is now estimated to be 1.01 rather than 1, corresponding nicely to the residual effect we plotted earlier. To further cement this we can do the same comparison as before, but now between the lensed light for the model our emcee converged to and the true model with substructure.

### Investigating Residuals with the Modeled Lens

There are two questions we can ask ourselves about the residuals with the modeled lens.

    1) Is there still a residual that is detectable above the noise?
    2) Ignoring the noise, what does the residual actually look like?

In [None]:
# Build the kwargs of the fitted model from the mean of the post-burn-in chains.
kwargs_spemd_emcee = {}
kwargs_shear_emcee = {}
# NOTE(review): 'amp' is not read from the chains; the value 4.7536 is hard-coded here. Presumably it was
# taken from an earlier fit - confirm it still matches the chains being used.
kwargs_source_emcee = {'amp': 4.7536}

# Route each sampled parameter to the kwargs dict of its component. The component suffix ('_lens0',
# '_lens1' or '_source_light0') is stripped from the sampled name to recover the parameter key.
mean_targets = (
    ('lens0', 6, kwargs_spemd_emcee),
    ('lens1', 6, kwargs_shear_emcee),
    ('source_light0', 14, kwargs_source_emcee),
)
for column, param in enumerate(chain_params):
    for component_tag, suffix_len, target_kwargs in mean_targets:
        if component_tag in param:
            target_kwargs[param[:-suffix_len]] = np.mean(chains[:, column])

main_lens_kwargs_list_emcee = [kwargs_spemd_emcee, kwargs_shear_emcee]
main_lens_kwargs_source_emcee = [kwargs_source_emcee]


# The image model without substructure can be reused as is; only the parameters passed in are new.
source_lensed_simple_emcee = simple_image_model.image(
    main_lens_kwargs_list_emcee, main_lens_kwargs_source_emcee, None, None)

In [None]:
# Visualize the substructure image next to the image produced by the fitted smooth lens.
f, ax = plt.subplots(1, 2, figsize=(16, 5), sharex=False, sharey=False)
image_panels = (
    (source_lensed_sub, "Lensed Source With Substructure"),
    (source_lensed_simple_emcee, "Lensed Source Without Substructure (No Noise)"),
)
for axis, (panel_data, panel_title) in zip(ax, image_panels):
    im = axis.matshow(panel_data, origin='lower')
    axis.set_title(panel_title)
    # Hide the pixel ticks and freeze the axis limits.
    axis.get_xaxis().set_visible(False)
    axis.get_yaxis().set_visible(False)
    axis.autoscale(False)
plt.show()

# Residuals between the substructure image (with and without noise) and the fitted smooth-lens image.
f, ax = plt.subplots(1, 2, figsize=(16, 5), sharex=False, sharey=False)
residual_panels = (
    (source_lensed_sub_noise - source_lensed_simple_emcee, "True Residual Estimate (Noise)"),
    (source_lensed_sub - source_lensed_simple_emcee, "True Residual Estimate (No Noise)"),
)
for panel_index, (axis, (panel_data, panel_title)) in enumerate(zip(ax, residual_panels)):
    im = axis.matshow(panel_data, origin='lower')
    if panel_index == 0:
        # Only the noisy residual gets a colorbar.
        f.colorbar(im)
    axis.set_title(panel_title)
    axis.get_xaxis().set_visible(False)
    axis.get_yaxis().set_visible(False)
    axis.autoscale(False)
plt.show()

That looks very different from the residual we calculated above! There's still signal there, but it's 3-4x weaker and therefore much harder to distinguish from the noise. But this is exactly the type of residual you would get if you conducted a forward model on the lens. If we train a network using the first set of residuals we showed, we would falsely believe that our network could easily detect the presence of substructure, when in reality it was simply detecting a change in the Einstein ring caused by the inclusion of more mass.

Similar issues can arise even when not training a model on the residual; in generating a training set we have to be careful that the addition of the substructure __does not produce signal simply because our assumptions about the lens profile were too simplistic__!