# 3/8/21 - This notebook analyzes the results of the lens models for G3629152_1933.

In [1]:
### libraries
%matplotlib inline
# Set the workspace path: take the directory returned by pyprojroot's here()
# and make it the kernel's working directory, so the notebook does not depend
# on where it was launched from.
from pyprojroot import here
workspace_path = str(here())  # previously hard-coded as '/data/sknabel/autolens_workspace'
%cd $workspace_path
print(f"Working Directory has been set to `{workspace_path}`")

import matplotlib.pyplot as plt
from autoconf import conf
import autolens as al
import autolens.plot as aplt
import autofit as af
import pandas as pd
import numpy as np
from astropy.io import fits
from astropy.visualization import astropy_mpl_style
plt.style.use(astropy_mpl_style)
#from astropy.stats import sigma_clip as clip
import astropy.cosmology as cosmo
from os import path
import time

# Timestamp string for this session, formatted day-month-year then
# hour-minute-second (e.g. for tagging saved files).
datetime = time.strftime("%d%m%Y-%H%M%S")

# Directory layout. Every path keeps its trailing slash so that file names can
# be appended directly.
# NOTE(review): the root is a hard-coded absolute path; consider deriving it
# from workspace_path if the two are meant to be the same directory.
autoz_path = '/data/sknabel/autoz_lens_model/'
file_path = autoz_path + 'files/'
csv_path = file_path + 'csv/'
fits_path = file_path + 'fits/'
png_path = autoz_path + 'visuals/png/'
pdf_path = autoz_path + 'visuals/pdf/'

In /soft/anaconda3/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle: 
The text.latex.preview rcparam was deprecated in Matplotlib 3.3 and will be removed two minor releases later.
In /soft/anaconda3/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle: 
The mathtext.fallback_to_cm rcparam was deprecated in Matplotlib 3.3 and will be removed two minor releases later.
In /soft/anaconda3/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle: Support for setting the 'mathtext.fallback_to_cm' rcParam is deprecated since 3.3 and will be removed two minor releases later; use 'mathtext.fallback : 'cm' instead.
In /soft/anaconda3/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle: 
The validate_bool_maybe_none function was deprecated in Matplotlib 3.3 and will be removed two minor releases later.
In /soft/anaconda3/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/_classic_tes

/data/sknabel/autoz_lens_model
Working Directory has been set to `/data/sknabel/autoz_lens_model`


In [2]:
# Object identifiers: these should be constant across experiments.
gama_id = 3629152
links_id = 1933

# Per-object input (fits) and output folders, both named G<gama_id>_<links_id>.
object_folder = f'{fits_path}G{gama_id}_{links_id}/'
output_folder = f'{autoz_path}output/G{gama_id}_{links_id}/'

# Load the object data table and keep only the row(s) for this GAMA ID.
# csv_path already ends with '/', so no extra separator is needed here.
links = pd.read_csv(f'{csv_path}latest/links_sample_latest.csv')
lens_galaxy_data = links[links.GAMA_ID == gama_id]
if lens_galaxy_data.empty:
    # Fail loudly: otherwise every quantity below would be an empty array and
    # the Einstein radius prior would silently become NaN.
    raise ValueError(f'GAMA_ID {gama_id} not found in links_sample_latest.csv')

# The values below are numpy arrays with one entry per matching row.
stellar_mass = lens_galaxy_data.lambdar_log_mstar.values
zlens = lens_galaxy_data.zlens.values
zsource = lens_galaxy_data.zsource.values

# Take the average of the two Einstein radius estimates to use as the prior.
einstein_radius = np.mean([
    lens_galaxy_data.theta_e_pm.values,
    lens_galaxy_data.theta_e_sis.values,
])
print(f'Lens and source redshifts at {zlens} and {zsource}.')
print(f'Einstein radius prior: {einstein_radius}')

# Load this object's performance log from csv.
# NOTE(review): the log carries several "Unnamed: 0..." columns, which looks
# like the index being re-saved on every write -- confirm and consider writing
# it with index=False.
performance_log = pd.read_csv(f'{csv_path}G{gama_id}_{links_id}_performance_log.csv')
print(f'Performance log: {performance_log}')

INFO:numexpr.utils:Note: NumExpr detected 40 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
INFO:numexpr.utils:NumExpr defaulting to 8 threads.


Lens and source redshifts at [0.40662315] and [0.78711778].
Einstein radius prior: 0.7778450206448699
Performance log:    Unnamed: 0  Unnamed: 0.1  Unnamed: 0.1.1  Unnamed: 0.1.1.1  \
0           0           0.0             0.0               0.0   
1           1           1.0             1.0               0.0   
2           2           2.0             0.0               NaN   
3           3           0.0             NaN               NaN   
4           0           NaN             NaN               NaN   

   Unnamed: 0.1.1.1.1 Experiment  phase1_time  phase1_likelihood  phase2_time  \
0                 0.0          1   403.158289         445.398771  1164.151751   
1                 NaN          2   151.080091         469.442803  1602.797939   
2                 NaN         2a   163.102591         469.267740  2655.558353   
3                 NaN          3   176.824480         469.505882   712.292176   
4                 NaN          3   162.736001         469.271822   687.881434   

   

In [141]:
# Set up an aggregator over this object's output folder to examine the results
# of Experiment 3-2, keeping only results whose directory contains "model_fit".
agg = af.Aggregator(directory=f'{output_folder}')
agg = agg.filter(agg.directory.contains("model_fit"))

samples_gen = agg.values("samples")

# Printing samples_gen shows only the lazy object itself. Converting it to a
# list and taking the length gives the number of samples objects, one per fit
# that passed the filter. Note that list(samples_gen) exhausts samples_gen.
print("NestedSampler Samples: \n")
print(samples_gen)
print()
print("Total Samples Objects = ", len(list(samples_gen)), "\n")

# Alternatively, request the samples again and keep them in a list so they can
# be indexed and iterated repeatedly.
samples = list(agg.values("samples"))

# Maximum-likelihood and maximum-posterior parameter vectors of the first fit.
print(samples[0].max_log_likelihood_vector)
print(samples[0].max_log_posterior_vector)

Aggregator loading phases... could take some time.

 A total of 12 phases and results were found.
Filter found a total of 5 results
NestedSampler Samples: 

<map object at 0x7f8bf439fa20>

Total Samples Objects =  5 

[-0.18103363748883755, 0.05487794833439358, 0.03831901356845069, 3.7222800869020216, 0.446909482897077, 3762608466.9900746, 0.03341642059411545, 1.9033877977283176, -0.23952487865191285, -1.8186867595354448, 1.2609450140936616]
[-0.20694259465582404, 0.3470974546929809, 0.050757331981117274, 2.427061870428737, 1.0726518279871817e-06, 17633324578.723415, 1.169416755237951e-06, 2.069675587984034, -2.663969610449517, -0.07076579135373473, 3.2625348915634533]


In [142]:
from collections import Counter # Counter counts the number of occurrences of each item
from itertools import tee, count
import string

def uniquify(seq, suffs=None):
    """Make all the items unique, in place, by adding a suffix (1, 2, etc).

    Only items that occur more than once in `seq` are changed; each duplicated
    value gets its own independent run of suffixes, assigned in order of
    appearance (e.g. ['name', 'zip', 'name'] -> ['name1', 'zip', 'name2']).

    Parameters
    ----------
    seq : mutable sequence of str
        Modified in place.
    suffs : iterable, optional
        Alternative source of suffixes; each one is converted with `str`.
        Defaults to a fresh `itertools.count(1)` on every call.

    Returns
    -------
    None

    Raises
    ------
    StopIteration
        If a finite `suffs` runs out before every duplicate has a suffix.
    """
    # Create the default counter per call. A `count(1)` default in the
    # signature would be built once and shared, so later calls would carry on
    # numbering from wherever the previous call stopped.
    if suffs is None:
        suffs = count(1)

    # Values occurring more than once, e.g. ['name', 'zip'].
    duplicated = [item for item, n_seen in Counter(seq).items() if n_seen > 1]
    # One independent copy of the suffix iterator per duplicated value,
    # e.g. {'name': <iter>, 'zip': <iter>}.
    suffix_iters = dict(zip(duplicated, tee(suffs, len(duplicated))))

    for idx, item in enumerate(seq):
        if item in suffix_iters:  # items that were already unique are left alone
            seq[idx] = item + str(next(suffix_iters[item]))


# Collect the maximum-likelihood parameter vector of every samples object into
# one table, one row per fit (row label = position in `samples`).
result_frames = []
for fit_index, sample in enumerate(samples):
    # Work on a copy so that uniquify() renames this list only and does not
    # modify the model's own parameter names.
    parameters = list(sample.model.parameter_names)
    print(parameters)
    # Repeated names (e.g. 'intensity') become 'intensity_a', 'intensity_b', ...
    uniquify(parameters, (f'_{x!s}' for x in string.ascii_lowercase))
    vector = sample.max_log_likelihood_vector
    print(parameters)
    print(vector)
    result = pd.DataFrame([vector], columns=parameters, index=[fit_index])
    print(sample)
    print(result)
    result_frames.append(result)

# Concatenate once rather than growing the frame inside the loop. An empty
# `samples` gives an empty table instead of an undefined (or stale) variable.
max_log_likelihoods = pd.concat(result_frames) if result_frames else pd.DataFrame()

max_log_likelihoods
    

['elliptical_comps_0', 'elliptical_comps_1', 'intensity', 'sersic_index', 'mass_to_light_ratio', 'mass_at_200', 'intensity', 'effective_radius', 'centre_0', 'centre_1', 'effective_radius']
['elliptical_comps_0', 'elliptical_comps_1', 'intensity_a', 'sersic_index', 'mass_to_light_ratio', 'mass_at_200', 'intensity_b', 'effective_radius_a', 'centre_0', 'centre_1', 'effective_radius_b']
[-0.18103363748883755, 0.05487794833439358, 0.03831901356845069, 3.7222800869020216, 0.446909482897077, 3762608466.9900746, 0.03341642059411545, 1.9033877977283176, -0.23952487865191285, -1.8186867595354448, 1.2609450140936616]
<autofit.non_linear.samples.NestSamples object at 0x7f8bf7df8c18>
   elliptical_comps_0  elliptical_comps_1  intensity_a  sersic_index  \
0           -0.181034            0.054878     0.038319       3.72228   

   mass_to_light_ratio   mass_at_200  intensity_b  effective_radius_a  \
0             0.446909  3.762608e+09     0.033416            1.903388   

   centre_0  centre_1  effec

Unnamed: 0,elliptical_comps_0,elliptical_comps_1,intensity_a,sersic_index,mass_to_light_ratio,mass_at_200,intensity_b,effective_radius_a,centre_0,centre_1,effective_radius_b
0,-0.181034,0.054878,0.038319,3.72228,0.446909,3762608000.0,0.033416,1.903388,-0.239525,-1.818687,1.260945
1,-0.179755,0.041359,0.054072,2.93994,0.001203,9537455000.0,0.047161,0.877885,-0.349244,-1.937543,1.756428
2,-0.142112,0.092537,0.050157,2.414843,0.070413,168066200.0,0.097908,1.980444,-0.227487,-1.876202,0.553743
3,-0.225547,0.271619,0.038813,3.544347,0.00526,1058474000000.0,0.019127,1.815676,0.002692,-1.112506,2.451885
4,-0.075013,0.303495,0.043064,3.462219,1.368334,194244600000.0,0.026541,1.740168,-0.312031,-1.813174,1.864348


## TODO: I don't know for sure what order these parameters are in. I assume it's lens and then source, but why would intensity_b be listed before effective_radius_a?

In [143]:
# Upper and lower error vectors at 3 sigma, one row per samples object.
# NOTE(review): in the printed output the rows do not all appear to share one
# parameter ordering (the ~1e14-1e15 entry sits in a different position in some
# rows) -- confirm the column alignment before comparing across fits.
ue3_vectors = np.array(
    [fit_samples.error_vector_at_upper_sigma(sigma=3.0) for fit_samples in samples]
)
le3_vectors = np.array(
    [fit_samples.error_vector_at_lower_sigma(sigma=3.0) for fit_samples in samples]
)

print(ue3_vectors)
print(le3_vectors)

[[3.51010926e-01 3.72959247e-01 5.23419420e-02 3.19366913e+00
  2.74225077e+01 9.04639380e+14 2.33548629e-01 2.35722172e-01
  2.88312057e+00 3.09859312e+00 2.38325667e+00]
 [3.38731619e-01 3.60343402e-01 4.80357855e-02 3.60731862e+00
  1.83182473e-01 7.87035873e+00 1.07732831e+01 8.58647477e+00
  2.69235211e+01 9.30949747e+14 2.56872028e-01]
 [3.22396518e-01 3.72921175e-01 5.29196410e-02 2.65831546e+00
  2.82253820e+01 9.08309239e+14 1.60593530e-01 2.44650349e-01
  2.90293564e+00 3.43404607e+00 2.31743425e+00]
 [3.83449660e-01 3.40019262e-01 5.34570716e-02 3.28807914e+00
  8.76600675e+05 8.55339470e+14 4.97809844e+05 5.35140290e-01
  2.78027592e+00 3.20083830e+00 2.36991665e+00]
 [3.12332411e-01 3.58876906e-01 6.30667542e-02 3.40106775e+00
  1.08683253e-01 7.10842282e+00 1.16384631e+01 8.43504739e+00
  2.48571871e+01 9.66177403e+14 2.54825194e-01]]
[[3.59244835e-01 3.83976422e-01 2.98635184e-02 1.64431101e+00
  7.28899604e-03 1.50646247e+11 4.53264560e-04 5.21366388e-01
  3.03760193e+0