In [None]:
import matplotlib.pyplot as plt
import numpy as np
import warnings, json, astropy, os, tabulate
import astropy.io.fits as fits
from astropy.io.fits import getdata
import astropy.units as u
from astropy.coordinates import SkyCoord
warnings.filterwarnings("ignore")
from qa_gawa import plot_pure, plot_comp, full_completeness_distances
import matplotlib 
# Global font settings applied to every figure of this notebook.
# 'sans-serif' is a valid generic font family; the previous value 'normal' is a
# font *weight*, not a family, so matplotlib could not resolve it and fell back
# to its default font while logging "findfont: Font family 'normal' not found".
font = {'family' : 'sans-serif',
        'weight' : 'normal',
        'size'   : 12}

matplotlib.rc('font', **font)

## Introduction

This Jupyter notebook shows plots that quantify the ability of Gawa to recover simulated stellar systems. <br>
The two main inputs are the list of clusters simulated and the list of detected clusters. <br>

The first task is to match the list of simulated clusters to the list of detected clusters.

Run this jupyter notebook in the LIneA env with the following command:
<br>
`jupyter nbconvert --execute --to html --EmbedImagesPreprocessor.embed_images=True qa_gawa.ipynb`
<br>
and after the command has finished, run the following command:
<br>
`cp qa_gawa.html ~/public_html/gawa_processes/00010/detections/qa`
<br>
where 00010 is the process number.

Reading the configuration file and creating a folder for the results:

In [None]:
# JSON file with the QA parameters of this notebook.
confg = "qa_gawa.json"

with open(confg) as fstream:
    param = json.load(fstream)

# Every key of the QA configuration becomes a notebook-level variable.
# The cells below use input_detection_path, input_simulation_path, match_file,
# unmatch_file and dist2match_arcmin, which are presumably provided by this file.
globals().update(param)

# Create the QA output folder with the standard library instead of shelling out
# to `mkdir -p`: this also works for paths containing spaces or shell
# metacharacters and does not depend on the operating system.
os.makedirs(input_detection_path + "/qa", exist_ok=True)

# Configuration used by the Gawa detection run (parsed here as JSON).
config_gawa = input_detection_path + "/gawa.cfg"

with open(config_gawa) as fstream:
    param2 = json.load(fstream)
# Isochrone mask model file of the survey selected in the Gawa configuration.
mask_file = param2['isochrone_masks'][param2['survey']]['model_file']

Cell below shows the list of params of the detection:

In [None]:
def recursive_print_dict(d, indent=0):
    """Print a (possibly nested) dictionary as an indented tree.

    Each key is printed on its own line, preceded by four spaces per nesting
    level. Values that are themselves dictionaries are printed recursively
    one level deeper; any other value is printed next to its key.

    Parameters
    ----------
    d : dict
        Dictionary to print.
    indent : int, optional
        Current nesting level (0 for the outermost dictionary).
    """
    pad = "    " * indent
    for key, value in d.items():
        if not isinstance(value, dict):
            # Leaf entry: key and value on the same line.
            print(pad, f"{key}:{value}")
            continue
        # Nested dictionary: print the key, then its content one level deeper.
        print(pad, f"{key}:")
        recursive_print_dict(value, indent + 1)

# Dump the whole Gawa detection configuration (param2) as an indented tree.
recursive_print_dict(param2)

Print all the masks file:

## Matching detections and simulations
### Reading data
Reading data from detections and simulations:

In [None]:
# --- Detections: clusters found by Gawa ------------------------------------
det_file = input_detection_path + '/clusters.fits'
data_det = getdata(det_file)
ra_det = data_det["ra"]
dec_det = data_det["dec"]

# --- Distance slices used by the detection ---------------------------------
slices_file = input_detection_path + '/dslices.fits'
data_sl = getdata(slices_file)
d_slices_pc = data_sl["dist_pc"]
# Distance modulus of each slice: m - M = 5 log10(d / pc) - 5.
mM_slices = 5 * np.log10(d_slices_pc) - 5.

# Bin edges centred on the slices. The bin width is taken from the first two
# slices, so this assumes at least two slices, evenly spaced in distance modulus.
bin_size_mM = mM_slices[1] - mM_slices[0]
bins_mM = np.linspace(mM_slices[0] - bin_size_mM / 2, mM_slices[-1] + bin_size_mM / 2, len(mM_slices) + 1, endpoint=True)

# --- Simulated clusters -----------------------------------------------------
file_sim = input_simulation_path + '/star_clusters_simulated.dat'

# Raw text rows of the catalogue (header line dropped); they are copied verbatim
# into the match / unmatch files later on. The context manager guarantees that
# the file handle is closed (it was previously left open).
with open(file_sim, 'r') as f_sim:
    data_sim = f_sim.readlines()[1:]

# Columns 9, 10 and 6 of the simulation catalogue (ra, dec and SNR_f according
# to the header written to the unmatch file).
ra_sim, dec_sim, SNR_sim_all = np.loadtxt(file_sim, usecols=(9, 10, 6), unpack=True)

In [None]:
# Isochrone mask: first column is plotted as colour, second as magnitude.
gr, g = np.loadtxt(mask_file, usecols=(0, 1), unpack=True)

# Mask settings of the survey selected in the Gawa configuration.
mask_cfg = param2['isochrone_masks'][param2['survey']]

fig, ax1 = plt.subplots(1, 1, figsize=(10,6))
# One curve per distance slice: the mask shifted by the slice distance modulus.
for mM in mM_slices:
    ax1.plot(gr, g + mM, label=f'm-M={mM:.2f}')
ax1.set_xlim(mask_cfg['mask_color_min'], mask_cfg['mask_color_max'])
# Magnitude axis is inverted (max first), so brighter stars are at the top.
ax1.set_ylim(mask_cfg['mask_mag_max'], mask_cfg['mask_mag_min'])
ax1.set_xlabel(r'$g_0-r_0$')
ax1.set_ylabel(r'$g_0$')
ax1.set_title('Masks applied to detection')
ax1.legend()
plt.show()

### Matching with astropy search around sky function

In [None]:
# Sky positions of the simulated clusters and of the Gawa detections.
C_sim = SkyCoord(ra=ra_sim*u.degree, dec=dec_sim*u.degree)
C_det = SkyCoord(ra=ra_det*u.degree, dec=dec_det*u.degree)

# All (simulation, detection) pairs closer than dist2match_arcmin on the sky.
# idx_sim indexes C_sim and idx_det indexes C_det; a cluster may appear in
# several pairs.
idx_sim, idx_det, d2d, d3d = C_det.search_around_sky(C_sim, dist2match_arcmin*u.arcmin)

# Detections without any simulated counterpart. A set gives constant-time
# membership tests (testing `i not in <ndarray>` scans the array every time).
matched_det = set(np.asarray(idx_det).tolist())
idx_det_outliers = [i for i in range(len(data_det)) if i not in matched_det]

idx_det_prev = [] # avoid duplicates in detections (currently unused: duplicates are kept)

# Number of columns of one simulated-cluster row, used to pad unmatched
# detections. Taken from the first data row (the former data_sim[1] raised an
# IndexError with a single simulated cluster); 16 is the number of simulation
# columns listed in the header below.
n_sim_cols = len(data_sim[0].split()) if data_sim else 16

with open(match_file, 'w') as file_match:
    print('#0-peak_id 1-ra 2-dec 3-iobj 4-jobj 5-dist_init_kpc 6-dist_err_kpc 7-dist_min_kpc 8-dist_max_kpc 9-coverfrac 10-coverfrac_bkg 11-wradius_arcmin 12-snr 13-Naper 14-Naper_tot 15-NWaper_tot 16-Naper_bkg 17-icyl 18-tile 19-slice 20-id_in_tile 21-id 22-HPX64 23-N 24-MV 25-SNR 26-N_f 27-MV_f 28-SNR_f 29-L 30-B 31-ra 32-dec 33-r_exp 34-ell 35-pa 36-mass 37-dist', file=file_match)

    # Matched pairs: detection columns followed by the simulated-cluster row.
    # The row's own newline is stripped and written explicitly, so a catalogue
    # whose last line lacks a trailing newline cannot glue two rows together.
    for i, j in zip(idx_sim, idx_det):
        print(*data_det[j], data_sim[i].rstrip('\n'), sep=' ', file=file_match)

    # Unmatched detections: simulation columns filled with the -99.999 filler.
    for i in idx_det_outliers:
        print(*data_det[i], ' -99.999 ' * n_sim_cols, sep=' ', file=file_match, end='\n')

# Simulated clusters that were not matched to any detection.
matched_sim = set(np.asarray(idx_sim).tolist())
idx_not_det = [i for i in range(len(data_sim)) if i not in matched_sim]

with open(unmatch_file, 'w') as file_unmatch:
    print('#0-HPX64 1-N 2-MV 3-SNR 4-N_f 5-MV_f 6-SNR_f 7-L 8-B 9-ra 10-dec 11-r_exp 12-ell 13-pa 14-mass 15-dist', file=file_unmatch)

    for i in idx_not_det:
        print(data_sim[i].rstrip('\n'), file=file_unmatch)

## Reading the match file and showing a few plots

In [None]:
# Columns read from the match file (numbering follows the header written to it):
# 1-ra 2-dec 5-dist_init_kpc 6-dist_err_kpc 11-wradius_arcmin 12-snr
# 22-HPX64 24-MV 28-SNR_f 33-r_exp 37-dist
# NOTE: ra_det / dec_det are overwritten here with the rows of the match file,
# so the matching cell above must not be re-run after this one without
# re-reading the detections first.
match_cols = (1, 2, 5, 6, 11, 12, 22, 24, 28, 33, 37)

# ndmin=2 keeps the table two-dimensional even when the file holds a single
# row; with unpack=True a one-row file produced scalars, which broke the
# len() calls and boolean-mask indexing in the cells below.
match_table = np.loadtxt(match_file, usecols=match_cols, ndmin=2)
if match_table.size == 0:
    # Header-only file: keep one (empty) array per requested column.
    match_table = np.empty((0, len(match_cols)))

ra_det, dec_det, dist_init_kpc_det, dist_err_kpc_det, wrad_arcmin_det, \
SNR_det, HPX64, M_V_sim_det, SNR_sim, exp_rad_sim, dist_sim = match_table.T

# Columns read from the unmatch file: 6-SNR_f 9-ra 10-dec.
unmatch_cols = (6, 9, 10)
unmatch_table = np.loadtxt(unmatch_file, usecols=unmatch_cols, ndmin=2)
if unmatch_table.size == 0:
    # Every simulated cluster was detected: the file only holds its header.
    unmatch_table = np.empty((0, len(unmatch_cols)))

SNR_sim_undet, ra_undet, dec_undet = unmatch_table.T


Below, two boolean masks are created: one selects the detections matched to a simulated cluster
(true positives) and the other selects the detections without a simulated counterpart (false positives, i.e. candidates only).

In [None]:
# Matched rows carry the simulated SNR (positive); unmatched detections were
# written with the -99.999 filler, so their simulated SNR is not positive.
true_positive = (SNR_sim > 0.)
false_positive = (SNR_sim <= 0.)

# A simulated cluster is identified by its HPX64 value, so unique values are
# counted to avoid counting a cluster matched by several detections twice.
n_true_positive = len(np.unique(HPX64[true_positive]))

print('Total of clusters simulated: {:d}.'.format(len(ra_sim)))
print(f'Total of clusters detected: {n_true_positive:d} (True Positives).')
print('Minimum SNR detected: {:.4f}'.format(np.min(SNR_det)))
for snr_min in (3, 5, 10):
    n_above = len(np.unique(HPX64[true_positive & (SNR_det > snr_min)]))
    print(f'Total of clusters detected with SNR > {snr_min}: {n_above:d}.')
print('Total of clusters undetected: {:d}.'.format(len(ra_undet)))

In [None]:
fig, ax1 = plt.subplots(1, 1, figsize=(10,6))

# Same binning for the three histograms so that they can be compared directly.
snr_span = (np.min(SNR_det) - 1., np.max(SNR_det) + 1)
snr_layers = (
    (SNR_det, "r", 4, 'SNR'),
    (SNR_det[true_positive], "mediumblue", 2, 'SNR (true positives)'),
    (SNR_det[false_positive], "maroon", 3, 'SNR (false positives)'),
)
for values, colour, width, tag in snr_layers:
    ax1.hist(values, bins=20, range=snr_span, histtype='step',
             color=colour, lw=width, label=tag)

ax1.set_xlabel('SNR')
ax1.set_ylabel('# Clusters')
# Log scale keeps the sparsely populated bins visible.
ax1.set_yscale('log')
ax1.legend()

fig.suptitle('SNR Histogram (detections)')
plt.show()

In [None]:
fig, ax1 = plt.subplots(1, 1, figsize=(10,6))

# Same binning for the three histograms so that they can be compared directly.
dist_span = (np.min(dist_init_kpc_det) - 1., np.max(dist_init_kpc_det) + 1)
dist_layers = (
    (dist_init_kpc_det, "r", 4, 'Distances (kpc)'),
    (dist_init_kpc_det[true_positive], "mediumblue", 2, 'Distances [kpc] (true positives)'),
    (dist_init_kpc_det[false_positive], "maroon", 3, 'Distances [kpc] (false positives)'),
)
for values, colour, width, tag in dist_layers:
    ax1.hist(values, bins=20, range=dist_span, histtype='step',
             color=colour, lw=width, label=tag)

ax1.set_xlabel('Distances [kpc]')
ax1.set_ylabel('# Clusters')
# ax1.set_yscale('log')
ax1.legend()

fig.suptitle('Distances Histogram (detections)')
plt.show()

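Below, the spatial distribution of the simulated clusters is shown together with the matched detections
(true positives), the detections that were not matched to any simulation and the simulated clusters that were not detected.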

In [None]:
# plt.get_cmap is available in every matplotlib release, whereas
# plt.cm.get_cmap was deprecated in matplotlib 3.7 and later removed.
cm = plt.get_cmap('copper_r')

# Common colour scale (0 .. highest detected SNR) shared by every layer, so
# that all markers can be read against the single colorbar drawn below.
snr_max = np.max(SNR_det)

fig = plt.figure(figsize=(16, 10))
# Simulated clusters, coloured by their simulated SNR.
plt.scatter(ra_sim, dec_sim, c=SNR_sim_all, vmin=0, vmax=snr_max, cmap=cm, s=100.0, marker='^',
            label='Simulations: ({:d})'.format(len(ra_sim)))
# Detections matched to a simulated cluster, coloured by the detection SNR.
sc = plt.scatter(ra_det[true_positive], dec_det[true_positive], c=SNR_det[true_positive], vmin=0, vmax=snr_max, marker='x', s=150.,
                 cmap=cm, label='True Positives: ({:d})'.format(len(np.unique(HPX64[true_positive]))))
# Detections without a simulated counterpart. vmin/vmax were missing here, so
# these colours were auto-scaled and did not correspond to the colorbar.
plt.scatter(ra_det[false_positive], dec_det[false_positive], c=SNR_det[false_positive], vmin=0, vmax=snr_max, s=200.0, cmap=cm,
            lw=2, alpha=0.75, label='Not matched: ({:d})'.format(len(ra_det[false_positive])))
# Simulated clusters that were not detected (empty black circles).
plt.scatter(ra_undet, dec_undet, color='None', edgecolor='k', s=200.0,
            lw=2, alpha=0.75, label='Not detected: ({:d})'.format(len(ra_undet)))
plt.colorbar(sc,label = 'SNR detection')
# RA axis is drawn decreasing to the right.
plt.xlim(np.max(ra_sim)+0.5, np.min(ra_sim)-0.5)
plt.ylim(np.min(dec_sim)-0.5, np.max(dec_sim)+1.0)
plt.xlabel('RA')
plt.ylabel('DEC')
plt.title('Spatial distribution of clusters (SN > 3.)')
plt.legend(loc=1)
plt.show()

In [None]:
# Common colour scale (0 .. highest detected SNR) shared by every layer.
snr_max = np.max(SNR_det)

# Detections above the SNR threshold, split into matched / unmatched.
# Previously the false positives were selected with `false_positive & cond`,
# where cond already required true_positive, so that selection was always
# empty and no unmatched detection was ever drawn.
snr_cut = SNR_det > 5
cond = (true_positive)&(snr_cut)
fp_cut = (false_positive)&(snr_cut)

fig = plt.figure(figsize=(16, 10))
# Simulated clusters, coloured by their simulated SNR.
plt.scatter(ra_sim, dec_sim, c=SNR_sim_all, vmin=0, vmax=snr_max, cmap=cm, s=100.0, marker='^',
            label='Simulations: ({:d})'.format(len(ra_sim)))
# True positives above the cut; the label now counts only the plotted
# clusters instead of every true positive.
sc = plt.scatter(ra_det[cond], dec_det[cond],
                 c=SNR_det[cond], vmin=0, vmax=snr_max, marker='x', s=150.,
                 cmap=cm, label='True Positives: ({:d})'.format(len(np.unique(HPX64[cond]))))
# Unmatched detections above the cut, on the same colour scale as the colorbar.
plt.scatter(ra_det[fp_cut], dec_det[fp_cut],
            c=SNR_det[fp_cut], vmin=0, vmax=snr_max, s=200.0, cmap=cm,
            lw=2, alpha=0.75, label='Not matched: ({:d})'.format(len(ra_det[fp_cut])))
# Simulated clusters that were not detected (empty black circles).
plt.scatter(ra_undet, dec_undet, color='None', edgecolor='k', s=200.0,
            lw=2, alpha=0.75, label='Not detected: ({:d})'.format(len(ra_undet)))
plt.colorbar(sc,label = 'SNR detection')
# RA axis is drawn decreasing to the right.
plt.xlim(np.max(ra_sim)+0.5, np.min(ra_sim)-0.5)
plt.ylim(np.min(dec_sim)-0.5, np.max(dec_sim)+1.0)
plt.xlabel('RA')
plt.ylabel('DEC')
plt.title('Spatial distribution of clusters with SNR > 5')
plt.legend(loc=1)
plt.show()

In [None]:
# Common colour scale (0 .. highest detected SNR) shared by every layer.
snr_max = np.max(SNR_det)

# Detections above the SNR threshold, split into matched / unmatched.
# Previously the false positives were selected with `false_positive & cond`,
# where cond already required true_positive, so that selection was always
# empty and no unmatched detection was ever drawn.
snr_cut = SNR_det > 10
cond = (true_positive)&(snr_cut)
fp_cut = (false_positive)&(snr_cut)

fig = plt.figure(figsize=(16, 10))
# Simulated clusters, coloured by their simulated SNR.
plt.scatter(ra_sim, dec_sim, c=SNR_sim_all, vmin=0, vmax=snr_max, cmap=cm, s=100.0, marker='^',
            label='Simulations: ({:d})'.format(len(ra_sim)))
# True positives above the cut; the label now counts only the plotted
# clusters instead of every true positive.
sc = plt.scatter(ra_det[cond], dec_det[cond],
                 c=SNR_det[cond], vmin=0, vmax=snr_max, marker='x', s=150.,
                 cmap=cm, label='True Positives: ({:d})'.format(len(np.unique(HPX64[cond]))))
# Unmatched detections above the cut, on the same colour scale as the colorbar.
plt.scatter(ra_det[fp_cut], dec_det[fp_cut],
            c=SNR_det[fp_cut], vmin=0, vmax=snr_max, s=200.0, cmap=cm,
            lw=2, alpha=0.75, label='Not matched: ({:d})'.format(len(ra_det[fp_cut])))
# Simulated clusters that were not detected (empty black circles).
plt.scatter(ra_undet, dec_undet, color='None', edgecolor='k', s=200.0,
            lw=2, alpha=0.75, label='Not detected: ({:d})'.format(len(ra_undet)))
plt.colorbar(sc, label = 'SNR detection')
# RA axis is drawn decreasing to the right.
plt.xlim(np.max(ra_sim) + 0.5, np.min(ra_sim)-0.5)
plt.ylim(np.min(dec_sim) - 0.5, np.max(dec_sim)+1.0)
plt.xlabel('RA')
plt.ylabel('DEC')
plt.title('Spatial distribution of clusters with SNR > 10')
plt.legend(loc=1)
plt.show()

We can notice that all detected and true clusters (True Positives) have high signal-to-noise ratio (SNR),
while those with low SNR are false positives (FP), represented by small blue circles.

It is important to highlight in this case that all simulated clusters were detected. There may be cases where
not all simulated clusters are detected. In this case, the code should read the clusters
simulated again to see how complete the detection is.

In [None]:
# Convert to function:
half_light_radius_arcmin = 1.7 * 60. * np.rad2deg(np.arctan(exp_rad_sim / dist_sim))
'''
fig = plt.figure(figsize=(16, 10))
plt.scatter(10*half_light_radius_arcmin, wrad_arcmin_det, c='k', marker='s')
plt.plot(np.linspace(0., 1.1 * np.max(wrad_arcmin_det), 4), np.linspace(0., 1.1 * np.max(wrad_arcmin_det), 4), color = 'r')
plt.xlabel(r'$10 x r_{1/2}$ arcmin (simulations)')
plt.ylabel(r'wrad arcmin (detection)')
plt.xlim([0, 1.1*np.max(wrad_arcmin_det)])
plt.ylim([0, 1.1*np.max(wrad_arcmin_det)])
plt.show()
'''

In [None]:
# Largest SNR among simulations and detections; it sets the extent of the
# one-to-one line and of both axes.
snr_top = max(np.max(SNR_sim),np.max(SNR_det))
identity = np.linspace(0., 1.1 * snr_top, 4)

fig = plt.figure(figsize=(16, 10))
plt.scatter(SNR_sim, SNR_det)
# One-to-one reference line.
plt.plot(identity, identity, color = 'r')
plt.xlabel('SNR (simulations)')
plt.ylabel('SNR (detections)')
# Axes start at 0.1, which leaves out the unmatched detections (their
# simulated SNR is the negative filler value).
plt.xlim([0.1, 1.05 * snr_top])
plt.ylim([0.1, 1.05 * snr_top])
plt.show()

In [None]:
# Simulated distances converted from pc to kpc.
dist_sim_kpc = dist_sim / 1000

# Only detections matched to a simulated cluster are compared.
tp_sim_kpc = dist_sim_kpc[true_positive]
tp_det_kpc = dist_init_kpc_det[true_positive]

# Axis limits come from the plotted points only. They were previously computed
# from every row, including the -99.999 filler of unmatched detections, which
# pushed the lower limit below zero and let unplotted detections set the upper
# limit.
if tp_sim_kpc.size:
    axis_lo = 0.8 * min(np.min(tp_sim_kpc), np.min(tp_det_kpc))
    axis_hi = 1.2 * max(np.max(tp_sim_kpc), np.max(tp_det_kpc))
else:
    # No matched detection: fall back to an arbitrary, valid range.
    axis_lo, axis_hi = 0., 1.
identity = np.linspace(axis_lo, axis_hi, 4)

fig = plt.figure(figsize=(16, 10))
plt.errorbar(tp_sim_kpc, tp_det_kpc, yerr=dist_err_kpc_det[true_positive], xerr=None,
             fmt='o', c='k')
# One-to-one reference line.
plt.plot(identity, identity, color = 'r')
plt.xlim([axis_lo, axis_hi])
plt.ylim([axis_lo, axis_hi])
plt.title('Comparing recovery distances')
plt.xlabel('Distances (kpc) from simulations')
plt.ylabel('Distances (kpc) from detections')
plt.show()

## Purity of detection distance modulus

In [None]:
# SNR thresholds, in steps of 1, spanning the SNR of the true positives.
SNR_range = np.arange(np.min(SNR_det[true_positive]), np.max(SNR_det[true_positive]), 1.)

comp_wrt_SNR = np.zeros(len(SNR_range))
pur_wrt_SNR = np.zeros(len(SNR_range))

for i, j in enumerate(SNR_range):
    above = SNR_det > j
    tp_above = true_positive & above
    # Completeness: fraction of simulated clusters recovered above the
    # threshold. Unique HPX64 values are counted (as in the summary printed
    # earlier), so that a simulated cluster matched by several detections is
    # counted once and the completeness cannot exceed 1.
    comp_wrt_SNR[i] = len(np.unique(HPX64[tp_above])) / len(ra_sim)
    # Purity: fraction of the detections above the threshold that are matched
    # to a simulated cluster. The denominator is never zero because every
    # threshold is below the highest true-positive SNR.
    pur_wrt_SNR[i] = np.count_nonzero(tp_above) / np.count_nonzero(above)

# The same curves are drawn twice: the full range, then a zoom on the region
# 3 < SNR < 10 where both quantities are close to 1.
for x_limits, y_limits in (([0., 1.1* np.max(SNR_range)], [0, 1.1]),
                           ([3., 10.], [0.9, 1.05])):
    fig, ax = plt.subplots(figsize=(16, 10))
    ax.plot(SNR_range, comp_wrt_SNR, label='Completeness', color='r', lw=2)
    ax.plot(SNR_range, pur_wrt_SNR, label='Purity', color='b', lw=2)
    ax.set_xlim(x_limits)
    ax.set_ylim(y_limits)
    ax.set_title('Purity/Completeness versus SNR')
    ax.set_xlabel('SNR from detections')
    ax.set_ylabel('Completeness / Purity')
    ax.legend()
    plt.show()

Below we will calculate the detection purity given the detected distance.

In [None]:
# Column layout of the match file:
# 0-peak_id 1-ra 2-dec 3-iobj 4-jobj 5-dist_init_kpc 6-dist_err_kpc 7-dist_min_kpc 8-dist_max_kpc 9-coverfrac
# 10-coverfrac_bkg 11-wradius_arcmin 12-snr 13-Naper 14-Naper_tot 15-NWaper_tot 16-Naper_bkg 17-icyl 18-tile 19-slice
# 20-id_in_tile 21-id 22-HPX64 23-N 24-MV 25-SNR 26-N_f 27-MV_f 28-SNR_f 29-L
# 30-B 31-ra 32-dec 33-r_exp 34-ell 35-pa 36-mass 37-dist'

# Detected distance and its error (columns 5 and 6, both in kpc per the header).
det_dist_cols = (5, 6)
dist_kpc_det, disterr_kpc_det = np.loadtxt(match_file, usecols=det_dist_cols, unpack=True)

# Distance modulus for a distance in kpc: m - M = 5 log10(d / kpc) + 10.
m_M_det = 5 * np.log10(dist_kpc_det) + 10.

# plot_pure comes from qa_gawa; it receives all detections, the matched subset,
# the axis label, the title and the distance-modulus bin edges.
plot_pure(
    m_M_det,
    m_M_det[true_positive],
    'Detection distance module',
    'Purity wrt Distance Modulus (detection)',
    bins_mM,
)

In [None]:
# Purity as a function of the detection SNR: all detections versus the subset
# matched to a simulated cluster. No bin edges are passed here, so the binning
# is whatever plot_pure (from qa_gawa) uses by default - verify there.
plot_pure(SNR_det, SNR_det[true_positive], 'Signal-to-noise ratio (detection)', 'Purity wrt Signal-to-Noise Ratio')

In [None]:
# Full simulation catalogue. Column 0 plus columns 4 to 15 are read; according
# to the header written to the unmatch file these are
# HPX64, N_f, MV_f, SNR_f, L, B, ra, dec, r_exp, ell, pa, mass and dist.
sim_catalog = input_simulation_path + '/star_clusters_simulated.dat'
sim_cols = (0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)

(ipix, Nstar, M_V, SNR, L, B, RA_pix, DEC_pix,
 r_exp_pc, ell, pa, mass, dist) = np.loadtxt(sim_catalog, usecols=sim_cols, unpack=True)

# plot_comp comes from qa_gawa; idx_sim holds the indices of the simulated
# clusters that were matched to a detection.
plot_comp(M_V, idx_sim, 'M_V', 'Absolute Magnitude in V band')

In [None]:
# Completeness as a function of the simulated distance. `dist` is the last
# column of the simulation catalogue and idx_sim holds the indices of the
# simulated clusters matched to a detection.
# NOTE(review): idx_sim may repeat an index when one simulated cluster matches
# several detections - confirm that plot_comp (from qa_gawa) handles this.
plot_comp(dist, idx_sim, 'r (pc) simulated', 'Completeness wrt Distance (simulations)')

In [None]:
# Completeness as a function of the simulated SNR (column 6 of the simulation
# catalogue); idx_sim holds the indices of the matched simulated clusters.
plot_comp(SNR, idx_sim, 'SNR', 'Completeness wrt Signal to Noise Ratio')

In [None]:
# Distance modulus of every simulated cluster: m - M = 5 log10(d) - 5.
# This form assumes `dist` is in parsec (it is labelled 'r (pc) simulated' in
# the completeness plot) - TODO confirm against the simulation catalogue.
mM_sim = 5 * np.log10(dist) - 5.

# Completeness as a function of the simulated distance modulus; idx_sim holds
# the indices of the simulated clusters matched to a detection.
plot_comp(mM_sim, idx_sim, 'm-M', 'Distance modulus')

In [None]:
# Column layout of the match file:
# 0-peak_id 1-ra 2-dec 3-iobj 4-jobj 5-dist_init_kpc 6-dist_err_kpc 7-dist_min_kpc 8-dist_max_kpc 9-coverfrac
# 10-coverfrac_bkg 11-wradius_arcmin 12-snr 13-Naper 14-Naper_tot 15-NWaper_tot 16-Naper_bkg 17-icyl 18-tile 19-slice
# 20-id_in_tile 21-id 22-HPX64 23-N 24-MV 25-SNR 26-N_f 27-MV_f 28-SNR_f 29-L
# 30-B 31-ra 32-dec 33-r_exp 34-ell 35-pa 36-mass 37-dist'

# Simulated properties of the rows of the match file: 33-r_exp, 27-MV_f, 37-dist.
# M_V_sim_det is re-read here from column 27 (MV_f); the earlier read of the
# match file took it from column 24 (MV).
matched_sim_cols = (33, 27, 37)
exp_rad_sim_det, M_V_sim_det, dist_sim_det = np.loadtxt(match_file, usecols=matched_sim_cols, unpack=True)

# NOTE(review): rows of unmatched detections hold the -99.999 filler in these
# three columns - confirm that full_completeness_distances (from qa_gawa)
# ignores them. The factor 1.7 scales the exponential radius, presumably into
# a half-light radius.
full_completeness_distances(
    M_V,
    M_V_sim_det,
    1.7 * r_exp_pc,
    1.7 * exp_rad_sim_det,
    dist,
    dist_sim_det,
)

## Clusters simulated but not detected

In [None]:
# Simulated clusters that were not detected (one row per cluster).
# ndmin=2 keeps the table two-dimensional when the file holds a single row;
# with unpack=True a one-row file produced scalars and len(N) failed.
unmatched = np.loadtxt(unmatch_file, ndmin=2)
if unmatched.size == 0:
    # Header-only file: every simulated cluster was detected. 16 is the number
    # of columns listed in the header of the unmatch file.
    unmatched = np.empty((0, 16))

# NOTE: this overwrites HPX64, SNR, L, B and dist defined by earlier cells, so
# those cells must be re-run before their plots can be reproduced.
HPX64, N, MV, SNR, N_f, MV_f, SNR_f, L, B, ra, dec, r_exp, ell, pa, mass, dist = unmatched.T

if len(HPX64) == 0:
    print('All simulated clusters were detected: nothing to plot.')
else:
    n_col = 4
    # Smallest number of rows that holds every cluster; the former
    # int(len(N) / n_col) + 1 added an empty row whenever the number of
    # clusters was a multiple of n_col.
    n_row = int(np.ceil(len(N) / n_col))

    # Isochrone mask: first column is plotted as colour, second as magnitude.
    gr, g = np.loadtxt(mask_file, usecols=(0, 1), unpack=True)
    # Mask settings of the survey selected in the Gawa configuration.
    mask_cfg = param2['isochrone_masks'][param2['survey']]

    # squeeze=False always returns a 2-D array of axes, even for a single row.
    fig, axs = plt.subplots(n_row, n_col, figsize=(16, 4 * n_row), dpi=150., squeeze=False)
    panels = list(axs.flat)

    for ax, HPX in zip(panels, HPX64):
        ax.set_title('HPX: {:d}'.format(int(HPX)))
        # Star catalogue of the pixel hosting this simulated cluster.
        data = fits.getdata(input_simulation_path + '/hpx_cats_clean/' + str(int(HPX)) + '.fits')
        MAGG = data['mag_g_with_err']
        MAGR = data['mag_r_with_err']
        GC = data['GC']
        # GC == 0 rows are drawn as 'MW' stars, GC == 1 rows as cluster stars.
        ax.scatter(MAGG[GC == 0] - MAGR[GC == 0], MAGG[GC == 0], color='lightgrey', label='MW', s=0.1)
        ax.scatter(MAGG[GC == 1] - MAGR[GC == 1], MAGG[GC == 1], color='r', label='Cluster', s=0.4)

        # Detection mask shifted to the distance modulus of every slice.
        for mM in mM_slices:
            ax.plot(gr, g + mM, label='m-M={:.2f}'.format(mM), lw=1)
        ax.set_xlim(mask_cfg['mask_color_min'], mask_cfg['mask_color_max'])
        # Magnitude axis is inverted (max first), so brighter stars are at the top.
        ax.set_ylim(mask_cfg['mask_mag_max'], mask_cfg['mask_mag_min'])
        ax.set_xlabel(r'$g_0-r_0$')
        ax.set_ylabel(r'$g_0$')
        # ax.legend()

    # Hide the panels of the last row that have no cluster to show.
    for ax in panels[len(HPX64):]:
        ax.set_visible(False)

    plt.tight_layout()
    plt.show()
