In [None]:
import matplotlib.pyplot as plt
import numpy as np
import warnings, json, astropy, os
import astropy.io.fits as fits
from astropy.io.fits import getdata
import astropy.units as u
from astropy.coordinates import SkyCoord
warnings.filterwarnings("ignore")
from qa_gawa import plot_pure, plot_comp, full_completeness_distances
import matplotlib 
#matplotlib.rc('xtick', labelsize=20) 
#matplotlib.rc('ytick', labelsize=20) 
# Global font settings for every figure in this notebook.
# 'normal' is not a font family known to matplotlib (it only triggers findfont
# fallback messages); 'sans-serif' selects the default sans-serif font explicitly.
font = {'family' : 'sans-serif',
        'weight' : 'normal',
        'size'   : 12}

matplotlib.rc('font', **font)
# plt.rcParams["figure.figsize"] = (10,8)
# plt.style.use(['science' , 'notebook', 'grid'])

## Introduction

This Jupyter notebook analyses detections of simulated star clusters.
One table lists the simulated clusters that were inserted into a stellar field; another table
lists the detections, which are positionally matched to the simulated clusters further below.

In this round, 66 globular clusters with exponential density profile were simulated,
where each cluster is located at the center of a HealPix pixel with nside=64,
with the distance between one cluster and another ~1 degree and a half-light radius of ~1 arcmin.

Below is information about detections only.

Run this jupyter notebook in the LIneA env with the following command:
<br>
`jupyter nbconvert --execute --to html --EmbedImagesPreprocessor.embed_images=True qa_gawa.ipynb`
<br>
and after the command has finished, run the following command:
<br>
`cp qa_gawa.html ~/public_html/gawa_processes/00010/detections/qa`
<br>
where 00010 is the process number.



In [None]:
# Main settings: JSON configuration file of this QA run.
confg = "qa_gawa.json"

# Read the configuration file into a dictionary.
with open(confg) as fstream:
    param = json.load(fstream)

# Expose every configuration key as a notebook-level variable. The cells below
# read input_detection_path, input_simulation_path and match_file from here.
globals().update(param)

# Create the folder for the QA results. os.makedirs(..., exist_ok=True) is the
# equivalent of `mkdir -p` but does not pass the configured path through a
# shell, so spaces or shell metacharacters in the path cannot break (or inject
# into) the command.
os.makedirs(input_detection_path + "/qa", exist_ok=True)


## Matching detections and simulations
### Reading data
Reading data from detections and simulations:

In [None]:
# Detected clusters: FITS table with (at least) the "ra" and "dec" columns.
det_file = input_detection_path + '/clusters.fits'
data_det = getdata(det_file)
ra_det = data_det["ra"]
dec_det = data_det["dec"]

# Distance slices used by the detection, converted from parsec to distance modulus.
slices_file = input_detection_path + '/dslices.fits'
data_sl = getdata(slices_file)
d_slices_pc = data_sl["dist_pc"]
mM_slices = 5 * np.log10(d_slices_pc) - 5.

# Histogram bin edges centred on the slices.
# NOTE: needs at least two slices; the spacing is taken from the first two only.
bin_size_mM = mM_slices[1] - mM_slices[0]
bins_mM = np.linspace(mM_slices[0] - bin_size_mM / 2, mM_slices[-1] + bin_size_mM / 2, len(mM_slices) + 1, endpoint=True)
print(bins_mM)

# Simulated clusters: keep the raw text lines (header line dropped) so they can
# be copied verbatim into the match file, and load the numeric columns used below.
file_sim = input_simulation_path + '/star_clusters_simulated.dat'
with open(file_sim, 'r') as f_sim:  # context manager closes the file handle
    data_sim = f_sim.readlines()[1:]

ra_sim, dec_sim, SNR_sim_all = np.loadtxt(file_sim, usecols=(9, 10, 6), unpack=True)

### Matching with astropy search around sky function

In [None]:
C_sim = SkyCoord(ra=ra_sim*u.degree, dec=dec_sim*u.degree)
C_det = SkyCoord(ra=ra_det*u.degree, dec=dec_det*u.degree)

# Every (simulation, detection) pair closer than 1 arcmin on the sky.
# idx_sim indexes C_sim and idx_det indexes C_det.
idx_sim, idx_det, d2d, d3d = C_det.search_around_sky(C_sim, 1*u.arcmin)

# Detections without any simulated counterpart (ascending row indices).
idx_det_outliers = np.setdiff1d(np.arange(len(data_det)), idx_det)

# Number of columns of a simulation row; unmatched detections get that many
# -99.999 placeholders so every row of the match file has the same width.
n_sim_columns = len(data_sim[0].split())

with open(match_file, 'w') as file_match:
    print('#0-peak_id 1-ra 2-dec 3-iobj 4-jobj 5-dist_init_kpc 6-dist_err_kpc 7-dist_min_kpc 8-dist_max_kpc 9-coverfrac 10-coverfrac_bkg 11-wradius_arcmin 12-snr 13-Naper 14-Naper_tot 15-NWaper_tot 16-Naper_bkg 17-icyl 18-tile 19-slice 20-id_in_tile 21-id 22-HPX64 23-N 24-MV 25-SNR 26-N_f 27-MV_f 28-SNR_f 29-L 30-B 31-ra 32-dec 33-r_exp 34-ell 35-pa 36-mass 37-dist', file=file_match)

    # Matched detections: detection row followed by the simulation row.
    # The simulation line is stripped of its own newline and print() adds one,
    # so a last line without a trailing newline cannot merge two rows.
    for i, j in zip(idx_sim, idx_det):
        print(*data_det[j], data_sim[i].rstrip('\n'), sep=' ', file=file_match)

    # Unmatched detections: detection row followed by placeholders.
    for i in idx_det_outliers:
        print(*data_det[i], ' -99.999 ' * n_sim_columns, sep=' ', file=file_match)

In [None]:
# Load the columns of the match file used in the plots below (column numbers
# follow the header line of the match file). Unmatched detections carry
# -99.999 in every simulation column (22 and above).
# M_V_sim_det is read from column 27 (MV_f) rather than 24 (MV), consistent
# with the final cell of this notebook and with M_V taken from the simulation table.
ra_det, dec_det, dist_init_kpc_det, dist_err_kpc_det, \
HPX64, SNR_det, SNR_sim, wrad_arcmin_det, exp_rad_sim, dist_sim, M_V_sim_det = np.loadtxt(
    match_file,
    usecols=(1,    # ra (detection)
             2,    # dec (detection)
             5,    # dist_init_kpc
             6,    # dist_err_kpc
             22,   # HPX64
             12,   # snr (detection)
             28,   # SNR_f (simulation)
             11,   # wradius_arcmin
             33,   # r_exp
             37,   # dist (simulation)
             27),  # MV_f
    unpack=True)


Below, two boolean masks are created: one selecting the detections that were matched to a simulated
cluster (confirmed), and one selecting the detections that are only candidates (not matched).

In [None]:
# Matched rows carry the simulated SNR (positive); unmatched rows carry the
# -99.999 placeholder, so the sign of SNR_sim separates the two groups.
real_det = np.greater(SNR_sim, 0.)
false_positive = np.less_equal(SNR_sim, 0.)

Below, the spatial distribution of the simulated clusters, of the detections matched to a simulation
and of the detections that are not matched is shown, colour-coded by SNR.

In [None]:
# plt.get_cmap is available in all matplotlib versions; plt.cm.get_cmap was
# deprecated in matplotlib 3.7 and removed in 3.9.
cm = plt.get_cmap('copper_r')

# One colour scale (0 .. max detected SNR) shared by all three scatter layers,
# so the colorbar is valid for each of them.
snr_det_max = np.max(SNR_det)

fig = plt.figure(figsize=(16, 10))
plt.scatter(ra_sim, dec_sim, c=SNR_sim_all,
            vmin=0, vmax=snr_det_max,
            cmap=cm,
            s=100.0, marker='^', label='Simulations')
sc = plt.scatter(ra_det[real_det], dec_det[real_det], c=SNR_det[real_det],
                 vmin=0, vmax=snr_det_max, marker='x',
                 s=150., cmap=cm, label='Simulated and detected')
plt.scatter(ra_det[false_positive], dec_det[false_positive], c=SNR_det[false_positive],
            vmin=0, vmax=snr_det_max,
            s=200.0, cmap=cm, lw=2, alpha=0.75, label='Not matched')
plt.colorbar(sc,label = 'SNR detection')
# RA axis is drawn reversed (larger values on the left).
plt.xlim(np.max(ra_sim)+0.5, np.min(ra_sim)-0.5)
plt.ylim(np.min(dec_sim)-0.5, np.max(dec_sim)+1.0)
plt.xlabel('RA')
plt.ylabel('DEC')
plt.title('Spatial distribution of clusters')
plt.legend(loc=1)
plt.show()

In [None]:
fig = plt.figure(figsize=(16, 10))
plt.scatter(ra_sim, dec_sim, c=SNR_sim_all,
            vmin=0, vmax=np.max(SNR_det),
            cmap=cm,
            s=100.0, marker='^', label='Simulations')

# The SNR cut is applied separately to matched and unmatched detections.
# (Combining false_positive with a mask that already contains real_det always
# yields an empty selection, which would hide every unmatched detection.)
snr_cut = SNR_det > 5
cond = real_det & snr_cut
unmatched_cut = false_positive & snr_cut

# Upper end of the colour scale; fall back to all detections when no matched
# detection passes the cut, so np.max is never called on an empty array.
snr_vmax = np.max(SNR_det[cond]) if np.any(cond) else np.max(SNR_det)

sc = plt.scatter(ra_det[cond], dec_det[cond], c=SNR_det[cond],
                 vmin=5, vmax=snr_vmax, marker='x',
                 s=150., cmap=cm, label='Simulated and detected')
plt.scatter(ra_det[unmatched_cut], dec_det[unmatched_cut], c=SNR_det[unmatched_cut],
            vmin=5, vmax=snr_vmax,
            s=200.0, cmap=cm, lw=2, alpha=0.75, label='Not matched')
plt.colorbar(sc,label = 'SNR detection')
# RA axis is drawn reversed (larger values on the left).
plt.xlim(np.max(ra_sim)+0.5, np.min(ra_sim)-0.5)
plt.ylim(np.min(dec_sim)-0.5, np.max(dec_sim)+1.0)
plt.xlabel('RA')
plt.ylabel('DEC')
plt.title('Spatial distribution of clusters with SNR > 5')
plt.legend(loc=1)
plt.show()

In [None]:
fig = plt.figure(figsize=(16, 10))
plt.scatter(ra_sim, dec_sim, c=SNR_sim_all, vmin=0, vmax=np.max(SNR_det), cmap=cm, s=100.0, marker='^', label='Simulations')

# The SNR cut is applied separately to matched and unmatched detections.
# (Combining false_positive with a mask that already contains real_det always
# yields an empty selection, which would hide every unmatched detection.)
snr_cut = SNR_det > 10
cond = real_det & snr_cut
unmatched_cut = false_positive & snr_cut

# Upper end of the colour scale; fall back to all detections when no matched
# detection passes the cut, so np.max is never called on an empty array.
snr_vmax = np.max(SNR_det[cond]) if np.any(cond) else np.max(SNR_det)

sc = plt.scatter(ra_det[cond], dec_det[cond], c=SNR_det[cond], vmin=10, vmax=snr_vmax, marker='x',
                 s=150., cmap=cm, label='Simulated and detected')
plt.scatter(ra_det[unmatched_cut], dec_det[unmatched_cut], c=SNR_det[unmatched_cut],
            vmin=10, vmax=snr_vmax,
            s=200.0, cmap=cm, lw=2, alpha=0.75, label='Not matched')
plt.colorbar(sc, label = 'SNR detection')
# RA axis is drawn reversed (larger values on the left).
plt.xlim(np.max(ra_sim) + 0.5, np.min(ra_sim)-0.5)
plt.ylim(np.min(dec_sim) - 0.5, np.max(dec_sim)+1.0)
plt.xlabel('RA')
plt.ylabel('DEC')
plt.title('Spatial distribution of clusters with SNR > 10')
plt.legend(loc=1)
plt.show()

Note that all detections matched to a simulated cluster (true positives, TP) have a high signal-to-noise
ratio (SNR), while the detections with low SNR are false positives (FP), drawn as circles.

It is important to highlight that, in this case, all simulated clusters were detected. In runs where
not all simulated clusters are detected, the table of simulated clusters should be read again
to assess how complete the detection is.

## Purity of detections versus distance modulus

Below we calculate the detection purity as a function of the distance modulus of the detections.

In [None]:
# Column layout of the match file (detection columns first, then simulation columns):
#    0-peak_id  1-ra  2-dec  3-iobj  4-jobj  5-dist_init_kpc  6-dist_err_kpc
#    7-dist_min_kpc  8-dist_max_kpc  9-coverfrac  10-coverfrac_bkg
#   11-wradius_arcmin  12-snr  13-Naper  14-Naper_tot  15-NWaper_tot  16-Naper_bkg
#   17-icyl  18-tile  19-slice  20-id_in_tile  21-id
#   22-HPX64  23-N  24-MV  25-SNR  26-N_f  27-MV_f  28-SNR_f  29-L  30-B
#   31-ra  32-dec  33-r_exp  34-ell  35-pa  36-mass  37-dist
# Detected distance and its error, both in kpc.
dist_kpc_det, disterr_kpc_det = np.loadtxt(match_file, usecols=(5, 6)).T

# Distance modulus from a distance in kpc: m - M = 5 log10(d / kpc) + 10.
m_M_det = 10. + 5 * np.log10(dist_kpc_det)

# All detections versus the subset matched to a simulated cluster, binned with bins_mM.
m_M_matched = m_M_det[real_det]
plot_pure(m_M_det, m_M_matched, 'Detection distance module', 'Detection distance module(pureness)', bins_mM)

In [None]:
# Same comparison for the detection SNR: all detections versus the subset
# matched to a simulated cluster.
snr_det_matched = SNR_det[real_det]
plot_pure(SNR_det, snr_det_matched, 'Signal-to-noise ratio (detection)', 'Signal-to-noise ratio(pureness)')

In [None]:
# Read the table of simulated clusters, one variable per selected column.
# NOTE(review): going by the match-file header, columns 4-6 of this table look
# like the N_f / MV_f / SNR_f values - confirm against the simulation code.
sim_table_file = input_simulation_path + '/star_clusters_simulated.dat'
sim_table_columns = (0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
(ipix, Nstar, M_V, SNR, L, B, RA_pix, DEC_pix,
 r_exp_pc, ell, pa, mass, dist) = np.loadtxt(sim_table_file, usecols=sim_table_columns).T

# idx_sim holds the indices of the simulated clusters that have a matched detection.
plot_comp(M_V, idx_sim, 'M_V', 'Absolute Magnitude in V band')

In [None]:
# Same call as for M_V, now with the simulated distance.
comp_dist_text = ('r (pc) simulated', 'Distance (simulations)')
plot_comp(dist, idx_sim, *comp_dist_text)

In [None]:
# Same call as for M_V, now with the simulated signal-to-noise ratio.
comp_snr_text = ('SNR', 'Signal to Noise Ratio')
plot_comp(SNR, idx_sim, *comp_snr_text)

In [None]:
# Distance modulus of the simulated clusters: m - M = 5 log10(d) - 5.
# assumes dist is in parsec - TODO confirm against the simulation table
log10_dist = np.log10(dist)
mM_sim = -5. + 5 * log10_dist

comp_mM_text = ('m-M', 'Distance modulus')
plot_comp(mM_sim, idx_sim, *comp_mM_text)

### More plots

In [None]:
# TODO: convert to a function.
# Angular half-light radius (arcmin) of the simulated cluster behind each detection.
# The factor 1.7 presumably converts the exponential scale radius into a half-light
# radius - confirm against the simulation code.
# Unmatched detections carry -99.999 in both simulation columns, which would give
# a meaningless 45 deg angle; they are set to NaN so matplotlib does not draw them.
half_light_radius_arcmin = np.where(
    real_det,
    1.7 * 60. * np.rad2deg(np.arctan(exp_rad_sim / dist_sim)),
    np.nan)

# NOTE(review): the detection radius is divided by 4 before the comparison; the
# origin of this factor is not visible in this notebook - confirm.
wrad_scaled = wrad_arcmin_det / 4
one_to_one = np.linspace(0., 1.1 * np.max(wrad_arcmin_det), 4)
axis_max = 1.1 * np.max(wrad_scaled)

fig = plt.figure(figsize=(16, 10))
plt.scatter(half_light_radius_arcmin, wrad_scaled, c='k', marker='s')
plt.plot(one_to_one, one_to_one, color = 'r')  # 1:1 reference line
plt.xlabel(r'$r_{1/2}$ arcmin (simulations)')
plt.ylabel(r'wrad arcmin (detection)')
plt.xlim([0, axis_max])
plt.ylim([0, axis_max])
plt.show()

In [None]:
# Largest SNR among simulations and detections; sets the 1:1 line and the axis range.
snr_peak = max(np.max(SNR_sim), np.max(SNR_det))
identity_line = np.linspace(0., 1.1 * snr_peak, 4)
snr_axis_range = [0.1, 1.05 * snr_peak]

fig, ax = plt.subplots(figsize=(16, 10))
ax.scatter(SNR_sim, SNR_det)
ax.plot(identity_line, identity_line, color = 'r')  # 1:1 reference line
ax.set_xlabel('SNR (simulations)')
ax.set_ylabel('SNR (detections)')
# The axes start at 0.1, so the negative placeholders of unmatched rows fall outside the plot.
ax.set_xlim(snr_axis_range)
ax.set_ylim(snr_axis_range)
plt.show()

In [None]:
# Simulated distance converted from pc to kpc (same length as the match table).
dist_sim_kpc = dist_sim / 1000

if np.any(real_det):
    # Only matched detections have a simulated distance; unmatched rows carry the
    # -99.999 placeholder, which would otherwise drag the axis range below zero
    # and add meaningless points.
    dist_sim_kpc_matched = dist_sim_kpc[real_det]
    dist_det_kpc_matched = dist_init_kpc_det[real_det]
    dist_err_kpc_matched = dist_err_kpc_det[real_det]

    # Common range for both axes and for the 1:1 line, with a 20% margin.
    dist_lo = 0.8 * min(np.min(dist_sim_kpc_matched), np.min(dist_det_kpc_matched))
    dist_hi = 1.2 * max(np.max(dist_sim_kpc_matched), np.max(dist_det_kpc_matched))
    one_to_one = np.linspace(dist_lo, dist_hi, 4)

    fig = plt.figure(figsize=(16, 10))
    plt.errorbar(dist_sim_kpc_matched, dist_det_kpc_matched, yerr=dist_err_kpc_matched, xerr=None,
                 fmt='o', c='k')
    plt.plot(one_to_one, one_to_one, color = 'r')  # 1:1 reference line
    plt.xlim([dist_lo, dist_hi])
    plt.ylim([dist_lo, dist_hi])
    plt.title('Comparing recovery distances')
    plt.xlabel('Distances (kpc) from simulations')
    plt.ylabel('Distances (kpc) from detections')
    plt.show()
else:
    print('No detection is matched to a simulated cluster: nothing to compare.')



In [None]:
# Column layout of the match file (detection columns first, then simulation columns):
#    0-peak_id  1-ra  2-dec  3-iobj  4-jobj  5-dist_init_kpc  6-dist_err_kpc
#    7-dist_min_kpc  8-dist_max_kpc  9-coverfrac  10-coverfrac_bkg
#   11-wradius_arcmin  12-snr  13-Naper  14-Naper_tot  15-NWaper_tot  16-Naper_bkg
#   17-icyl  18-tile  19-slice  20-id_in_tile  21-id
#   22-HPX64  23-N  24-MV  25-SNR  26-N_f  27-MV_f  28-SNR_f  29-L  30-B
#   31-ra  32-dec  33-r_exp  34-ell  35-pa  36-mass  37-dist
# Simulated r_exp, MV_f and distance attached to each row of the match file.
# NOTE(review): rows of unmatched detections hold -99.999 in these columns and
# are passed on unfiltered - confirm that full_completeness_distances expects that.
exp_rad_sim_det, M_V_sim_det, dist_sim_det = np.loadtxt(match_file, usecols=(33, 27, 37)).T

# Both radii are scaled by the same factor 1.7 before the comparison.
radius_all_sim = 1.7 * r_exp_pc
radius_in_match = 1.7 * exp_rad_sim_det

full_completeness_distances(M_V, M_V_sim_det, radius_all_sim, radius_in_match, dist, dist_sim_det)