In [None]:
import os
import warnings, json, astropy

import astropy.io.fits as fits
import astropy.units as u
import numpy as np
from astropy.coordinates import SkyCoord
from astropy.io.fits import getdata
from matplotlib import pyplot as plt
from pandas import pandas as pd
# Silence every Python warning raised from this point on.
warnings.filterwarnings("ignore")
# Optional global plot styling, currently disabled:
# plt.rcParams["figure.figsize"] = (10,8)
# plt.style.use(['science' , 'notebook', 'grid'])

## Introduction

In this Jupyter notebook we work with simulated cluster detections.
There is a table of simulated clusters that were inserted into a stellar field, and another table
of detections, in which a positional match has already been made between the simulated and detected clusters.

In this round, 66 globular clusters with an exponential density profile were simulated.
Each cluster is located at the center of a HealPix pixel with nside=64,
with a distance of ~1 degree between neighbouring clusters and a half-light radius of ~1 arcmin.

Below is information about detections only.

In [None]:
# Main settings:
confg = "qa_gawa.json"

# Read the JSON configuration file into a dict.
with open(confg) as fstream:
    param = json.load(fstream)

# Expose every configuration key as a notebook-level variable.
# `globals` is a function: it must be called to obtain the namespace dict.
# (`input_detection_path` below is presumably one of these keys - confirm
# against the config file.)
globals().update(param)

# Directory for the QA results; created if it does not exist yet.
qa_dir = input_detection_path + "/qa"
os.makedirs(qa_dir, exist_ok=True)

# Output file for the simulated/detected match table. It is opened on its
# path (not on a shell exit status) and stays open for the cell that writes
# the matches.
file_match = open(qa_dir + "/match.dat", 'w')

In [None]:
Reading data from detections and simulations:

In [None]:
# FITS table with the detected clusters.
det_file = input_detection_path + '/clusters.fits'

data_det = getdata(det_file)
ra_det = data_det["ra"]
dec_det = data_det["dec"]

# Columns of the simulated-cluster table:
#0-HPX64 1-N 2-MV 3-SNR 4-N_f 5-MV_f 6-SNR_f 7-L 8-B 9-ra 10-dec 11-r_exp 12-ell 13-pa 14-mass 15-dist
# unpack=True transposes the table, so data_sim[k] is column k
# (one value per simulated cluster).
data_sim = np.loadtxt(input_detection_path + '/star_clusters_simulated.dat', unpack=True)

# Take RA/Dec (columns 9 and 10) from the table already in memory instead of
# parsing the same file a second time.
ra_sim, dec_sim = data_sim[9], data_sim[10]

In [None]:
# Sky positions of the simulated and detected clusters.
C_sim = SkyCoord(ra=ra_sim*u.degree, dec=dec_sim*u.degree)
C_det = SkyCoord(ra=ra_det*u.degree, dec=dec_det*u.degree)

# All (simulated, detected) pairs closer than 1 arcmin on the sky.
# idx_sim indexes C_sim and idx_det indexes C_det.
idx_sim, idx_det, d2d, d3d = C_det.search_around_sky(C_sim, 1*u.arcmin)

# Detections with no simulated counterpart. The candidate indices run over
# the detection table; a set makes each membership test O(1).
matched_det = set(np.asarray(idx_det).tolist())
idx_det_outliers = [i for i in range(len(data_det)) if i not in matched_det]

# data_sim was loaded with unpack=True, i.e. it is indexed [column, cluster]:
# the full record of simulated cluster i is data_sim[:, i].
for i, j in zip(idx_sim, idx_det):
    print(data_det[j], data_sim[:, i], file=file_match)

# Unmatched detections get one -99.999 placeholder per simulated column.
n_sim_columns = data_sim.shape[0]
for i in idx_det_outliers:
    print(data_det[i], ' -99.999 ' * n_sim_columns, file=file_match)

file_match.close()

# String columns 8 and 11 of the detections table.
Name, Class = np.loadtxt('detections.txt', usecols=(8, 11), dtype=str, unpack=True)

In [None]:
# Integer columns 0, 15 and 19 of the detections table, one array per column.
N, HPX64, N0_order = np.loadtxt(
    'detections.txt',
    dtype=int,
    usecols=(0, 15, 19),
    unpack=True,
)

Below, two boolean masks are created: one selecting the detections that matched a simulated cluster (confirmed, `conf`)
and one selecting the detections that are only candidates (`cand`).

In [None]:
# Boolean masks over the detections, built from the `Class` column:
# 'conf' marks confirmed detections, 'cand' marks candidates.
real_det, false_positive = Class == 'conf', Class == 'cand'

Below, the spatial distribution (RA, DEC) of the detected clusters is plotted,
with both the colour and the size of each marker scaled by the signal-to-noise ratio (SNR).

In [None]:
# `plt.cm.get_cmap` was removed in Matplotlib 3.9; `plt.get_cmap` is the
# supported way to look a colormap up by name.
cm = plt.get_cmap('inferno_r')
plt.figure(figsize=(10, 6))
# Marker colour and marker area both encode the SNR.
sc = plt.scatter(RA, DEC, c=SNR, vmin=0, vmax=np.max(SNR), s=SNR, cmap=cm, alpha=0.75)
plt.colorbar(sc, label='SNR')
# Limits given as (max, min) so that RA increases towards the left.
plt.xlim(np.max(RA), np.min(RA))
plt.xlabel('RA')
plt.ylabel('DEC')
plt.title('Spatial distribution of clusters detected by Gawa wrt SNR')
# Render the figure explicitly instead of echoing the Text object returned by title().
plt.show()

We can notice that all detected true clusters (True Positives) have a high signal-to-noise ratio (SNR),
while those with low SNR are false positives (FP), represented by the small, light-coloured circles.

It is important to highlight that, in this case, all simulated clusters were detected. There may be cases where
not all simulated clusters are detected. In such cases, the table of simulated clusters should be read
again to assess how complete the detection is.

## Purity of detection distance

Below we will calculate the detection purity given the detected distance.

In [None]:
def plot_pure(arg_all, arg_conf, label, title, bins=20):
    """Plot the distribution of a detection property and its purity per bin.

    The left panel overlays the histogram of all detections with that of the
    confirmed (true) clusters, both computed on the same bins. The right
    panel shows the purity, i.e. the per-bin ratio of confirmed detections
    to all detections.

    Parameters
    ----------
    arg_all : array-like
        Property values of every detection; its min/max set the bin range.
    arg_conf : array-like
        Property values of the confirmed detections only.
    label : str
        x-axis label used on both panels.
    title : str
        Figure title.
    bins : int, optional
        Number of histogram bins (default 20).

    Raises
    ------
    ValueError
        If ``arg_all`` is empty, because no bin range can be derived.
    """
    if np.size(arg_all) == 0:
        raise ValueError('plot_pure: arg_all is empty, cannot derive a histogram range')

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
    hist_range = (np.min(arg_all), np.max(arg_all))
    n_all, edges, _ = ax1.hist(arg_all, bins=bins, range=hist_range,
                               histtype='step', label='All detections')
    n_conf, _, _ = ax1.hist(arg_conf, bins=bins, range=hist_range,
                            histtype='step', label='True clusters')
    ax1.set_xlabel(label)
    ax1.set_ylabel('Number of clusters detected')
    ax1.legend()

    # Bins without any detection give 0/0 = NaN. The NaN is kept on purpose
    # (it shows up as a gap in the 'Data' curve); only the NumPy warning is
    # silenced.
    with np.errstate(divide='ignore', invalid='ignore'):
        purity = n_conf / n_all

    def _per_edge(values):
        # There is one value per bin but one more edge than bins: repeat the
        # last value so that where='post' also spans the final bin.
        return np.append(values, values[-1])

    # where='post' draws each value from its bin's left edge to the next edge,
    # which keeps the curve aligned with the histogram bins of the left panel.
    ax2.step(edges, _per_edge(np.nan_to_num(purity)), ':r', where='post', label='NaN=0.')
    ax2.step(edges, _per_edge(purity), where='post', label='Data', color='k')
    ax2.set_xlabel(label)
    ax2.set_ylabel('Purity')
    ax2.set_ylim([0, 1.2])
    ax2.legend()
    fig.suptitle(title)
    plt.show()

In [None]:
# Purity as a function of the detected distance.
plot_pure(
    arg_all=DistDet_kpc,
    arg_conf=DistDet_kpc_conf,
    label='Distance of detection (kpc)',
    title='Distance of detection (pureness)',
)

In [None]:
# Purity as a function of the detected distance modulus.
plot_pure(
    arg_all=m_M_det,
    arg_conf=m_M_det_conf,
    label='Detection distance module',
    title='Detection distance module(pureness)',
)

In [None]:
# Purity as a function of the detection signal-to-noise ratio.
plot_pure(
    arg_all=SNR,
    arg_conf=SNR_conf,
    label='Signal-to-noise ratio (detection)',
    title='Signal-to-noise ratio(pureness)',
)

In [None]:
# Purity as a function of the detection wave peak.
plot_pure(
    arg_all=wave_peak,
    arg_conf=wave_peak_conf,
    label='Detection wave peak',
    title='Detection wave peak (pureness)',
)

In [None]:
# Purity as a function of SNR_rank.
plot_pure(
    arg_all=SNR_rank,
    arg_conf=SNR_rank_conf,
    label='Detection information',
    title='Detection information (pureness)',
)

In [None]:
# Purity as a function of Wave_r3.
plot_pure(
    arg_all=Wave_r3,
    arg_conf=Wave_r3_conf,
    label='Detection information',
    title='Detection information (pureness)',
)

In [None]:
# NOTE(review): the same array is passed as both the full and the confirmed
# sample, so every populated bin has a ratio of 1 - confirm whether
# SNR_rank_conf was intended here.
plot_pure(
    arg_all=SNR_rank,
    arg_conf=SNR_rank,
    label='Detection information',
    title='Detection information (pureness)',
)

In [None]:
# Columns 2..12 of the simulations table, one array per column.
# Note: this re-binds the notebook-level name `SNR`.
sim_columns = (2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)
(
    Nstar, M_V, SNR, L, B,
    RA_pix, DEC_pix, r_exp, ellpa, mass, dist,
) = np.loadtxt('simulations.txt', usecols=sim_columns, unpack=True)