In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from astropy.io import fits
from astropy.table import Table
from astropy.coordinates import SkyCoord
from astropy import units as u

In [26]:
def gen_tables(z_cut = -1, m_cut = -1):
    comp_df_m = []
    recall_df_m = []
    for det_name in det_cats:
        line = {}
        line_r = {}

        sc = SkyCoord(ra=np.array(det_cats[det_name]['RA'])*u.degree, 
                      dec=np.array(det_cats[det_name]['DEC'])*u.degree, frame='icrs')

        for tr_name in true_cats:
            df = true_cats[tr_name].copy()
            if tr_name == 'rm':
                continue
            df = df[df['z'] > z_cut]
            df = df[df['M500'] > m_cut]
            df.index = np.arange(len(df))
            tr_sc = SkyCoord(ra=np.array(df['RA'])*u.degree, 
                          dec=np.array(df['DEC'])*u.degree, frame='icrs')
            idx, d2d, _ = sc.match_to_catalog_sky(tr_sc)
            matched = d2d.degree <= match_dist
            line[tr_name] = np.count_nonzero(det_cats[det_name].iloc[matched]['status'] != 'fn')
            line[tr_name+'_err'], line[tr_name+'_std'] = calc_error(det_cats[det_name], df)

            line_r[tr_name] = line[tr_name] / len(df)
        line['all'] = len(det_cats[det_name])
        line['fp'] = np.count_nonzero(det_cats[det_name]['status'] == 'fp')
        line_r['fp'] = line['fp']
        comp_df_m.append(pd.DataFrame(line, index=[det_name]))
        recall_df_m.append(pd.DataFrame(line_r, index=[det_name]))

    line = {}
    for tr_name in true_cats:
        if tr_name == 'rm':
            continue
        df = true_cats[tr_name].copy()
        df = df[df['z'] > z_cut]
        df = df[df['M500'] > m_cut]
        df.index = np.arange(len(df))
        line[tr_name] = len(df)
        line[tr_name+'_err'] = 0
    line['fp'] = 0
    line['all'] = 0
    comp_df_m.append(pd.DataFrame(line, index=['all']))
    comp_df_m = pd.concat(comp_df_m)
    recall_df_m = pd.concat(recall_df_m)
    return comp_df, recall_df

In [2]:
det_cats = {
    'pz14' : pd.read_csv('/home/rt2122/Data/detected_cats/full_pz14_thr0.1_step8.csv'),
    'pz20' : pd.read_csv('/home/rt2122/Data/detected_cats/full_pz20_thr0.1_step8.csv'),
    'pz25' : pd.read_csv('/home/rt2122/Data/detected_cats/full_pz25_thr0.1_step8.csv'),
    'pz40' : pd.read_csv('/home/rt2122/Data/detected_cats/full_pz40_thr0.1_step8.csv'),
    'pz_act10' : pd.read_csv(
    '/home/rt2122/Data/detected_cats/full_pz_act10_thr0.1_step8.csv'),
    'pz_act14' : pd.read_csv(
    '/home/rt2122/Data/detected_cats/full_pz_act14_thr0.1_step8.csv'),
    'pz_act20' : pd.read_csv('/home/rt2122/Data/detected_cats/full_pz_act20_thr0.1_step8.csv'),
    'pz_act25' : pd.read_csv('/home/rt2122/Data/detected_cats/full_pz_act25_thr0.1_step8.csv')
}

In [3]:
psz2 = None
with fits.open('/home/rt2122/Data/original_catalogs/psz2.fits') as hdul:
    data = hdul[1].data
    psz2 = Table(data).to_pandas()
mcxc = None
with fits.open('/home/rt2122/Data/original_catalogs/mcxc.fits') as hdul:
    data = hdul[1].data
    mcxc = Table(data).to_pandas()
rm = None
with fits.open('/home/rt2122/Data/original_catalogs/redmapper.fits.gz') as hdul:
    data = Table(hdul[1].data)
    names = [name for name in data.colnames if len(data[name].shape) <= 1]
    rm = data[names].to_pandas()
act = None
with fits.open('/home/rt2122/Data/original_catalogs/act.fits') as hdul:
    data = hdul[1].data
    act = Table(data).to_pandas()

In [4]:
true_cats = {'psz2' : psz2, 'mcxc' : mcxc, 'rm' : rm, 'act' : act}

In [5]:
#true_cats['mcxc'].rename({'RAdeg' : 'RA', 'DEdeg' : 'DEC'}, axis='columns', inplace=True)
#true_cats['act'].rename({'RADeg' : 'RA', 'decDeg' : 'DEC'}, axis='columns', inplace=True)
true_cats['psz2'].rename({'REDSHIFT' : 'z', 'MSZ' : 'M500'}, axis='columns', inplace=True)
true_cats['mcxc'].rename({'RAdeg' : 'RA', 'DEdeg' : 'DEC'}, axis='columns', inplace=True)
true_cats['act'].rename({'RADeg' : 'RA', 'decDeg' : 'DEC', 'redshift' : 'z'}, axis='columns', inplace=True)

In [6]:
match_dist = 5 / 60

In [18]:
comp_df = []
recall_df = []
for det_name in det_cats:
    line = {}
    line_r = {}
    
    sc = SkyCoord(ra=np.array(det_cats[det_name]['RA'])*u.degree, 
                  dec=np.array(det_cats[det_name]['DEC'])*u.degree, frame='icrs')
    
    for tr_name in true_cats: 
        tr_sc = SkyCoord(ra=np.array(true_cats[tr_name]['RA'])*u.degree, 
                      dec=np.array(true_cats[tr_name]['DEC'])*u.degree, frame='icrs')
        idx, d2d, _ = sc.match_to_catalog_sky(tr_sc)
        matched = d2d.degree <= match_dist
        line[tr_name] = np.count_nonzero(det_cats[det_name].iloc[matched]['status'] != 'fn')
        line[tr_name+'_err'], line[tr_name+'_std'] = calc_error(det_cats[det_name], true_cats[tr_name])
        
        line_r[tr_name] = line[tr_name] / len(true_cats[tr_name])
    line['all'] = len(det_cats[det_name])
    line['fp'] = np.count_nonzero(det_cats[det_name]['status'] == 'fp')
    line_r['fp'] = line['fp']
    comp_df.append(pd.DataFrame(line, index=[det_name]))
    recall_df.append(pd.DataFrame(line_r, index=[det_name]))

line = {}
for tr_name in true_cats:
    line[tr_name] = len(true_cats[tr_name])
    line[tr_name+'_err'] = 0
line['fp'] = 0
line['all'] = 0
comp_df.append(pd.DataFrame(line, index=['all']))
comp_df = pd.concat(comp_df)
recall_df = pd.concat(recall_df)

In [19]:
comp_df[['psz2', 'psz2_err', 'psz2_std', 'mcxc', 'mcxc_err', 'mcxc_std', 'rm', 'rm_err', 'act', 'act_err', 'act_std', 'all']]

Unnamed: 0,psz2,psz2_err,psz2_std,mcxc,mcxc_err,mcxc_std,rm,rm_err,act,act_err,act_std,all
pz14,1491,21.7,1.167769,725,24.15,0.79563,1242,401.3,849,67.55,1.746387,22394
pz20,1528,24.5,1.050063,740,26.2,0.961359,1306,413.55,875,74.2,2.262509,29670
pz25,1525,25.35,0.963205,747,27.7,0.85255,1374,447.55,888,77.45,2.343496,27177
pz40,1506,21.65,1.117504,739,24.7,0.909309,1279,422.9,871,73.25,1.385783,23872
pz_act10,1361,17.0,0.867543,659,18.6,0.966273,1029,292.7,926,47.75,1.725772,22142
pz_act14,1363,18.0,1.063757,670,21.2,1.087247,1211,335.3,1282,56.45,1.30278,22310
pz_act20,1217,14.95,0.856784,599,15.45,0.662829,1182,270.6,1866,51.9,2.062063,15964
pz_act25,1260,18.65,1.024117,610,17.0,1.112134,1271,291.8,2152,55.55,1.563523,21841
all,1653,0.0,,1743,0.0,,26111,0.0,4195,0.0,,0


In [20]:
recall_df

Unnamed: 0,psz2,mcxc,rm,act,fp
pz14,0.901996,0.41595,0.047566,0.202384,15828
pz20,0.92438,0.424555,0.050017,0.208582,23104
pz25,0.922565,0.428571,0.052622,0.211681,20611
pz40,0.911071,0.423982,0.048983,0.207628,17306
pz_act10,0.823351,0.378084,0.039409,0.220739,16316
pz_act14,0.824561,0.384395,0.046379,0.305602,16484
pz_act20,0.736237,0.34366,0.045268,0.444815,9398
pz_act25,0.76225,0.349971,0.048677,0.512992,15275


In [7]:
def calc_error(det_cat, true_cat, shift=15/60, match_dist=5/60, n_try=20, seed=0):
    import numpy as np
    from astropy.coordinates import SkyCoord
    from astropy import units as u
    
    error = []
    np.random.seed(seed)
    for i in range(n_try):
        det_sc = SkyCoord(ra=np.array(det_cat['RA']) * u.degree, 
                          dec=np.array(det_cat['DEC']) * u.degree, frame='icrs')
        angles = np.random.randint(0, 360, len(det_cat))
        det_sc = det_sc.directional_offset_by(angles*u.degree, shift)

        true_sc = SkyCoord(ra=np.array(true_cat['RA']) * u.degree, 
                           dec=np.array(true_cat['DEC']) * u.degree, frame='icrs')
        _, d2d, _ = det_sc.match_to_catalog_sky(true_sc)
        c_error = np.count_nonzero(d2d.degree < match_dist)
        error.append(c_error)
    error = np.array(error)
    return error.mean(), error.std() / np.sqrt(n_try - 1)

In [14]:
comp_df_m = []
recall_df_m = []
for det_name in det_cats:
    line = {}
    line_r = {}
    
    sc = SkyCoord(ra=np.array(det_cats[det_name]['RA'])*u.degree, 
                  dec=np.array(det_cats[det_name]['DEC'])*u.degree, frame='icrs')
    
    for tr_name in true_cats:
        df = true_cats[tr_name]
        if tr_name == 'rm':
            continue
        df = df[df['M500'] > 4]
        tr_sc = SkyCoord(ra=np.array(df['RA'])*u.degree, 
                      dec=np.array(df['DEC'])*u.degree, frame='icrs')
        idx, d2d, _ = sc.match_to_catalog_sky(tr_sc)
        matched = d2d.degree <= match_dist
        line[tr_name] = np.count_nonzero(det_cats[det_name].iloc[matched]['status'] != 'fn')
        line[tr_name+'_err'], line[tr_name+'_std'] = calc_error(det_cats[det_name], df)
        
        line_r[tr_name] = line[tr_name] / len(df)
    line['all'] = len(det_cats[det_name])
    line['fp'] = np.count_nonzero(det_cats[det_name]['status'] == 'fp')
    line_r['fp'] = line['fp']
    comp_df_m.append(pd.DataFrame(line, index=[det_name]))
    recall_df_m.append(pd.DataFrame(line_r, index=[det_name]))

line = {}
for tr_name in true_cats:
    if tr_name == 'rm':
        continue
    line[tr_name] = np.count_nonzero(true_cats[tr_name]['M500'] > 4)
    line[tr_name+'_err'] = 0
line['fp'] = 0
line['all'] = 0
comp_df_m.append(pd.DataFrame(line, index=['all']))
comp_df_m = pd.concat(comp_df_m)
recall_df_m = pd.concat(recall_df_m)

In [12]:
comp_df_m #M500 > 5

Unnamed: 0,psz2,psz2_err,psz2_std,mcxc,mcxc_err,mcxc_std,act,act_err,act_std,all,fp
pz14,486,7.95,0.613339,118,2.1,0.289282,142,2.7,0.370632,22394,15828
pz20,489,8.65,0.637739,117,2.3,0.341051,144,2.85,0.405716,29670,23104
pz25,489,6.75,0.458114,116,1.35,0.243602,145,1.9,0.339504,27177,20611
pz40,485,6.15,0.514398,118,2.1,0.390007,142,2.2,0.359825,23872,17306
pz_act10,463,5.65,0.482728,118,1.6,0.222427,133,1.8,0.224781,22142,16316
pz_act14,466,6.0,0.533114,118,1.1,0.239517,136,1.8,0.344887,22310,16484
pz_act20,448,5.0,0.39736,115,1.1,0.239517,135,1.55,0.256238,15964,9398
pz_act25,455,5.75,0.415964,116,1.55,0.2112,138,2.1,0.339504,21841,15275
all,490,0.0,,137,0.0,,153,0.0,,0,0


In [13]:
recall_df_m #M500 > 5

Unnamed: 0,psz2,mcxc,act,fp
pz14,0.991837,0.861314,0.928105,15828
pz20,0.997959,0.854015,0.941176,23104
pz25,0.997959,0.846715,0.947712,20611
pz40,0.989796,0.861314,0.928105,17306
pz_act10,0.944898,0.861314,0.869281,16316
pz_act14,0.95102,0.861314,0.888889,16484
pz_act20,0.914286,0.839416,0.882353,9398
pz_act25,0.928571,0.846715,0.901961,15275


In [15]:
comp_df_m #M500 > 4

Unnamed: 0,psz2,psz2_err,psz2_std,mcxc,mcxc_err,mcxc_std,act,act_err,act_std,all,fp
pz14,702,11.05,0.844409,224,3.7,0.370632,260,5.35,0.595045,22394,15828
pz20,711,12.25,0.873212,222,4.4,0.520121,260,6.7,0.677068,29670,23104
pz25,710,10.1,0.475173,220,3.4,0.284697,268,5.65,0.442927,27177,20611
pz40,705,9.15,0.693105,221,3.85,0.52453,264,5.7,0.487205,23872,17306
pz_act10,660,8.25,0.537416,219,2.6,0.343358,249,3.05,0.320156,22142,16316
pz_act14,667,8.3,0.538516,222,3.0,0.410391,267,3.8,0.45073,22310,16484
pz_act20,634,7.2,0.634947,215,2.0,0.369922,275,4.2,0.374166,15964,9398
pz_act25,644,8.4,0.563822,217,2.6,0.284697,283,5.15,0.581627,21841,15275
all,713,0.0,,264,0.0,,342,0.0,,0,0


In [16]:
recall_df_m #M500 > 4

Unnamed: 0,psz2,mcxc,act,fp
pz14,0.984572,0.848485,0.760234,15828
pz20,0.997195,0.840909,0.760234,23104
pz25,0.995792,0.833333,0.783626,20611
pz40,0.98878,0.837121,0.77193,17306
pz_act10,0.925666,0.829545,0.72807,16316
pz_act14,0.935484,0.840909,0.780702,16484
pz_act20,0.889201,0.814394,0.804094,9398
pz_act25,0.903226,0.82197,0.827485,15275


In [23]:
comp_df_m = []
recall_df_m = []
for det_name in det_cats:
    line = {}
    line_r = {}
    
    sc = SkyCoord(ra=np.array(det_cats[det_name]['RA'])*u.degree, 
                  dec=np.array(det_cats[det_name]['DEC'])*u.degree, frame='icrs')
    
    for tr_name in true_cats:
        df = true_cats[tr_name]
        if tr_name == 'rm':
            continue
        df = df[df['z'] > 0.5]
        df = df[df['M500'] > 4]
        df.index = np.arange(len(df))
        tr_sc = SkyCoord(ra=np.array(df['RA'])*u.degree, 
                      dec=np.array(df['DEC'])*u.degree, frame='icrs')
        idx, d2d, _ = sc.match_to_catalog_sky(tr_sc)
        matched = d2d.degree <= match_dist
        line[tr_name] = np.count_nonzero(det_cats[det_name].iloc[matched]['status'] != 'fn')
        line[tr_name+'_err'], line[tr_name+'_std'] = calc_error(det_cats[det_name], df)
        
        line_r[tr_name] = line[tr_name] / len(df)
    line['all'] = len(det_cats[det_name])
    line['fp'] = np.count_nonzero(det_cats[det_name]['status'] == 'fp')
    line_r['fp'] = line['fp']
    comp_df_m.append(pd.DataFrame(line, index=[det_name]))
    recall_df_m.append(pd.DataFrame(line_r, index=[det_name]))

line = {}
for tr_name in true_cats:
    if tr_name == 'rm':
        continue
    df = true_cats[tr_name]
    df = df[df['z'] > 0.5]
    df = df[df['M500'] > 4]
    df.index = np.arange(len(df))
    line[tr_name] = len(df)
    line[tr_name+'_err'] = 0
line['fp'] = 0
line['all'] = 0
comp_df_m.append(pd.DataFrame(line, index=['all']))
comp_df_m = pd.concat(comp_df_m)
recall_df_m = pd.concat(recall_df_m)

In [18]:
comp_df_m #z > 0.6

Unnamed: 0,psz2,psz2_err,psz2_std,mcxc,mcxc_err,mcxc_std,act,act_err,act_std,all,fp
pz14,33,0.6,0.197351,6,0.25,0.09934,141,27.0,1.23757,22394,15828
pz20,35,0.7,0.206474,5,0.5,0.153897,142,29.2,1.321084,29670,23104
pz25,35,0.35,0.15,5,0.45,0.184605,154,32.35,1.464088,27177,20611
pz40,34,0.45,0.153469,7,0.5,0.153897,151,28.8,1.079961,23872,17306
pz_act10,30,0.25,0.09934,4,0.15,0.081918,184,19.65,1.093702,22142,16316
pz_act14,31,0.35,0.109424,4,0.3,0.105131,298,22.2,0.950346,22310,16484
pz_act20,28,0.35,0.131289,2,0.25,0.12301,467,19.6,1.38678,15964,9398
pz_act25,29,0.5,0.170139,3,0.4,0.133771,552,23.55,0.969197,21841,15275
all,35,0.0,,29,0.0,,1634,0.0,,0,0


In [19]:
recall_df_m #z > 0.6

Unnamed: 0,psz2,mcxc,act,fp
pz14,0.942857,0.206897,0.086291,15828
pz20,1.0,0.172414,0.086903,23104
pz25,1.0,0.172414,0.094247,20611
pz40,0.971429,0.241379,0.092411,17306
pz_act10,0.857143,0.137931,0.112607,16316
pz_act14,0.885714,0.137931,0.182375,16484
pz_act20,0.8,0.068966,0.285802,9398
pz_act25,0.828571,0.103448,0.337821,15275


In [24]:
comp_df_m #z > 0.5 M500 > 4

Unnamed: 0,psz2,psz2_err,psz2_std,mcxc,mcxc_err,mcxc_std,act,act_err,act_std,all,fp
pz14,94,1.4,0.327671,11,0.15,0.081918,72,1.8,0.367065,22394,15828
pz20,98,1.65,0.232549,11,0.15,0.081918,76,2.65,0.326666,29670,23104
pz25,98,1.2,0.186378,11,0.05,0.05,81,2.45,0.28539,27177,20611
pz40,95,1.5,0.28562,11,0.3,0.127733,81,2.2,0.432861,23872,17306
pz_act10,85,1.0,0.205196,11,0.15,0.081918,68,1.05,0.223312,22142,16316
pz_act14,86,1.2,0.186378,11,0.1,0.068825,82,1.4,0.244949,22310,16484
pz_act20,78,1.4,0.210263,9,0.2,0.091766,91,1.9,0.21643,15964,9398
pz_act25,81,1.0,0.217643,11,0.05,0.05,97,2.05,0.256238,21841,15275
all,97,0.0,,13,0.0,,132,0.0,,0,0


In [25]:
recall_df_m #z > 0.5 M500 > 4

Unnamed: 0,psz2,mcxc,act,fp
pz14,0.969072,0.846154,0.545455,15828
pz20,1.010309,0.846154,0.575758,23104
pz25,1.010309,0.846154,0.613636,20611
pz40,0.979381,0.846154,0.613636,17306
pz_act10,0.876289,0.846154,0.515152,16316
pz_act14,0.886598,0.846154,0.621212,16484
pz_act20,0.804124,0.692308,0.689394,9398
pz_act25,0.835052,0.846154,0.734848,15275
