In [1]:
import numpy as np
import pandas as pd
import os
from matplotlib import pyplot as plt
from astropy.io import fits
from astropy.table import Table
from astropy.coordinates import SkyCoord
from astropy import units as u

In [2]:
def get_name(file):
    if 'pz_act' in file:
        return file[5:13]
    return file[5:9]

In [3]:
def calc_error(det_cat, true_cat, shift=15/60, match_dist=5/60, n_try=20, seed=0):
    import numpy as np
    from astropy.coordinates import SkyCoord
    from astropy import units as u
    
    error = []
    np.random.seed(seed)
    for i in range(n_try):
        det_sc = SkyCoord(ra=np.array(det_cat['RA']) * u.degree, 
                          dec=np.array(det_cat['DEC']) * u.degree, frame='icrs')
        angles = np.random.randint(0, 360, len(det_cat))
        det_sc = det_sc.directional_offset_by(angles*u.degree, shift)

        true_sc = SkyCoord(ra=np.array(true_cat['RA']) * u.degree, 
                           dec=np.array(true_cat['DEC']) * u.degree, frame='icrs')
        _, d2d, _ = det_sc.match_to_catalog_sky(true_sc)
        c_error = np.count_nonzero(d2d.degree < match_dist)
        error.append(c_error)
    error = np.array(error)
    return error.mean(), error.std() / np.sqrt(n_try - 1)

In [4]:
def gen_tables(det_cats_files, true_cats_files, z_cut = -1, m_cut=-1, b_cut=None, match_dist=5/60):
    
    true_cats = {os.path.splitext(os.path.basename(file))[0] : pd.read_csv(file) for file in true_cats_files}
    det_cats = {get_name(os.path.splitext(os.path.basename(file))[0]) : 
                pd.read_csv(file) for file in det_cats_files}
    
    comp_df = []
    recall_df = []
    
    for det_name in det_cats:
        det = det_cats[det_name]
        det = det[det['status'] != 'fn']
        det.index = np.arange(len(det))
        line = {}
        line_r = {}

        det_sc = SkyCoord(ra=np.array(det['RA'])*u.degree, 
                      dec=np.array(det['DEC'])*u.degree, frame='icrs')
        det['b'] = det_sc.galactic.b.degree
        
        if not(b_cut is None):
            det = det[np.abs(det['b']) > b_cut]
            det.index = np.arange(len(det))
            det_sc = SkyCoord(ra=np.array(det['RA'])*u.degree, 
                          dec=np.array(det['DEC'])*u.degree, frame='icrs')
            

        for tr_name in true_cats:
            
            if tr_name == 'RM' and (z_cut > -1 or m_cut > -1):
                continue
            tr = true_cats[tr_name]
            if z_cut > -1:    
                tr = tr[tr['z'] > z_cut]
            if m_cut > -1:
                tr = tr[tr['M500'] > m_cut]
                
            tr_sc = SkyCoord(ra=np.array(tr['RA'])*u.degree, 
                          dec=np.array(tr['DEC'])*u.degree, frame='icrs')
            tr['b'] = tr_sc.galactic.b.degree
            true_cats[tr_name] = tr
            if not(b_cut is None):
                tr = tr[np.abs(tr['b']) > b_cut]
                tr.index = np.arange(len(tr))
                tr_sc = SkyCoord(ra=np.array(tr['RA'])*u.degree, 
                              dec=np.array(tr['DEC'])*u.degree, frame='icrs')
            
            tr.index = np.arange(len(tr))
            
            idx, d2d, _ = det_sc.match_to_catalog_sky(tr_sc)
            matched = d2d.degree <= match_dist
            
            line[tr_name] = np.count_nonzero(matched)
            line[tr_name+'_err'], line[tr_name+'_std'] = calc_error(det, tr)

            line_r[tr_name] = line[tr_name] / len(tr)
            
        line['all'] = len(det)
        line['fp'] = np.count_nonzero(det['status'] == 'fp')
        line_r['fp'] = line['fp']
        line_r['all'] = line['all']
        comp_df.append(pd.DataFrame(line, index=[det_name]))
        recall_df.append(pd.DataFrame(line_r, index=[det_name]))
        line = {}
    
    for tr_name in true_cats:
        if tr_name == 'RM' and (z_cut > -1 or m_cut > -1):
            continue
        tr = true_cats[tr_name]
        if z_cut > -1:    
            tr = tr[tr['z'] > z_cut]
        if m_cut > -1:
            tr = tr[tr['M500'] > m_cut] 
        tr.index = np.arange(len(tr))
        
        line[tr_name] = len(tr)
        line[tr_name+'_err'] = 0
    line['fp'] = 0
    line['all'] = 0
    comp_df.append(pd.DataFrame(line, index=['all']))
    
    comp_df = pd.concat(comp_df)
    recall_df = pd.concat(recall_df)
    
    return comp_df, recall_df

In [5]:
tr_dir = '/home/rt2122/Data/original_catalogs/csv/'
true_cats_files = [os.path.join(tr_dir, name) for name in next(os.walk(tr_dir))[-1]]
det_dir = '/home/rt2122/Data/detected_cats/'
det_cats_files = [os.path.join(det_dir, name) for name in sorted(next(os.walk(det_dir))[-1]) if 'full' in name]
#det_cats_files = ['/home/rt2122/Data/detected_cats/full_pz14_thr0.1_step8.csv', 
#                 '/home/rt2122/Data/detected_cats/full_pz_act10_thr0.1_step8.csv']
comp, recall = gen_tables(det_cats_files, true_cats_files)
comp

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  det['b'] = det_sc.galactic.b.degree
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  det['b'] = det_sc.galactic.b.degree


Unnamed: 0,PSZ2,PSZ2_err,PSZ2_std,MCXC,MCXC_err,MCXC_std,RM,RM_err,RM_std,ACT,ACT_err,ACT_std,all,fp
pz14,1491,15.75,1.0733,725,17.95,0.838153,1242,298.75,3.305398,849,43.25,1.389576,17968,15828
pz20,1528,20.3,1.0763,740,20.55,0.910393,1306,311.9,5.114427,875,50.3,1.592912,25312,23104
pz25,1525,19.2,0.741975,747,20.15,0.99809,1374,341.0,4.090425,888,53.65,1.351948,22837,20611
pz40,1506,18.15,1.103285,739,20.2,0.878396,1279,328.65,3.937856,871,49.9,1.470589,19495,17306
pz_act10,1361,13.2,0.936398,659,13.65,0.785644,1029,206.6,3.642873,926,30.65,1.076727,18363,16316
pz_act14,1363,15.1,0.953939,670,13.8,0.902044,1211,260.45,3.811668,1282,39.75,1.305605,18878,16484
pz_act20,1217,10.3,0.69623,599,10.5,0.626183,1182,191.3,3.585864,1866,33.55,1.298734,12239,9398
pz_act25,1260,14.0,0.954215,610,13.4,1.003677,1271,208.55,2.953566,2152,37.9,1.454394,18447,15275
all,1653,0.0,,1743,0.0,,26111,0.0,,4195,0.0,,0,0


In [6]:
recall

Unnamed: 0,PSZ2,MCXC,RM,ACT,fp,all
pz14,0.901996,0.41595,0.047566,0.202384,15828,17968
pz20,0.92438,0.424555,0.050017,0.208582,23104,25312
pz25,0.922565,0.428571,0.052622,0.211681,20611,22837
pz40,0.911071,0.423982,0.048983,0.207628,17306,19495
pz_act10,0.823351,0.378084,0.039409,0.220739,16316,18363
pz_act14,0.824561,0.384395,0.046379,0.305602,16484,18878
pz_act20,0.736237,0.34366,0.045268,0.444815,9398,12239
pz_act25,0.76225,0.349971,0.048677,0.512992,15275,18447


In [7]:
comp, recall = gen_tables(det_cats_files, true_cats_files, m_cut=4)
comp

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  det['b'] = det_sc.galactic.b.degree
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tr['b'] = tr_sc.galactic.b.degree


Unnamed: 0,PSZ2,PSZ2_err,PSZ2_std,MCXC,MCXC_err,MCXC_std,ACT,ACT_err,ACT_std,all,fp
pz14,702,7.65,0.726473,224,2.5,0.380443,260,3.2,0.401314,17968,15828
pz20,711,9.35,0.662034,222,3.95,0.45,260,5.0,0.45306,25312,23104
pz25,710,8.3,0.487205,220,3.3,0.317059,268,4.35,0.519489,22837,20611
pz40,705,9.15,0.77553,221,3.25,0.452333,264,5.2,0.484496,19495,17306
pz_act10,660,5.75,0.648379,219,1.95,0.358909,249,2.0,0.316228,18363,16316
pz_act14,667,6.55,0.642671,222,2.3,0.333246,267,3.45,0.505106,18878,16484
pz_act20,634,4.5,0.380443,215,1.75,0.29802,275,2.4,0.335606,12239,9398
pz_act25,644,5.65,0.53447,217,2.3,0.370632,283,3.3,0.333246,18447,15275
all,713,0.0,,264,0.0,,342,0.0,,0,0


In [8]:
recall

Unnamed: 0,PSZ2,MCXC,ACT,fp,all
pz14,0.984572,0.848485,0.760234,15828,17968
pz20,0.997195,0.840909,0.760234,23104,25312
pz25,0.995792,0.833333,0.783626,20611,22837
pz40,0.98878,0.837121,0.77193,17306,19495
pz_act10,0.925666,0.829545,0.72807,16316,18363
pz_act14,0.935484,0.840909,0.780702,16484,18878
pz_act20,0.889201,0.814394,0.804094,9398,12239
pz_act25,0.903226,0.82197,0.827485,15275,18447


In [9]:
comp, recall = gen_tables(det_cats_files, true_cats_files, m_cut=4, z_cut=0.5)
comp

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  det['b'] = det_sc.galactic.b.degree


Unnamed: 0,PSZ2,PSZ2_err,PSZ2_std,MCXC,MCXC_err,MCXC_std,ACT,ACT_err,ACT_std,all,fp
pz14,94,1.2,0.312881,11,0.2,0.11698,72,1.0,0.261574,17968,15828
pz20,98,0.85,0.243602,11,0.15,0.081918,76,1.75,0.29802,25312,23104
pz25,98,1.1,0.289282,11,0.3,0.146898,81,1.75,0.260314,22837,20611
pz40,95,1.15,0.232549,11,0.1,0.068825,81,2.1,0.306937,19495,17306
pz_act10,85,0.5,0.153897,11,0.1,0.1,68,0.45,0.135239,18363,16316
pz_act14,86,0.8,0.247088,11,0.05,0.05,82,1.45,0.336194,18878,16484
pz_act20,78,0.6,0.152177,9,0.05,0.05,91,0.85,0.254176,12239,9398
pz_act25,81,0.85,0.195677,11,0.2,0.091766,97,1.4,0.244949,18447,15275
all,97,0.0,,13,0.0,,132,0.0,,0,0


In [10]:
recall

Unnamed: 0,PSZ2,MCXC,ACT,fp,all
pz14,0.969072,0.846154,0.545455,15828,17968
pz20,1.010309,0.846154,0.575758,23104,25312
pz25,1.010309,0.846154,0.613636,20611,22837
pz40,0.979381,0.846154,0.613636,17306,19495
pz_act10,0.876289,0.846154,0.515152,16316,18363
pz_act14,0.886598,0.846154,0.621212,16484,18878
pz_act20,0.804124,0.692308,0.689394,9398,12239
pz_act25,0.835052,0.846154,0.734848,15275,18447


In [11]:
comp, recall = gen_tables(det_cats_files, true_cats_files, m_cut=5)
comp

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  det['b'] = det_sc.galactic.b.degree
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tr['b'] = tr_sc.galactic.b.degree


Unnamed: 0,PSZ2,PSZ2_err,PSZ2_std,MCXC,MCXC_err,MCXC_std,ACT,ACT_err,ACT_std,all,fp
pz14,486,5.25,0.561366,118,1.55,0.26631,142,1.75,0.227977,17968,15828
pz20,489,7.0,0.640723,117,2.4,0.365629,144,2.5,0.373462,25312,23104
pz25,489,5.9,0.390007,116,1.95,0.294467,145,1.95,0.328273,22837,20611
pz40,485,5.85,0.558546,118,1.45,0.26631,142,2.35,0.292674,19495,17306
pz_act10,463,4.2,0.521132,118,0.85,0.254176,133,0.85,0.220943,18363,16316
pz_act14,466,5.35,0.498814,118,1.3,0.218849,136,1.3,0.241704,18878,16484
pz_act20,448,2.7,0.23056,115,0.9,0.21643,135,1.05,0.198348,12239,9398
pz_act25,455,4.15,0.436945,116,1.45,0.328273,138,1.45,0.223312,18447,15275
all,490,0.0,,137,0.0,,153,0.0,,0,0


In [12]:
recall

Unnamed: 0,PSZ2,MCXC,ACT,fp,all
pz14,0.991837,0.861314,0.928105,15828,17968
pz20,0.997959,0.854015,0.941176,23104,25312
pz25,0.997959,0.846715,0.947712,20611,22837
pz40,0.989796,0.861314,0.928105,17306,19495
pz_act10,0.944898,0.861314,0.869281,16316,18363
pz_act14,0.95102,0.861314,0.888889,16484,18878
pz_act20,0.914286,0.839416,0.882353,9398,12239
pz_act25,0.928571,0.846715,0.901961,15275,18447


In [13]:
comp, recall = gen_tables(det_cats_files, true_cats_files, b_cut=20)
comp

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  det['b'] = det_sc.galactic.b.degree


Unnamed: 0,PSZ2,PSZ2_err,PSZ2_std,MCXC,MCXC_err,MCXC_std,RM,RM_err,RM_std,ACT,ACT_err,ACT_std,all,fp
pz14,1229,11.25,0.664415,646,14.45,0.737974,1232,285.35,4.666947,849,43.45,1.164101,12823,10971
pz20,1260,12.25,0.706641,662,16.4,0.758808,1296,304.75,4.844897,875,45.5,1.792528,14271,12355
pz25,1249,14.4,1.108342,665,17.75,0.570664,1362,321.5,3.571451,888,50.3,1.643328,15388,13468
pz40,1238,14.15,0.805491,658,18.15,0.625237,1269,314.95,2.87409,871,48.9,1.325658,14127,12233
pz_act10,1136,8.65,0.595045,596,10.6,0.685949,1022,206.55,3.583937,926,30.15,0.859238,8587,6780
pz_act14,1148,9.85,0.689298,605,13.65,0.949446,1204,250.45,2.855719,1282,37.65,1.08646,10691,8529
pz_act20,1045,7.25,0.67229,543,9.0,0.593828,1173,184.55,3.878738,1866,29.7,1.167769,8122,5463
pz_act25,1066,8.2,0.495241,546,10.45,0.789653,1263,206.5,2.576718,2152,35.6,1.098803,9636,6675
all,1653,0.0,,1743,0.0,,26111,0.0,,4195,0.0,,0,0


In [14]:
recall

Unnamed: 0,PSZ2,MCXC,RM,ACT,fp,all
pz14,0.915797,0.400744,0.047386,0.202384,10971,12823
pz20,0.938897,0.41067,0.049848,0.208582,12355,14271
pz25,0.9307,0.412531,0.052387,0.211681,13468,15388
pz40,0.922504,0.408189,0.04881,0.207628,12233,14127
pz_act10,0.846498,0.369727,0.039309,0.220739,6780,8587
pz_act14,0.85544,0.37531,0.046309,0.305602,8529,10691
pz_act20,0.778689,0.336849,0.045117,0.444815,5463,8122
pz_act25,0.794337,0.33871,0.048579,0.512992,6675,9636
