Пояснение по gen_table: параметр dict_cut выбирает, как обрезать каталоги.

Например при dict_cut = {'z' : [0.1, 0.3]} в каталогах останутся только объекты с 0.1 =< z < 0.3. 

Для b используется модуль: dict_cut = {'b' : [20, np.inf]} оставит в каталогах объекты для которых |b| > 20 

In [2]:
import numpy as np
import pandas as pd
import os
from matplotlib import pyplot as plt
from astropy.io import fits
from astropy.table import Table
from astropy.coordinates import SkyCoord
from astropy import units as u

In [3]:
def get_name(file):
    if 'pz_act' in file:
        return file[5:13]
    return file[5:9]

In [4]:
def calc_error(det_cat, true_cat, shift=15/60, match_dist=5/60, n_try=20, seed=0):
    import numpy as np
    from astropy.coordinates import SkyCoord
    from astropy import units as u
    
    error = []
    np.random.seed(seed)
    for i in range(n_try):
        det_sc = SkyCoord(ra=np.array(det_cat['RA']) * u.degree, 
                          dec=np.array(det_cat['DEC']) * u.degree, frame='icrs')
        angles = np.random.randint(0, 360, len(det_cat))
        det_sc = det_sc.directional_offset_by(angles*u.degree, shift)

        true_sc = SkyCoord(ra=np.array(true_cat['RA']) * u.degree, 
                           dec=np.array(true_cat['DEC']) * u.degree, frame='icrs')
        _, d2d, _ = det_sc.match_to_catalog_sky(true_sc)
        c_error = np.count_nonzero(d2d.degree < match_dist)
        error.append(c_error)
    error = np.array(error)
    return error.mean(), error.std() / np.sqrt(n_try - 1)

In [61]:
def cut_cat(df, dict_cut = {'z' : [-np.inf, np.inf], 'M500' : [-np.inf, np.inf], 'b' :[-np.inf, np.inf]}):
 
    sc = SkyCoord(ra=np.array(df['RA'])*u.degree, 
                  dec=np.array(df['DEC'])*u.degree, frame='icrs')
    df['b'] = sc.galactic.b.degree
    for prm in dict_cut:
        if prm == 'b':
            df = df[np.abs(df[prm]) >= dict_cut[prm][0]]
            df = df[np.abs(df[prm]) < dict_cut[prm][1]]
            
        df = df[df[prm] >= dict_cut[prm][0]]
        df = df[df[prm] < dict_cut[prm][1]]
    df.index = np.arange(len(df))
    return df

In [63]:
def gen_tables(det_cats_files, true_cats_files, dict_cut = {'z' : [-np.inf, np.inf], 'M500' : [-np.inf, np.inf], 
                        'b' :[-np.inf, np.inf]}, match_dist=5/60):
    
    true_cats = {os.path.splitext(os.path.basename(file))[0] : pd.read_csv(file) for file in true_cats_files}
    det_cats = {get_name(os.path.splitext(os.path.basename(file))[0]) : 
                pd.read_csv(file) for file in det_cats_files}
    
    comp_df = []
    recall_df = []
    
    for name in det_cats:
        df = det_cats[name]
        df = df[df['status'] != 'fn']
        df.index = np.arange(len(df))
        if 'b' in dict_cut:
            det_cats[name] = cut_cat(df, {'b' : dict_cut['b']})
        else:
            det_cats[name] = df
    for name in true_cats:
        true_cats[name] = cut_cat(true_cats[name], dict_cut)
    
    for det_name in det_cats:
        det = det_cats[det_name]
        line = {}
        line_r = {}

        det_sc = SkyCoord(ra=np.array(det['RA'])*u.degree, 
                      dec=np.array(det['DEC'])*u.degree, frame='icrs') 

        for tr_name in true_cats: 
            tr = true_cats[tr_name]
            tr_sc = SkyCoord(ra=np.array(tr['RA'])*u.degree, 
                          dec=np.array(tr['DEC'])*u.degree, frame='icrs')
            
            idx, d2d, _ = tr_sc.match_to_catalog_sky(det_sc)
            matched = d2d.degree <= match_dist
            
            line[tr_name] = np.count_nonzero(matched)
            line[tr_name+'_err'], line[tr_name+'_std'] = calc_error(det, tr)

            line_r[tr_name] = line[tr_name] / len(tr)
            
        line['all'] = len(det)
        line['fp'] = np.count_nonzero(det['status'] == 'fp')
        line_r['fp'] = line['fp']
        line_r['all'] = line['all']
        comp_df.append(pd.DataFrame(line, index=[det_name]))
        recall_df.append(pd.DataFrame(line_r, index=[det_name]))
        line = {}
    
    for tr_name in true_cats: 
        line[tr_name] = len(true_cats[tr_name])
        line[tr_name+'_err'] = 0
    line['fp'] = 0
    line['all'] = 0
    comp_df.append(pd.DataFrame(line, index=['all']))
    
    comp_df = pd.concat(comp_df)
    recall_df = pd.concat(recall_df)
    
    return comp_df, recall_df

In [16]:
tr_dir = '/home/rt2122/Data/original_catalogs/csv/'
true_cats_files = [os.path.join(tr_dir, name) for name in next(os.walk(tr_dir))[-1]]
det_dir = '/home/rt2122/Data/detected_cats/'
det_cats_files = [os.path.join(det_dir, name) for name in sorted(next(os.walk(det_dir))[-1]) if 'full' in name]
#det_cats_files = ['/home/rt2122/Data/detected_cats/full_pz14_thr0.1_step8.csv', 
#                 '/home/rt2122/Data/detected_cats/full_pz_act10_thr0.1_step8.csv']

In [51]:
comp, recall = gen_tables(det_cats_files, true_cats_files, dict_cut={})
comp

Unnamed: 0,PSZ2,PSZ2_err,PSZ2_std,MCXC,MCXC_err,MCXC_std,ACT,ACT_err,ACT_std,all,fp
pz14,1488,15.75,1.0733,726,17.95,0.838153,858,43.25,1.389576,17968,15828
pz20,1526,20.3,1.0763,739,20.55,0.910393,887,50.3,1.592912,25312,23104
pz25,1523,19.2,0.741975,747,20.15,0.99809,895,53.65,1.351948,22837,20611
pz40,1503,18.15,1.103285,740,20.2,0.878396,881,49.9,1.470589,19495,17306
pz_act06,1219,11.55,0.815879,587,10.4,0.666491,614,18.45,1.034853,17515,15934
pz_act10,1360,13.2,0.936398,662,13.65,0.785644,940,30.65,1.076727,18363,16316
pz_act14,1362,15.1,0.953939,673,13.8,0.902044,1301,39.75,1.305605,18878,16484
pz_act20,1219,10.3,0.69623,602,10.5,0.626183,1884,33.55,1.298734,12239,9398
pz_act25,1262,14.0,0.954215,612,13.4,1.003677,2169,37.9,1.454394,18447,15275
all,1653,0.0,,1743,0.0,,4195,0.0,,0,0


In [52]:
recall

Unnamed: 0,PSZ2,MCXC,ACT,fp,all
pz14,0.900181,0.416523,0.204529,15828,17968
pz20,0.92317,0.423982,0.211442,23104,25312
pz25,0.921355,0.428571,0.213349,20611,22837
pz40,0.909256,0.424555,0.210012,17306,19495
pz_act06,0.737447,0.336776,0.146365,15934,17515
pz_act10,0.822747,0.379805,0.224076,16316,18363
pz_act14,0.823956,0.386116,0.310131,16484,18878
pz_act20,0.737447,0.345382,0.449106,9398,12239
pz_act25,0.76346,0.351119,0.517044,15275,18447


In [53]:
true_cats_files = [file for file in true_cats_files if not ('RM' in file)]

In [54]:
comp, recall = gen_tables(det_cats_files, true_cats_files, dict_cut={'M500' : [3, np.inf]})
comp

Unnamed: 0,PSZ2,PSZ2_err,PSZ2_std,MCXC,MCXC_err,MCXC_std,ACT,ACT_err,ACT_std,all,fp
pz14,863,9.2,0.749034,349,4.4,0.437697,465,9.75,0.725023,17968,15828
pz20,877,11.65,0.677748,346,6.25,0.597693,462,12.7,1.041507,25312,23104
pz25,877,10.85,0.563238,344,4.95,0.520501,474,11.3,0.897658,22837,20611
pz40,869,11.35,0.859238,345,5.3,0.594271,466,11.95,0.621861,19495,17306
pz_act06,755,5.5,0.426121,311,3.65,0.318508,354,3.95,0.483817,17515,15934
pz_act10,806,6.95,0.712575,331,3.7,0.404579,470,7.35,0.477245,18363,16316
pz_act14,815,7.9,0.672388,335,3.55,0.45581,549,9.0,0.602626,18878,16484
pz_act20,773,5.55,0.51542,317,2.55,0.373286,645,7.8,0.634947,12239,9398
pz_act25,785,7.25,0.551911,320,3.7,0.45364,692,8.85,0.677748,18447,15275
all,885,0.0,,435,0.0,,975,0.0,,0,0


In [55]:
recall

Unnamed: 0,PSZ2,MCXC,ACT,fp,all
pz14,0.975141,0.802299,0.476923,15828,17968
pz20,0.99096,0.795402,0.473846,23104,25312
pz25,0.99096,0.790805,0.486154,20611,22837
pz40,0.981921,0.793103,0.477949,17306,19495
pz_act06,0.853107,0.714943,0.363077,15934,17515
pz_act10,0.910734,0.76092,0.482051,16316,18363
pz_act14,0.920904,0.770115,0.563077,16484,18878
pz_act20,0.873446,0.728736,0.661538,9398,12239
pz_act25,0.887006,0.735632,0.709744,15275,18447


In [57]:
comp, recall = gen_tables(det_cats_files, true_cats_files, dict_cut={'M500' : [4, np.inf]})
comp

Unnamed: 0,PSZ2,PSZ2_err,PSZ2_std,MCXC,MCXC_err,MCXC_std,ACT,ACT_err,ACT_std,all,fp
pz14,699,7.65,0.726473,221,2.5,0.380443,258,3.2,0.401314,17968,15828
pz20,707,9.35,0.662034,219,3.95,0.45,257,5.0,0.45306,25312,23104
pz25,706,8.3,0.487205,217,3.3,0.317059,265,4.35,0.519489,22837,20611
pz40,702,9.15,0.77553,219,3.25,0.452333,263,5.2,0.484496,19495,17306
pz_act06,620,4.4,0.365629,208,1.8,0.312881,213,1.15,0.181731,17515,15934
pz_act10,659,5.75,0.648379,218,1.95,0.358909,248,2.0,0.316228,18363,16316
pz_act14,665,6.55,0.642671,220,2.3,0.333246,266,3.45,0.505106,18878,16484
pz_act20,634,4.5,0.380443,214,1.75,0.29802,274,2.4,0.335606,12239,9398
pz_act25,644,5.65,0.53447,216,2.3,0.370632,282,3.3,0.333246,18447,15275
all,713,0.0,,264,0.0,,342,0.0,,0,0


In [58]:
recall

Unnamed: 0,PSZ2,MCXC,ACT,fp,all
pz14,0.980365,0.837121,0.754386,15828,17968
pz20,0.991585,0.829545,0.751462,23104,25312
pz25,0.990182,0.82197,0.774854,20611,22837
pz40,0.984572,0.829545,0.769006,17306,19495
pz_act06,0.869565,0.787879,0.622807,15934,17515
pz_act10,0.924264,0.825758,0.725146,16316,18363
pz_act14,0.932679,0.833333,0.777778,16484,18878
pz_act20,0.889201,0.810606,0.80117,9398,12239
pz_act25,0.903226,0.818182,0.824561,15275,18447


In [59]:
comp, recall = gen_tables(det_cats_files, true_cats_files, dict_cut={'M500' : [5, np.inf]})
comp

Unnamed: 0,PSZ2,PSZ2_err,PSZ2_std,MCXC,MCXC_err,MCXC_std,ACT,ACT_err,ACT_std,all,fp
pz14,484,5.25,0.561366,117,1.55,0.26631,140,1.75,0.227977,17968,15828
pz20,486,7.0,0.640723,116,2.4,0.365629,141,2.5,0.373462,25312,23104
pz25,486,5.9,0.390007,115,1.95,0.294467,142,1.95,0.328273,22837,20611
pz40,483,5.85,0.558546,117,1.45,0.26631,141,2.35,0.292674,19495,17306
pz_act06,443,3.25,0.369031,112,1.15,0.232549,119,0.65,0.15,17515,15934
pz_act10,462,4.2,0.521132,117,0.85,0.254176,132,0.85,0.220943,18363,16316
pz_act14,465,5.35,0.498814,117,1.3,0.218849,135,1.3,0.241704,18878,16484
pz_act20,448,2.7,0.23056,114,0.9,0.21643,134,1.05,0.198348,12239,9398
pz_act25,455,4.15,0.436945,115,1.45,0.328273,137,1.45,0.223312,18447,15275
all,490,0.0,,137,0.0,,153,0.0,,0,0


In [60]:
recall

Unnamed: 0,PSZ2,MCXC,ACT,fp,all
pz14,0.987755,0.854015,0.915033,15828,17968
pz20,0.991837,0.846715,0.921569,23104,25312
pz25,0.991837,0.839416,0.928105,20611,22837
pz40,0.985714,0.854015,0.921569,17306,19495
pz_act06,0.904082,0.817518,0.777778,15934,17515
pz_act10,0.942857,0.854015,0.862745,16316,18363
pz_act14,0.94898,0.854015,0.882353,16484,18878
pz_act20,0.914286,0.832117,0.875817,9398,12239
pz_act25,0.928571,0.839416,0.895425,15275,18447


In [64]:
comp, recall = gen_tables(det_cats_files, true_cats_files, dict_cut={'b' : [20, np.inf]})
comp

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['b'] = sc.galactic.b.degree


Unnamed: 0,PSZ2,PSZ2_err,PSZ2_std,MCXC,MCXC_err,MCXC_std,ACT,ACT_err,ACT_std,all,fp
pz14,591,6.1,0.593385,332,9.5,0.748683,246,19.3,0.987021,7112,6266
pz20,608,7.1,0.496832,342,9.6,0.563822,251,18.75,0.790985,7329,6458
pz25,601,6.85,0.488149,345,9.0,0.676679,246,18.9,0.981138,7946,7082
pz40,597,6.7,0.60741,344,9.7,0.781699,256,19.8,1.340856,7678,6810
pz_act06,505,2.35,0.424729,261,4.5,0.483953,192,9.7,0.87389,2923,2263
pz_act10,538,3.75,0.415964,308,7.05,0.617614,302,13.5,0.738063,4772,3952
pz_act14,548,5.1,0.475173,314,8.8,0.713037,476,18.8,0.869362,5963,4958
pz_act20,512,4.2,0.456532,281,6.2,0.432861,690,15.35,1.119857,4385,3206
pz_act25,514,4.5,0.366348,277,5.8,0.526158,780,14.85,1.044472,4988,3718
all,644,0.0,,917,0.0,,1469,0.0,,0,0


In [65]:
recall

Unnamed: 0,PSZ2,MCXC,ACT,fp,all
pz14,0.917702,0.36205,0.167461,6266,7112
pz20,0.944099,0.372955,0.170865,6458,7329
pz25,0.93323,0.376227,0.167461,7082,7946
pz40,0.927019,0.375136,0.174268,6810,7678
pz_act06,0.784161,0.284624,0.130701,2263,2923
pz_act10,0.835404,0.335878,0.205582,3952,4772
pz_act14,0.850932,0.342421,0.32403,4958,5963
pz_act20,0.795031,0.306434,0.469707,3206,4385
pz_act25,0.798137,0.302072,0.530973,3718,4988


In [66]:
comp, recall = gen_tables(det_cats_files, true_cats_files, dict_cut={'M500' : [3, np.inf],
                                                                    'z' : [-np.inf, 0.1]})
comp

Unnamed: 0,PSZ2,PSZ2_err,PSZ2_std,MCXC,MCXC_err,MCXC_std,ACT,ACT_err,ACT_std,all,fp
pz14,59,0.5,0.135724,42,0.55,0.153469,13,0.1,0.068825,17968,15828
pz20,60,0.9,0.203909,45,0.5,0.114708,13,0.2,0.091766,25312,23104
pz25,62,1.2,0.212751,45,0.7,0.163836,14,0.15,0.081918,22837,20611
pz40,60,0.65,0.181731,43,0.45,0.184605,14,0.2,0.11698,19495,17306
pz_act06,59,0.55,0.169752,44,0.45,0.135239,13,0.05,0.05,17515,15934
pz_act10,59,0.35,0.131289,44,0.45,0.135239,14,0.2,0.091766,18363,16316
pz_act14,60,0.25,0.09934,45,0.4,0.133771,14,0.1,0.068825,18878,16484
pz_act20,59,0.5,0.153897,46,0.25,0.12301,15,0.1,0.068825,12239,9398
pz_act25,59,0.55,0.135239,46,0.45,0.135239,15,0.05,0.05,18447,15275
all,64,0.0,,51,0.0,,17,0.0,,0,0


In [67]:
recall

Unnamed: 0,PSZ2,MCXC,ACT,fp,all
pz14,0.921875,0.823529,0.764706,15828,17968
pz20,0.9375,0.882353,0.764706,23104,25312
pz25,0.96875,0.882353,0.823529,20611,22837
pz40,0.9375,0.843137,0.823529,17306,19495
pz_act06,0.921875,0.862745,0.764706,15934,17515
pz_act10,0.921875,0.862745,0.823529,16316,18363
pz_act14,0.9375,0.882353,0.823529,16484,18878
pz_act20,0.921875,0.901961,0.882353,9398,12239
pz_act25,0.921875,0.901961,0.882353,15275,18447


In [68]:
comp, recall = gen_tables(det_cats_files, true_cats_files, dict_cut={'M500' : [3, np.inf],
                                                                    'z' : [0.1, 0.3]})
comp

Unnamed: 0,PSZ2,PSZ2_err,PSZ2_std,MCXC,MCXC_err,MCXC_std,ACT,ACT_err,ACT_std,all,fp
pz14,446,4.6,0.372756,235,2.7,0.3,132,1.05,0.184605,17968,15828
pz20,453,5.6,0.466792,229,3.95,0.45581,135,2.05,0.373286,25312,23104
pz25,451,5.05,0.483817,228,3.1,0.542412,137,1.9,0.29824,22837,20611
pz40,449,5.85,0.466087,230,3.8,0.438898,133,2.15,0.364728,19495,17306
pz_act06,396,3.1,0.362012,207,2.55,0.26631,120,0.8,0.186378,17515,15934
pz_act10,419,3.9,0.542412,219,2.65,0.35,131,1.25,0.25,18363,16316
pz_act14,422,3.9,0.428584,221,2.3,0.348682,137,1.35,0.243602,18878,16484
pz_act20,405,3.0,0.45306,210,1.7,0.272416,134,1.05,0.256238,12239,9398
pz_act25,407,3.95,0.407011,210,2.5,0.303488,135,1.6,0.265568,18447,15275
all,454,0.0,,288,0.0,,159,0.0,,0,0


In [69]:
recall

Unnamed: 0,PSZ2,MCXC,ACT,fp,all
pz14,0.982379,0.815972,0.830189,15828,17968
pz20,0.997797,0.795139,0.849057,23104,25312
pz25,0.993392,0.791667,0.861635,20611,22837
pz40,0.988987,0.798611,0.836478,17306,19495
pz_act06,0.872247,0.71875,0.754717,15934,17515
pz_act10,0.922907,0.760417,0.823899,16316,18363
pz_act14,0.929515,0.767361,0.861635,16484,18878
pz_act20,0.89207,0.729167,0.842767,9398,12239
pz_act25,0.896476,0.729167,0.849057,15275,18447


In [72]:
comp, recall = gen_tables(det_cats_files, true_cats_files, dict_cut={'M500' : [3, np.inf],
                                                                    'z' : [0.3, 0.6]})
comp

Unnamed: 0,PSZ2,PSZ2_err,PSZ2_std,MCXC,MCXC_err,MCXC_std,ACT,ACT_err,ACT_std,all,fp
pz14,324,3.85,0.572506,66,1.0,0.205196,239,5.15,0.603826,17968,15828
pz20,328,4.8,0.45073,66,1.75,0.323427,239,6.1,0.602189,25312,23104
pz25,328,4.2,0.367065,65,1.15,0.195677,241,5.55,0.678524,22837,20611
pz40,325,4.4,0.56847,66,0.95,0.198348,233,5.55,0.400493,19495,17306
pz_act06,273,1.65,0.318508,58,0.65,0.166623,174,1.65,0.378883,17515,15934
pz_act10,297,2.55,0.294467,64,0.5,0.170139,230,3.5,0.373462,18363,16316
pz_act14,301,3.3,0.417385,64,0.85,0.166623,264,4.1,0.458258,18878,16484
pz_act20,280,1.8,0.267542,59,0.6,0.183533,303,4.05,0.38027,12239,9398
pz_act25,289,2.65,0.342399,61,0.65,0.208693,326,4.1,0.53262,18447,15275
all,331,0.0,,89,0.0,,457,0.0,,0,0


In [73]:
recall

Unnamed: 0,PSZ2,MCXC,ACT,fp,all
pz14,0.978852,0.741573,0.522976,15828,17968
pz20,0.990937,0.741573,0.522976,23104,25312
pz25,0.990937,0.730337,0.527352,20611,22837
pz40,0.981873,0.741573,0.509847,17306,19495
pz_act06,0.824773,0.651685,0.380744,15934,17515
pz_act10,0.897281,0.719101,0.503282,16316,18363
pz_act14,0.909366,0.719101,0.577681,16484,18878
pz_act20,0.845921,0.662921,0.66302,9398,12239
pz_act25,0.873112,0.685393,0.713348,15275,18447


In [74]:
comp, recall = gen_tables(det_cats_files, true_cats_files, dict_cut={'M500' : [3, np.inf],
                                                                    'z' : [0.6, np.inf]})
comp

Unnamed: 0,PSZ2,PSZ2_err,PSZ2_std,MCXC,MCXC_err,MCXC_std,ACT,ACT_err,ACT_std,all,fp
pz14,34,0.25,0.12301,6,0.15,0.081918,81,3.45,0.51542,17968,15828
pz20,36,0.35,0.109424,6,0.05,0.05,75,4.35,0.603826,25312,23104
pz25,36,0.4,0.152177,6,0.0,0.0,82,3.7,0.528653,22837,20611
pz40,35,0.45,0.169752,6,0.1,0.068825,86,4.05,0.51542,19495,17306
pz_act06,27,0.2,0.091766,2,0.0,0.0,47,1.45,0.276015,17515,15934
pz_act10,31,0.15,0.081918,4,0.1,0.068825,95,2.4,0.358359,18363,16316
pz_act14,32,0.45,0.135239,5,0.0,0.0,134,3.45,0.461548,18878,16484
pz_act20,29,0.25,0.09934,2,0.0,0.0,193,2.6,0.4,12239,9398
pz_act25,30,0.1,0.068825,3,0.1,0.068825,216,3.1,0.422399,18447,15275
all,36,0.0,,7,0.0,,342,0.0,,0,0


In [75]:
recall

Unnamed: 0,PSZ2,MCXC,ACT,fp,all
pz14,0.944444,0.857143,0.236842,15828,17968
pz20,1.0,0.857143,0.219298,23104,25312
pz25,1.0,0.857143,0.239766,20611,22837
pz40,0.972222,0.857143,0.251462,17306,19495
pz_act06,0.75,0.285714,0.137427,15934,17515
pz_act10,0.861111,0.571429,0.277778,16316,18363
pz_act14,0.888889,0.714286,0.391813,16484,18878
pz_act20,0.805556,0.285714,0.564327,9398,12239
pz_act25,0.833333,0.428571,0.631579,15275,18447
