Пояснение по gen_table: параметр dict_cut выбирает, как обрезать каталоги.

Например при dict_cut = {'z' : [0.1, 0.3]} в каталогах останутся только объекты с 0.1 =< z < 0.3. 

Для b используется модуль: dict_cut = {'b' : [20, np.inf]} оставит в каталогах объекты для которых |b| >= 20 

In [1]:
import numpy as np
import pandas as pd
import os
from matplotlib import pyplot as plt
from astropy.io import fits
from astropy.table import Table
from astropy.coordinates import SkyCoord
from astropy import units as u
from DS_healpix_fragmentation import radec2pix
from DS_Planck_Unet import val_pix

In [2]:
def get_name(file):
    if 'pz_act' in file:
        return file[5:13]
    return file[5:9]

In [3]:
def calc_error(det_cat, true_cat, shift=15/60, match_dist=5/60, n_try=200, seed=0):
    import numpy as np
    from astropy.coordinates import SkyCoord
    from astropy import units as u
    
    error = []
    np.random.seed(seed)
    for i in range(n_try):
        det_sc = SkyCoord(ra=np.array(det_cat['RA']) * u.degree, 
                          dec=np.array(det_cat['DEC']) * u.degree, frame='icrs')
        angles = np.random.randint(0, 360, len(det_cat))
        det_sc = det_sc.directional_offset_by(angles*u.degree, shift)

        true_sc = SkyCoord(ra=np.array(true_cat['RA']) * u.degree, 
                           dec=np.array(true_cat['DEC']) * u.degree, frame='icrs')
        _, d2d, _ = det_sc.match_to_catalog_sky(true_sc)
        c_error = np.count_nonzero(d2d.degree < match_dist)
        error.append(c_error)
    error = np.array(error)
    return error.mean(), error.std() / np.sqrt(n_try - 1)

In [4]:
def cut_cat(df, dict_cut = {'z' : [-np.inf, np.inf], 'M500' : [-np.inf, np.inf], 'b' :[-np.inf, np.inf]}, 
           big_pix=None):
 
    sc = SkyCoord(ra=np.array(df['RA'])*u.degree, 
                  dec=np.array(df['DEC'])*u.degree, frame='icrs')
    df['b'] = sc.galactic.b.degree
    for prm in dict_cut:
        if prm == 'b':
            df = df[np.abs(df[prm]) >= dict_cut[prm][0]]
            df = df[np.abs(df[prm]) < dict_cut[prm][1]]
            
        df = df[df[prm] >= dict_cut[prm][0]]
        df = df[df[prm] < dict_cut[prm][1]]
    df.index = np.arange(len(df))
    
    if not (big_pix is None):
        pix2 = radec2pix(df['RA'], df['DEC'], 2)
        df = df[np.in1d(pix2, big_pix)]
        df.index = np.arange(len(df))
    
    return df

In [5]:
def gen_tables(det_cats_files, true_cats_files, dict_cut = {'z' : [-np.inf, np.inf], 'M500' : [-np.inf, np.inf], 
                        'b' :[-np.inf, np.inf]}, big_pix = None, match_dist=5/60, shift_err=15/60, 
                        n_err=20):
    
    true_cats = {os.path.splitext(os.path.basename(file))[0] : pd.read_csv(file) for file in true_cats_files}
    det_cats = {get_name(os.path.splitext(os.path.basename(file))[0]) : 
                pd.read_csv(file) for file in det_cats_files}
    
    comp_df = []
    recall_df = []
    
    for name in det_cats:
        df = det_cats[name]
        df = df[df['status'] != 'fn']
        df.index = np.arange(len(df))
        if 'b' in dict_cut:
            det_cats[name] = cut_cat(df, {'b' : dict_cut['b']}, big_pix)
        else:
            det_cats[name] = cut_cat(df, {}, big_pix)
    for name in true_cats:
        true_cats[name] = cut_cat(true_cats[name], dict_cut, big_pix)
    
    for det_name in det_cats:
        det = det_cats[det_name]
        line = {}
        line_r = {}

        det_sc = SkyCoord(ra=np.array(det['RA'])*u.degree, 
                      dec=np.array(det['DEC'])*u.degree, frame='icrs') 

        for tr_name in true_cats: 
            tr = true_cats[tr_name]
            tr_sc = SkyCoord(ra=np.array(tr['RA'])*u.degree, 
                          dec=np.array(tr['DEC'])*u.degree, frame='icrs')
            
            idx, d2d, _ = tr_sc.match_to_catalog_sky(det_sc)
            matched = d2d.degree <= match_dist
            
            line[tr_name] = np.count_nonzero(matched)
            line[tr_name+'_err'], line[tr_name+'_std'] = calc_error(det, tr, shift=shift_err, n_try=n_err)

            line_r[tr_name] = line[tr_name] / len(tr)
            
        line['all'] = len(det)
        line['fp'] = np.count_nonzero(det['status'] == 'fp')
        line_r['fp'] = line['fp']
        line_r['all'] = line['all']
        comp_df.append(pd.DataFrame(line, index=[det_name]))
        recall_df.append(pd.DataFrame(line_r, index=[det_name]))
        line = {}
    
    for tr_name in true_cats: 
        line[tr_name] = len(true_cats[tr_name])
        line[tr_name+'_err'] = 0
    line['fp'] = 0
    line['all'] = 0
    comp_df.append(pd.DataFrame(line, index=['all']))
    
    comp_df = pd.concat(comp_df)
    recall_df = pd.concat(recall_df)
    
    return comp_df, recall_df

In [6]:
tr_dir = '/home/rt2122/Data/original_catalogs/csv/'
true_cats_files = [os.path.join(tr_dir, name) for name in next(os.walk(tr_dir))[-1]]
det_dir = '/home/rt2122/Data/detected_cats/'
det_cats_files = [os.path.join(det_dir, name) for name in sorted(next(os.walk(det_dir))[-1]) if 'full' in name]
#det_cats_files = ['/home/rt2122/Data/detected_cats/full_pz14_thr0.1_step8.csv', 
#                 '/home/rt2122/Data/detected_cats/full_pz_act10_thr0.1_step8.csv']

In [8]:
comp, recall = gen_tables(det_cats_files, true_cats_files, dict_cut={'b' : [20, np.inf]}, 
                          shift_err=15/60, n_err=20)
comp

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['b'] = sc.galactic.b.degree


Unnamed: 0,PSZ2,PSZ2_err,PSZ2_std,MCXC,MCXC_err,MCXC_std,RM,RM_err,RM_std,ACT,ACT_err,ACT_std,all,fp
pz14,591,6.1,0.593385,332,9.5,0.748683,939,202.35,3.35471,246,19.3,0.987021,7112,6266
pz20,608,7.1,0.496832,342,9.6,0.563822,977,206.95,3.350943,251,18.75,0.790985,7329,6458
pz25,601,6.85,0.488149,345,9.0,0.676679,1052,227.0,3.728976,246,18.9,0.981138,7946,7082
pz40,597,6.7,0.60741,344,9.7,0.781699,967,215.6,2.574572,256,19.8,1.340856,7678,6810
pz_act06,505,2.35,0.424729,261,4.5,0.483953,563,85.45,1.70984,192,9.7,0.87389,2923,2263
pz_act10,538,3.75,0.415964,308,7.05,0.617614,781,143.65,2.90034,302,13.5,0.738063,4772,3952
pz_act14,548,5.1,0.475173,314,8.8,0.713037,922,186.25,2.853322,476,18.8,0.869362,5963,4958
pz_act20,512,4.2,0.456532,281,6.2,0.432861,882,134.9,2.871732,690,15.35,1.119857,4385,3206
pz_act25,514,4.5,0.366348,277,5.8,0.526158,918,147.45,2.329982,780,14.85,1.044472,4988,3718
all,644,0.0,,917,0.0,,17816,0.0,,1469,0.0,,0,0


In [9]:
recall

Unnamed: 0,PSZ2,MCXC,RM,ACT,fp,all
pz14,0.917702,0.36205,0.052705,0.167461,6266,7112
pz20,0.944099,0.372955,0.054838,0.170865,6458,7329
pz25,0.93323,0.376227,0.059048,0.167461,7082,7946
pz40,0.927019,0.375136,0.054277,0.174268,6810,7678
pz_act06,0.784161,0.284624,0.031601,0.130701,2263,2923
pz_act10,0.835404,0.335878,0.043837,0.205582,3952,4772
pz_act14,0.850932,0.342421,0.051751,0.32403,4958,5963
pz_act20,0.795031,0.306434,0.049506,0.469707,3206,4385
pz_act25,0.798137,0.302072,0.051527,0.530973,3718,4988


In [10]:
comp, recall = gen_tables(det_cats_files, true_cats_files, dict_cut={'b' : [20, np.inf]}, 
                          shift_err=15/60, n_err=200)
comp

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['b'] = sc.galactic.b.degree


Unnamed: 0,PSZ2,PSZ2_err,PSZ2_std,MCXC,MCXC_err,MCXC_std,RM,RM_err,RM_std,ACT,ACT_err,ACT_std,all,fp
pz14,591,6.305,0.175825,332,9.005,0.228991,939,205.74,1.022713,246,19.38,0.298115,7112,6266
pz20,608,6.6,0.168946,342,9.565,0.218443,977,203.77,1.004155,251,17.735,0.29335,7329,6458
pz25,601,7.73,0.186573,345,10.375,0.212243,1052,228.605,1.099244,246,18.84,0.314829,7946,7082
pz40,597,7.08,0.177696,344,10.215,0.226939,967,222.385,1.001799,256,20.27,0.319367,7678,6810
pz_act06,505,2.69,0.127478,261,3.925,0.136896,563,88.49,0.621192,192,8.905,0.219707,2923,2263
pz_act10,538,3.91,0.129396,308,6.46,0.193985,781,146.65,0.858793,302,13.72,0.257662,4772,3952
pz_act14,548,5.445,0.166504,314,7.985,0.194972,922,186.98,0.974782,476,18.015,0.302791,5963,4958
pz_act20,512,4.165,0.153809,281,5.91,0.175824,882,136.195,0.875711,690,13.895,0.250667,4385,3206
pz_act25,514,4.795,0.142712,277,6.34,0.194321,918,145.245,0.866794,780,14.655,0.2767,4988,3718
all,644,0.0,,917,0.0,,17816,0.0,,1469,0.0,,0,0


In [11]:
recall

Unnamed: 0,PSZ2,MCXC,RM,ACT,fp,all
pz14,0.917702,0.36205,0.052705,0.167461,6266,7112
pz20,0.944099,0.372955,0.054838,0.170865,6458,7329
pz25,0.93323,0.376227,0.059048,0.167461,7082,7946
pz40,0.927019,0.375136,0.054277,0.174268,6810,7678
pz_act06,0.784161,0.284624,0.031601,0.130701,2263,2923
pz_act10,0.835404,0.335878,0.043837,0.205582,3952,4772
pz_act14,0.850932,0.342421,0.051751,0.32403,4958,5963
pz_act20,0.795031,0.306434,0.049506,0.469707,3206,4385
pz_act25,0.798137,0.302072,0.051527,0.530973,3718,4988


In [12]:
comp, recall = gen_tables(det_cats_files, true_cats_files, dict_cut={'b' : [20, np.inf]}, 
                          shift_err=60/60, n_err=20)
comp

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['b'] = sc.galactic.b.degree


Unnamed: 0,PSZ2,PSZ2_err,PSZ2_std,MCXC,MCXC_err,MCXC_std,RM,RM_err,RM_std,ACT,ACT_err,ACT_std,all,fp
pz14,591,4.55,0.489226,332,6.1,0.68017,939,117.55,3.028353,246,9.8,0.600877,7112,6266
pz20,608,4.05,0.467215,342,5.45,0.489226,977,125.75,2.266315,251,9.2,0.759501,7329,6458
pz25,601,4.2,0.374166,345,6.6,0.483409,1052,132.65,2.964949,246,9.6,0.60437,7946,7082
pz40,597,5.15,0.477245,344,6.6,0.472396,967,131.05,2.515714,256,10.8,0.655342,7678,6810
pz_act06,505,2.5,0.400657,261,2.3,0.241704,563,49.7,1.393783,192,3.55,0.499868,2923,2263
pz_act10,538,2.85,0.378883,308,4.35,0.471699,781,79.55,1.811477,302,5.4,0.582192,4772,3952
pz_act14,548,3.1,0.323631,314,5.7,0.487205,922,98.5,2.397916,476,7.85,0.665997,5963,4958
pz_act20,512,2.65,0.310136,281,3.65,0.424729,882,74.95,2.017652,690,6.1,0.469602,4385,3206
pz_act25,514,3.35,0.424729,277,4.55,0.510289,918,83.25,1.224476,780,5.6,0.46112,4988,3718
all,644,0.0,,917,0.0,,17816,0.0,,1469,0.0,,0,0


In [13]:
recall

Unnamed: 0,PSZ2,MCXC,RM,ACT,fp,all
pz14,0.917702,0.36205,0.052705,0.167461,6266,7112
pz20,0.944099,0.372955,0.054838,0.170865,6458,7329
pz25,0.93323,0.376227,0.059048,0.167461,7082,7946
pz40,0.927019,0.375136,0.054277,0.174268,6810,7678
pz_act06,0.784161,0.284624,0.031601,0.130701,2263,2923
pz_act10,0.835404,0.335878,0.043837,0.205582,3952,4772
pz_act14,0.850932,0.342421,0.051751,0.32403,4958,5963
pz_act20,0.795031,0.306434,0.049506,0.469707,3206,4385
pz_act25,0.798137,0.302072,0.051527,0.530973,3718,4988
