Пояснение по gen_table: параметр dict_cut выбирает, как обрезать каталоги.

Например при dict_cut = {'z' : [0.1, 0.3]} в каталогах останутся только объекты с 0.1 =< z < 0.3. 

Для b используется модуль: dict_cut = {'b' : [20, np.inf]} оставит в каталогах объекты для которых |b| >= 20 

In [12]:
import numpy as np
import pandas as pd
import os
from matplotlib import pyplot as plt
from astropy.io import fits
from astropy.table import Table
from astropy.coordinates import SkyCoord
from astropy import units as u
from DS_healpix_fragmentation import radec2pix
from DS_Planck_Unet import val_pix

In [13]:
def get_name(file):
    if 'pz_act' in file:
        return file[5:13]
    return file[5:9]

In [14]:
def calc_error(det_cat, true_cat, shift=15/60, match_dist=5/60, n_try=200, seed=0):
    import numpy as np
    from astropy.coordinates import SkyCoord
    from astropy import units as u
    
    error = []
    np.random.seed(seed)
    for i in range(n_try):
        det_sc = SkyCoord(ra=np.array(det_cat['RA']) * u.degree, 
                          dec=np.array(det_cat['DEC']) * u.degree, frame='icrs')
        angles = np.random.randint(0, 360, len(det_cat))
        det_sc = det_sc.directional_offset_by(angles*u.degree, shift)

        true_sc = SkyCoord(ra=np.array(true_cat['RA']) * u.degree, 
                           dec=np.array(true_cat['DEC']) * u.degree, frame='icrs')
        _, d2d, _ = det_sc.match_to_catalog_sky(true_sc)
        c_error = np.count_nonzero(d2d.degree < match_dist)
        error.append(c_error)
    error = np.array(error)
    return error.mean(), error.std() / np.sqrt(n_try - 1)

In [15]:
def cut_cat(df, dict_cut = {'z' : [-np.inf, np.inf], 'M500' : [-np.inf, np.inf], 'b' :[-np.inf, np.inf]}, 
           big_pix=None):
 
    sc = SkyCoord(ra=np.array(df['RA'])*u.degree, 
                  dec=np.array(df['DEC'])*u.degree, frame='icrs')
    df['b'] = sc.galactic.b.degree
    for prm in dict_cut:
        if prm == 'b':
            df = df[np.abs(df[prm]) >= dict_cut[prm][0]]
            df = df[np.abs(df[prm]) < dict_cut[prm][1]]
        else:
            df = df[df[prm] >= dict_cut[prm][0]]
            df = df[df[prm] < dict_cut[prm][1]]
        df.index = np.arange(len(df))
    
    if not (big_pix is None):
        pix2 = radec2pix(df['RA'], df['DEC'], 2)
        df = df[np.in1d(pix2, big_pix)]
        df.index = np.arange(len(df))
    
    return df

In [16]:
def gen_tables(det_cats_files, true_cats_files, dict_cut = {'z' : [-np.inf, np.inf], 'M500' : [-np.inf, np.inf], 
                        'b' :[-np.inf, np.inf]}, big_pix = None, match_dist=5/60, shift_err=15/60, 
                        n_err=20):
    
    true_cats = {os.path.splitext(os.path.basename(file))[0] : pd.read_csv(file) for file in true_cats_files}
    det_cats = {get_name(os.path.splitext(os.path.basename(file))[0]) : 
                pd.read_csv(file) for file in det_cats_files}
    
    comp_df = []
    recall_df = []
    
    for name in det_cats:
        df = det_cats[name]
        df = df[df['status'] != 'fn']
        df.index = np.arange(len(df))
        if 'b' in dict_cut:
            det_cats[name] = cut_cat(df, {'b' : dict_cut['b']}, big_pix)
        else:
            det_cats[name] = cut_cat(df, {}, big_pix)
    for name in true_cats:
        true_cats[name] = cut_cat(true_cats[name], dict_cut, big_pix)
    
    for det_name in det_cats:
        det = det_cats[det_name]
        line = {}
        line_r = {}

        det_sc = SkyCoord(ra=np.array(det['RA'])*u.degree, 
                      dec=np.array(det['DEC'])*u.degree, frame='icrs') 

        for tr_name in true_cats: 
            tr = true_cats[tr_name]
            tr_sc = SkyCoord(ra=np.array(tr['RA'])*u.degree, 
                          dec=np.array(tr['DEC'])*u.degree, frame='icrs')
            
            idx, d2d, _ = tr_sc.match_to_catalog_sky(det_sc)
            matched = d2d.degree <= match_dist
            
            line[tr_name] = np.count_nonzero(matched)
            line[tr_name+'_err'], line[tr_name+'_std'] = calc_error(det, tr, shift=shift_err, n_try=n_err)

            line_r[tr_name] = line[tr_name] / len(tr)
            
        line['all'] = len(det)
        line['fp'] = np.count_nonzero(det['status'] == 'fp')
        line_r['fp'] = line['fp']
        line_r['all'] = line['all']
        comp_df.append(pd.DataFrame(line, index=[det_name]))
        recall_df.append(pd.DataFrame(line_r, index=[det_name]))
        line = {}
    
    for tr_name in true_cats: 
        line[tr_name] = len(true_cats[tr_name])
        line[tr_name+'_err'] = 0
    line['fp'] = 0
    line['all'] = 0
    comp_df.append(pd.DataFrame(line, index=['all']))
    
    comp_df = pd.concat(comp_df)
    recall_df = pd.concat(recall_df)
    
    return comp_df, recall_df

In [17]:
true_cats_files = ['/home/rt2122/Data/clusters/mcxcwp.csv', '/home/rt2122/Data/clusters/actwp.csv']
det_dir = '/home/rt2122/Data/detected_cats/'
det_cats_files = [os.path.join(det_dir, name) for name in sorted(next(os.walk(det_dir))[-1]) if 'full' in name]

In [18]:
comp, recall = gen_tables(det_cats_files, true_cats_files)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['b'] = sc.galactic.b.degree


In [19]:
comp

Unnamed: 0,mcxcwp,mcxcwp_err,mcxcwp_std,actwp,actwp_err,actwp_std,all,fp
pz14,214,12.6,0.638254,458,38.55,1.310835,17968,15828
pz20,221,13.15,0.696892,485,43.75,1.374342,25312,23104
pz25,227,13.9,0.763992,497,48.3,1.3,22837,20611
pz40,225,13.4,0.758808,482,44.0,1.378405,19495,17306
pz_act06,112,7.0,0.606977,266,16.55,0.977308,17515,15934
pz_act10,163,9.45,0.737974,571,26.8,1.060288,18363,16316
pz_act14,174,8.8,0.749034,925,35.15,1.131545,18878,16484
pz_act20,119,7.25,0.556658,1533,30.85,1.156845,12239,9398
pz_act25,129,9.0,0.858395,1811,33.7,1.367094,18447,15275
all,1193,0.0,,3720,0.0,,0,0


In [20]:
recall

Unnamed: 0,mcxcwp,actwp,fp,all
pz14,0.17938,0.123118,15828,17968
pz20,0.185247,0.130376,23104,25312
pz25,0.190277,0.133602,20611,22837
pz40,0.1886,0.12957,17306,19495
pz_act06,0.093881,0.071505,15934,17515
pz_act10,0.13663,0.153495,16316,18363
pz_act14,0.145851,0.248656,16484,18878
pz_act20,0.099749,0.412097,9398,12239
pz_act25,0.108131,0.486828,15275,18447


In [21]:
comp, recall = gen_tables(det_cats_files, true_cats_files, {'M500' : [3, np.inf]})
comp

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['b'] = sc.galactic.b.degree


Unnamed: 0,mcxcwp,mcxcwp_err,mcxcwp_std,actwp,actwp_err,actwp_std,all,fp
pz14,53,1.25,0.227977,203,7.0,0.557249,17968,15828
pz20,48,1.85,0.232549,199,8.3,0.754286,25312,23104
pz25,45,1.35,0.220943,214,7.95,0.716259,22837,20611
pz40,49,1.6,0.30262,205,8.25,0.4967,19495,17306
pz_act06,26,1.4,0.222427,120,2.85,0.442927,17515,15934
pz_act10,39,1.35,0.274101,223,5.1,0.480679,18363,16316
pz_act14,41,0.85,0.195677,298,6.3,0.538516,18878,16484
pz_act20,29,0.55,0.26631,408,6.4,0.595598,12239,9398
pz_act25,29,1.2,0.267542,450,6.05,0.658847,18447,15275
all,129,0.0,,689,0.0,,0,0


In [22]:
recall

Unnamed: 0,mcxcwp,actwp,fp,all
pz14,0.410853,0.29463,15828,17968
pz20,0.372093,0.288824,23104,25312
pz25,0.348837,0.310595,20611,22837
pz40,0.379845,0.297533,17306,19495
pz_act06,0.20155,0.174165,15934,17515
pz_act10,0.302326,0.323657,16316,18363
pz_act14,0.317829,0.432511,16484,18878
pz_act20,0.224806,0.592163,9398,12239
pz_act25,0.224806,0.65312,15275,18447


In [23]:
comp, recall = gen_tables(det_cats_files, true_cats_files, {'M500' : [4, np.inf]})
comp

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['b'] = sc.galactic.b.degree


Unnamed: 0,mcxcwp,mcxcwp_err,mcxcwp_std,actwp,actwp_err,actwp_std,all,fp
pz14,20,0.35,0.131289,71,1.1,0.29824,17968,15828
pz20,18,0.8,0.171679,71,1.9,0.331662,25312,23104
pz25,16,0.8,0.155597,80,2.0,0.290191,22837,20611
pz40,19,0.7,0.163836,79,2.6,0.358359,19495,17306
pz_act06,12,0.75,0.175844,43,0.35,0.109424,17515,15934
pz_act10,16,0.45,0.184605,71,0.7,0.163836,18363,16316
pz_act14,17,0.35,0.109424,87,1.7,0.377666,18878,16484
pz_act20,15,0.3,0.163836,104,1.35,0.292674,12239,9398
pz_act25,16,0.65,0.181731,110,1.35,0.254176,18447,15275
all,56,0.0,,149,0.0,,0,0


In [24]:
recall

Unnamed: 0,mcxcwp,actwp,fp,all
pz14,0.357143,0.47651,15828,17968
pz20,0.321429,0.47651,23104,25312
pz25,0.285714,0.536913,20611,22837
pz40,0.339286,0.530201,17306,19495
pz_act06,0.214286,0.288591,15934,17515
pz_act10,0.285714,0.47651,16316,18363
pz_act14,0.303571,0.583893,16484,18878
pz_act20,0.267857,0.697987,9398,12239
pz_act25,0.285714,0.738255,15275,18447


In [25]:
comp, recall = gen_tables(det_cats_files, true_cats_files, {'M500' : [5, np.inf]})
comp

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['b'] = sc.galactic.b.degree


Unnamed: 0,mcxcwp,mcxcwp_err,mcxcwp_std,actwp,actwp_err,actwp_std,all,fp
pz14,8,0.2,0.11698,25,0.35,0.131289,17968,15828
pz20,7,0.3,0.105131,26,0.65,0.195677,25312,23104
pz25,5,0.15,0.081918,26,0.6,0.168585,22837,20611
pz40,8,0.3,0.105131,27,0.6,0.168585,19495,17306
pz_act06,6,0.45,0.114133,13,0.05,0.05,17515,15934
pz_act10,7,0.35,0.131289,22,0.1,0.068825,18363,16316
pz_act14,7,0.15,0.081918,25,0.35,0.166623,18878,16484
pz_act20,6,0.1,0.068825,27,0.25,0.09934,12239,9398
pz_act25,7,0.45,0.153469,27,0.2,0.091766,18447,15275
all,22,0.0,,35,0.0,,0,0


In [26]:
recall

Unnamed: 0,mcxcwp,actwp,fp,all
pz14,0.363636,0.714286,15828,17968
pz20,0.318182,0.742857,23104,25312
pz25,0.227273,0.742857,20611,22837
pz40,0.363636,0.771429,17306,19495
pz_act06,0.272727,0.371429,15934,17515
pz_act10,0.318182,0.628571,16316,18363
pz_act14,0.318182,0.714286,16484,18878
pz_act20,0.272727,0.771429,9398,12239
pz_act25,0.318182,0.771429,15275,18447
