In [8]:
import numpy as np
import pandas as pd
import os
from DS_data_transformation import calc_error
from DS_Planck_Unet import train_pix_act

In [2]:
def get_name(file):
    if 'pz_act' in file:
        return file[5:13]
    return file[5:9]

In [19]:
def cut_cat(df, dict_cut = {'z' : [-np.inf, np.inf], 'M500' : [-np.inf, np.inf], 'b' :[-np.inf, np.inf]}, 
           big_pix=None):
    from astropy.coordinates import SkyCoord
    from astropy import units as u
    from DS_healpix_fragmentation import radec2pix
    import numpy as np
 
    sc = SkyCoord(ra=np.array(df['RA'])*u.degree, 
                  dec=np.array(df['DEC'])*u.degree, frame='icrs')
    df['b'] = sc.galactic.b.degree
    for prm in dict_cut:
        if prm == 'b':
            df = df[np.abs(df[prm]) >= dict_cut[prm][0]]
            df = df[np.abs(df[prm]) < dict_cut[prm][1]]
            df.index = np.arange(len(df))
        else: 
            df = df[df[prm] >= dict_cut[prm][0]]
            df = df[df[prm] < dict_cut[prm][1]]
            df.index = np.arange(len(df))
    
    if not (big_pix is None):
        pix2 = radec2pix(df['RA'], df['DEC'], 2)
        df = df[np.in1d(pix2, big_pix)]
        df.index = np.arange(len(df))
    
    return df

In [15]:
def gen_tables(det_cats_files, true_cats_files, 
               dict_cut = {'z' : [-np.inf, np.inf], 'M500' : [-np.inf, np.inf], 
                        'b' :[-np.inf, np.inf]}, big_pix = None, match_dist=5/60):
    from astropy.coordinates import SkyCoord
    from astropy import units as u
    
    true_cats = {os.path.splitext(os.path.basename(file))[0] : pd.read_csv(file) for file in true_cats_files}
    det_cats = {get_name(os.path.splitext(os.path.basename(file))[0]) : 
                pd.read_csv(file) for file in det_cats_files}
    
    comp_df = []
    recall_df = []
    
    for name in det_cats:
        df = det_cats[name]
        df = df[df['status'] != 'fn']
        df.index = np.arange(len(df))
        if 'b' in dict_cut:
            det_cats[name] = cut_cat(df, {'b' : dict_cut['b']}, big_pix)
        else:
            det_cats[name] = cut_cat(df, {}, big_pix)
    for name in true_cats:
        true_cats[name] = cut_cat(true_cats[name], dict_cut, big_pix)
    
    for det_name in det_cats:
        det = det_cats[det_name]
        line = {}
        line_r = {}

        det_sc = SkyCoord(ra=np.array(det['RA'])*u.degree, 
                      dec=np.array(det['DEC'])*u.degree, frame='icrs') 

        for tr_name in true_cats: 
            tr = true_cats[tr_name]
            tr_sc = SkyCoord(ra=np.array(tr['RA'])*u.degree, 
                          dec=np.array(tr['DEC'])*u.degree, frame='icrs')
            
            idx, d2d, _ = tr_sc.match_to_catalog_sky(det_sc)
            matched = d2d.degree <= match_dist
            
            line[tr_name] = np.count_nonzero(matched)
            line[tr_name+'_err'], line[tr_name+'_std'] = calc_error(det, tr)

            line_r[tr_name] = line[tr_name] / len(tr)
            
        line['all'] = len(det)
        line['fp'] = np.count_nonzero(det['status'] == 'fp')
        line_r['fp'] = line['fp']
        line_r['all'] = line['all']
        comp_df.append(pd.DataFrame(line, index=[det_name]))
        recall_df.append(pd.DataFrame(line_r, index=[det_name]))
        line = {}
    
    for tr_name in true_cats: 
        line[tr_name] = len(true_cats[tr_name])
        line[tr_name+'_err'] = 0
    line['fp'] = 0
    line['all'] = 0
    comp_df.append(pd.DataFrame(line, index=['all']))
    
    comp_df = pd.concat(comp_df)
    recall_df = pd.concat(recall_df)
    
    return comp_df, recall_df

In [10]:
tr_dir = '/home/rt2122/Data/original_catalogs/csv/'
true_cats_files = [os.path.join(tr_dir, name) for name in next(os.walk(tr_dir))[-1]]
det_dir = '/home/rt2122/Data/detected_cats/'
det_cats_files = [os.path.join(det_dir, name) for name in sorted(next(os.walk(det_dir))[-1]) if 'full' in name]
det_cats_files = [file for file in det_cats_files if 'pz_act' in file]

In [13]:
cur_pix = list(set(range(48)) - set(train_pix_act))
print(repr(train_pix_act))
repr(list(range(48)))

[2, 8, 10, 11, 13, 15, 32, 33, 36, 37, 40, 42, 43, 44, 45, 46]


'[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]'

In [20]:
comp, recall = gen_tables(det_cats_files, true_cats_files, {}, cur_pix)
comp

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['b'] = sc.galactic.b.degree
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['b'] = sc.galactic.b.degree


Unnamed: 0,PSZ2,PSZ2_err,PSZ2_std,MCXC,MCXC_err,MCXC_std,RM,RM_err,RM_std,ACT,ACT_err,ACT_std,all,fp
pz_act06,724,6.2,0.438898,340,4.45,0.472814,336,37.05,1.513927,124,2.4,0.365629,13921,13110
pz_act10,829,7.3,0.56242,388,6.6,0.554408,460,60.3,1.504554,165,3.35,0.482728,13061,12089
pz_act14,815,7.05,0.737974,396,7.4,0.549641,488,70.75,2.040221,171,4.0,0.528155,12472,11503
pz_act20,718,4.2,0.495241,351,4.2,0.531136,390,52.5,1.477017,134,2.75,0.383028,7325,6508
pz_act25,755,7.45,0.697646,365,7.15,0.577084,413,56.55,1.936458,139,3.5,0.455955,12580,11712
all,1025,0.0,,1041,0.0,,12452,0.0,,898,0.0,,0,0


In [22]:
recall

Unnamed: 0,PSZ2,MCXC,RM,ACT,fp,all
pz_act06,0.706341,0.326609,0.026984,0.138085,13110,13921
pz_act10,0.80878,0.372719,0.036942,0.183742,12089,13061
pz_act14,0.795122,0.380403,0.03919,0.190423,11503,12472
pz_act20,0.700488,0.337176,0.03132,0.14922,6508,7325
pz_act25,0.736585,0.350624,0.033167,0.154788,11712,12580


In [23]:
comp, recall = gen_tables(det_cats_files, true_cats_files, {'b' : [20, np.inf]}, cur_pix)
comp

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['b'] = sc.galactic.b.degree


Unnamed: 0,PSZ2,PSZ2_err,PSZ2_std,MCXC,MCXC_err,MCXC_std,RM,RM_err,RM_std,ACT,ACT_err,ACT_std,all,fp
pz_act06,559,1.65,0.418487,285,1.95,0.234801,335,33.6,1.498069,124,1.85,0.310136,2550,1918
pz_act10,625,3.1,0.339504,330,3.55,0.294467,458,53.65,1.46947,165,2.4,0.406526,4031,3278
pz_act14,622,4.15,0.405716,337,3.75,0.512348,486,69.5,1.618154,171,2.95,0.351501,4842,4082
pz_act20,567,2.95,0.438148,300,3.4,0.499473,388,50.75,1.923367,134,2.35,0.318508,3451,2796
pz_act25,581,2.85,0.301531,306,3.8,0.45073,411,54.15,1.775973,139,2.5,0.344124,4185,3509
all,744,0.0,,921,0.0,,12416,0.0,,898,0.0,,0,0


In [24]:
recall

Unnamed: 0,PSZ2,MCXC,RM,ACT,fp,all
pz_act06,0.751344,0.309446,0.026981,0.138085,1918,2550
pz_act10,0.840054,0.358306,0.036888,0.183742,3278,4031
pz_act14,0.836022,0.365907,0.039143,0.190423,4082,4842
pz_act20,0.762097,0.325733,0.03125,0.14922,2796,3451
pz_act25,0.780914,0.332248,0.033102,0.154788,3509,4185


In [26]:
true_cats_files = [file for file in true_cats_files if not ('RM' in file)]

In [27]:
comp, recall = gen_tables(det_cats_files, true_cats_files, {'b' : [20, np.inf], 'M500' : [3, np.inf]}, cur_pix)
comp

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['b'] = sc.galactic.b.degree


Unnamed: 0,PSZ2,PSZ2_err,PSZ2_std,MCXC,MCXC_err,MCXC_std,ACT,ACT_err,ACT_std,all,fp
pz_act06,352,0.9,0.250263,151,0.55,0.184605,83,0.45,0.135239,2550,1918
pz_act10,386,2.0,0.261574,160,0.7,0.146898,100,0.85,0.181731,4031,3278
pz_act14,388,2.5,0.366348,165,1.1,0.339504,109,0.85,0.195677,4842,4082
pz_act20,361,2.0,0.403928,159,0.75,0.239242,88,0.8,0.236198,3451,2796
pz_act25,371,1.45,0.245753,160,0.65,0.208693,89,0.6,0.168585,4185,3509
all,425,0.0,,200,0.0,,256,0.0,,0,0


In [28]:
recall

Unnamed: 0,PSZ2,MCXC,ACT,fp,all
pz_act06,0.828235,0.755,0.324219,1918,2550
pz_act10,0.908235,0.8,0.390625,3278,4031
pz_act14,0.912941,0.825,0.425781,4082,4842
pz_act20,0.849412,0.795,0.34375,2796,3451
pz_act25,0.872941,0.8,0.347656,3509,4185


In [29]:
comp, recall = gen_tables(det_cats_files, true_cats_files, {'b' : [20, np.inf], 'M500' : [4, np.inf]}, cur_pix)
comp

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['b'] = sc.galactic.b.degree


Unnamed: 0,PSZ2,PSZ2_err,PSZ2_std,MCXC,MCXC_err,MCXC_std,ACT,ACT_err,ACT_std,all,fp
pz_act06,289,0.65,0.195677,97,0.45,0.169752,53,0.0,0.0,2550,1918
pz_act10,314,1.7,0.281911,103,0.5,0.114708,61,0.05,0.05,4031,3278
pz_act14,314,1.9,0.289282,105,0.65,0.28354,68,0.3,0.127733,4842,4082
pz_act20,294,1.55,0.373286,103,0.5,0.153897,55,0.3,0.127733,3451,2796
pz_act25,302,1.25,0.25,106,0.4,0.152177,55,0.25,0.09934,4185,3509
all,338,0.0,,120,0.0,,88,0.0,,0,0


In [30]:
recall

Unnamed: 0,PSZ2,MCXC,ACT,fp,all
pz_act06,0.85503,0.808333,0.602273,1918,2550
pz_act10,0.928994,0.858333,0.693182,3278,4031
pz_act14,0.928994,0.875,0.772727,4082,4842
pz_act20,0.869822,0.858333,0.625,2796,3451
pz_act25,0.893491,0.883333,0.625,3509,4185
