In [57]:
import numpy as np
import pandas as pd
import os
from DS_data_transformation import get_prm
from DS_Planck_Unet import east_val
from astropy.coordinates import SkyCoord
from astropy import units as u
from DS_healpix_fragmentation import radec2pix

In [80]:
def stat_orig_cats_simple(det_cats_dict, big_pix=None, true_cats_dir='/home/rt2122/Data/original_catalogs/csv/', match_dist=5/60):
    """Recall against every reference catalogue and precision per detection catalogue.

    Every file found directly inside ``true_cats_dir`` is read as a CSV
    reference catalogue, keyed by its file name without extension. Each
    detection catalogue has its ``'fn'`` rows dropped; a reference source is
    counted as recovered when its nearest remaining detection lies within
    ``match_dist`` on the sky.

    Parameters
    ----------
    det_cats_dict : dict
        Maps a label (used as the row index of the result) to the path of a
        detection CSV with at least the columns ``'RA'``, ``'DEC'`` (degrees)
        and ``'status'``.
    big_pix : array-like or None
        When given, each reference catalogue is restricted to the sources
        whose ``radec2pix(RA, DEC, 2)`` value is contained in ``big_pix``.
        Detection catalogues are NOT filtered by ``big_pix``.
    true_cats_dir : str
        Directory holding the reference catalogues.
    match_dist : float
        Maximum separation, in degrees, for a match.

    Returns
    -------
    pandas.DataFrame
        One row per detection catalogue. One column per reference catalogue
        holding the recall fraction, plus ``'precision'`` computed as
        ``1 - (#'fp' rows) / (#rows left after dropping 'fn')``.
        A value is NaN when its denominator is zero (empty reference
        catalogue, or no detections left for the precision). Recall is 0.0
        when there are no detections to match against.
    """
    import os
    from astropy.coordinates import SkyCoord
    from astropy import units as u
    from DS_healpix_fragmentation import radec2pix
    import numpy as np
    import pandas as pd

    def _to_skycoord(cat):
        # Convert the columns to plain arrays before attaching the unit.
        return SkyCoord(ra=np.array(cat['RA'])*u.degree,
                        dec=np.array(cat['DEC'])*u.degree, frame='icrs')

    # Only the files directly inside true_cats_dir are considered.
    true_cats_files = next(os.walk(true_cats_dir))[-1]

    true_cats = {}
    for file in true_cats_files:
        tr = pd.read_csv(os.path.join(true_cats_dir, file))
        if big_pix is not None:
            pix = radec2pix(tr['RA'], tr['DEC'], 2)
            tr = tr[np.isin(pix, big_pix)].reset_index(drop=True)
        true_cats[os.path.splitext(file)[0]] = tr

    # Reference coordinates do not depend on the detection catalogue, so they
    # are built once. Empty catalogues are skipped: they cannot be matched.
    true_scs = {name: _to_skycoord(tr) for name, tr in true_cats.items() if len(tr) > 0}

    columns = list(true_cats) + ['precision']
    records = []
    for det_name, det_path in det_cats_dict.items():
        det = pd.read_csv(det_path)
        # 'fn' rows describe missed reference objects, not detections.
        det = det[det['status'] != 'fn'].reset_index(drop=True)
        det_sc = _to_skycoord(det) if len(det) > 0 else None

        line_r = {}
        for tr_name, tr in true_cats.items():
            if len(tr) == 0:
                # No reference sources in the region: recall is undefined.
                line_r[tr_name] = np.nan
            elif det_sc is None:
                # Nothing was detected, so nothing can be recovered.
                line_r[tr_name] = 0.0
            else:
                _, d2d, _ = true_scs[tr_name].match_to_catalog_sky(det_sc)
                matched = d2d.degree <= match_dist
                line_r[tr_name] = np.count_nonzero(matched) / len(tr)

        if len(det) > 0:
            line_r['precision'] = 1 - np.count_nonzero(det['status'] == 'fp') / len(det)
        else:
            line_r['precision'] = np.nan
        records.append(line_r)

    # Built in one go; also yields an empty, correctly labelled frame when
    # det_cats_dict is empty.
    return pd.DataFrame(records, index=list(det_cats_dict), columns=columns)


In [71]:
# Detection catalogues of the first two epochs, keyed by epoch number.
cat_d = {
    1: '/home/rt2122/Data/detected_cats/all_found4/val_all_found4_ep1_thr0.1_step8.csv',
    2: '/home/rt2122/Data/detected_cats/all_found4/val_all_found4_ep2_thr0.1_step8.csv',
}

In [72]:
# Recall/precision table for the catalogues currently in cat_d; reference
# catalogues are restricted to the pixels listed in east_val.
recall_df = stat_orig_cats_simple(cat_d, big_pix=east_val)

In [73]:
# NOTE(review): the saved output of this cell has an `fp` column of counts,
# while stat_orig_cats_simple as defined in this notebook writes a `precision`
# column -- the output appears to predate the current definition; re-run to refresh.
recall_df

Unnamed: 0,PSZ2,MCXC,RM,ACT,fp
1,0.567797,0.3,0.027778,0.099237,650
2,0.686441,0.323077,0.024573,0.137405,225


In [74]:
# Map every file directly inside the directory to its full path, keyed by the
# integer 'ep' value that get_prm returns for the file name (presumably the
# epoch encoded in names such as '..._ep1_...'; verify against get_prm).
dirname = '/home/rt2122/Data/detected_cats/all_found4/'
files = next(os.walk(dirname))[2]  # third item of the walk tuple: plain file names
cat_d = dict((int(get_prm('ep', s)), os.path.join(dirname, s)) for s in files)

In [81]:
# Same statistics as before, now for every catalogue collected into cat_d
# from the directory listing.
recall_df = stat_orig_cats_simple(cat_d, big_pix=east_val)

In [82]:
# Rows come out in the iteration order of cat_d; order them by index
# (the cat_d keys) and display the table.
recall_df = recall_df.sort_index()
recall_df

Unnamed: 0,PSZ2,MCXC,RM,ACT,precision
1,0.567797,0.3,0.027778,0.099237,0.104683
2,0.686441,0.323077,0.024573,0.137405,0.287975
3,0.813559,0.353846,0.028846,0.160305,0.265664
4,0.864407,0.369231,0.035256,0.160305,0.217143
5,0.898305,0.369231,0.037749,0.183206,0.235409
6,0.872881,0.369231,0.036681,0.175573,0.234343
7,0.923729,0.376923,0.046652,0.21374,0.184814
8,0.889831,0.369231,0.037393,0.160305,0.219925
9,0.898305,0.376923,0.041667,0.183206,0.206133
10,0.915254,0.376923,0.042379,0.198473,0.200637


In [44]:
# Inspect the raw detection catalogue stored under key 1 of cat_d.
pd.read_csv(cat_d[1])

Unnamed: 0,RA,DEC,area,min_rad,max_rad,mean_rad,min_pred,max_pred,tRA,tDEC,status,catalog,M500,z
0,15.688053,13.637115,7.0,0.653364,2.563909,1.693181,0.102670,0.176453,,,fp,,,
1,20.543867,18.537875,3.0,0.350943,2.125569,1.445601,0.106537,0.115303,,,fp,,,
2,21.254802,19.811888,1.0,0.000000,1.414214,0.853553,0.102464,0.102464,,,fp,,,
3,7.318530,4.908482,2.0,0.460816,1.770306,1.078048,0.110242,0.128990,,,fp,,,
4,8.917532,6.928533,1.0,0.000000,1.414214,0.853553,0.114104,0.114104,,,fp,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
968,209.942238,72.121196,,,,,,,209.942238,72.121196,fn,planck_z,3.778043,0.2143
969,238.760353,84.163702,,,,,,,238.760353,84.163702,fn,planck_z,4.331743,0.1940
970,205.487900,74.451412,,,,,,,205.487900,74.451412,fn,planck_z,5.507178,0.4780
971,261.230797,85.891243,,,,,,,261.230797,85.891243,fn,planck_z,5.982977,0.1780


In [45]:
def stat_split_cats_simple(files, big_pix=list(range(48))):
    """Per-catalogue recall and false-positive count from the detection CSVs alone.

    Uses the ``'status'`` / ``'catalog'`` labels already stored in each
    detection CSV instead of re-matching against reference catalogues.

    Parameters
    ----------
    files : dict
        Maps a label (used as the row index of the result) to the path of a
        detection CSV with the columns ``'RA'``, ``'DEC'``, ``'status'`` and
        ``'catalog'``.
    big_pix : array-like
        Only rows whose ``radec2pix(RA, DEC, 2)`` value is contained in
        ``big_pix`` are kept. The default list is never mutated here.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``files``. The columns ``'planck_z'``,
        ``'planck_no_z'``, ``'mcxcwp'`` and ``'actwp'`` hold the fraction of
        that catalogue's rows whose status is ``'tp'`` (NaN when the catalogue
        has no rows in the selected region); ``'fp'`` holds the number of rows
        whose status is ``'fp'``.
    """
    import pandas as pd
    import numpy as np
    from DS_healpix_fragmentation import radec2pix

    catalogs = ['planck_z', 'planck_no_z', 'mcxcwp', 'actwp']
    records = []
    for model in files:
        df = pd.read_csv(files[model])
        in_region = np.isin(radec2pix(df['RA'], df['DEC'], 2), big_pix)
        df = df[in_region].reset_index(drop=True)

        line = {}
        for cat in catalogs:
            status = df.loc[df['catalog'] == cat, 'status']
            if len(status) > 0:
                line[cat] = np.count_nonzero(status == 'tp') / len(status)
            else:
                # No rows of this catalogue in the region: the ratio is undefined.
                line[cat] = np.nan
        line['fp'] = np.count_nonzero(df['status'] == 'fp')
        records.append(line)

    # Built in one go; also yields an empty, correctly labelled frame when
    # `files` is empty.
    return pd.DataFrame(records, index=list(files), columns=catalogs + ['fp'])

In [49]:
# Label-based statistics for every catalogue in cat_d; big_pix is left at its
# default, i.e. pixel indices 0..47.
recall = stat_split_cats_simple(cat_d)

In [51]:
# Display ordered by index; the result is not assigned, so `recall` itself
# keeps its original row order.
recall.sort_index()

Unnamed: 0,planck_z,planck_no_z,mcxcwp,actwp,fp
1,0.655556,0.178571,0.068182,0.051282,650
2,0.755556,0.357143,0.079545,0.051282,225
3,0.866667,0.535714,0.056818,0.068376,293
4,0.911111,0.607143,0.079545,0.068376,411
5,0.955556,0.607143,0.079545,0.094017,393
6,0.933333,0.571429,0.068182,0.08547,379
7,0.944444,0.75,0.090909,0.128205,569
8,0.933333,0.642857,0.079545,0.068376,415
9,0.933333,0.678571,0.079545,0.094017,466
10,0.944444,0.714286,0.090909,0.111111,502


In [66]:
# Detection catalogue stored in the *_ep1_* file: keep the rows that fall in
# the east_val pixels, then drop the 'fn' rows so only actual detections remain.
df = (
    pd.read_csv('/home/rt2122/Data/detected_cats/all_found4/val_all_found4_ep1_thr0.1_step8.csv')
    .loc[lambda cat: np.in1d(radec2pix(cat['RA'], cat['DEC'], 2), east_val)]
    .loc[lambda cat: cat['status'] != 'fn']
)

In [67]:
# PSZ2 reference catalogue restricted to the east_val pixels.
psz2 = (
    pd.read_csv('/home/rt2122/Data/original_catalogs/csv/PSZ2.csv')
    .loc[lambda cat: np.in1d(radec2pix(cat['RA'], cat['DEC'], 2), east_val)]
)

In [68]:
# Sky coordinates of the detections (df_sc) and of the PSZ2 sources (p_sc).
# The columns are converted to plain NumPy arrays before the unit is attached,
# as stat_orig_cats_simple does: what `Series * unit` returns depends on the
# installed pandas/astropy versions.
df_sc = SkyCoord(ra=np.array(df['RA'])*u.degree, dec=np.array(df['DEC'])*u.degree, frame='icrs')
p_sc = SkyCoord(ra=np.array(psz2['RA'])*u.degree, dec=np.array(psz2['DEC'])*u.degree, frame='icrs')

In [69]:
# For every PSZ2 source find its nearest detection; d2d is the on-sky separation.
_, d2d, _ = p_sc.match_to_catalog_sky(df_sc)
# A source counts as recovered when that separation is at most 5/60 degree.
matched = d2d.degree <= 5/60
# Displayed as (number of PSZ2 sources, number of them matched).
len(psz2), np.count_nonzero(matched)

(118, 67)

In [61]:
# Only the last expression of a cell is displayed, so the 'planck_z' selection
# on the first line is computed and discarded.
df[df['catalog'] == 'planck_z']
# NOTE(review): the cell that builds `df` removes every 'fn' row, so on a
# top-to-bottom run this selection is empty; the saved output (which lists
# 'fn' rows) must come from an earlier state of `df`.
df[df['status'] == 'fn']

Unnamed: 0,RA,DEC,area,min_rad,max_rad,mean_rad,min_pred,max_pred,tRA,tDEC,status,catalog,M500,z
309,0.030000,8.274000,,,,,,,0.030000,8.274000,fn,mcxcwp,0.737300,0.0396
310,0.103000,-2.625000,,,,,,,0.103000,-2.625000,fn,mcxcwp,0.329700,0.0379
311,1.247000,11.701000,,,,,,,1.247000,11.701000,fn,mcxcwp,1.300900,0.0761
312,3.583000,8.900000,,,,,,,3.583000,8.900000,fn,mcxcwp,2.721800,0.1630
313,3.983000,16.249000,,,,,,,3.983000,16.249000,fn,mcxcwp,0.963800,0.0830
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
968,209.942238,72.121196,,,,,,,209.942238,72.121196,fn,planck_z,3.778043,0.2143
969,238.760353,84.163702,,,,,,,238.760353,84.163702,fn,planck_z,4.331743,0.1940
970,205.487900,74.451412,,,,,,,205.487900,74.451412,fn,planck_z,5.507178,0.4780
971,261.230797,85.891243,,,,,,,261.230797,85.891243,fn,planck_z,5.982977,0.1780


In [85]:
# Persist the recall/precision table. index_label writes the 'epoch' header for
# the index column even when the index has not been given a name yet, so the
# saved file does not depend on the order in which cells were executed.
recall_df.to_csv('/home/rt2122/Data/models_stat/all_found4_recall.csv', index_label='epoch')

In [84]:
# Name the index so it is shown as 'epoch'.
# NOTE(review): in file order this cell follows the to_csv cell, although its
# execution count (84) precedes that cell's (85).
recall_df.index.name='epoch'