Пояснение по gen_tables: параметр dict_cut задаёт, по каким параметрам и в каких пределах обрезать каталоги.

Например, при dict_cut = {'z' : [0.1, 0.3]} в каталогах останутся только объекты с 0.1 <= z < 0.3.

Для b используется модуль: dict_cut = {'b' : [20, np.inf]} оставит в каталогах объекты, для которых |b| >= 20.

In [1]:
import numpy as np
import pandas as pd
import os
from matplotlib import pyplot as plt
from astropy.io import fits
from astropy.table import Table
from astropy.coordinates import SkyCoord
from astropy import units as u
from DS_healpix_fragmentation import radec2pix
from DS_Planck_Unet import val_pix, test_pix, train_pix_act
from DS_data_transformation import calc_error

In [2]:
def get_name(file):
    """Extract the short model tag from a detected-catalogue file stem.

    The tag always starts at character 5 of ``file``. Stems containing
    ``'pz_act'`` yield characters 5..12 (e.g. ``'full_pz_act10_thr0.1_step8'``
    -> ``'pz_act10'``); every other stem yields characters 5..8
    (e.g. ``'full_pz14_thr0.1_step8'`` -> ``'pz14'``).
    """
    # The 'pz_act' tags are four characters longer than the plain 'pz' ones.
    stop = 13 if 'pz_act' in file else 9
    return file[5:stop]

In [3]:
def cut_cat(df, dict_cut=None, big_pix=None):
    """Return a filtered copy of a catalogue.

    A column ``'b'`` holding the galactic latitude in degrees (computed from
    the ``'RA'`` / ``'DEC'`` columns, interpreted as ICRS degrees) is added to
    the returned frame. The input ``df`` itself is left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Catalogue with at least ``'RA'`` and ``'DEC'`` columns, plus one column
        per key of ``dict_cut``.
    dict_cut : dict or None
        Maps a column name to ``[low, high]``; rows with ``low <= value < high``
        are kept. For the key ``'b'`` the absolute value ``|b|`` is compared.
        ``None`` applies unbounded cuts on ``'z'``, ``'M500'`` and ``'b'``, so
        those columns must exist and rows where they are NaN are dropped
        (comparisons with NaN are false).
    big_pix : array-like or None
        If given, only rows whose ``radec2pix(RA, DEC, 2)`` value is contained
        in ``big_pix`` are kept.

    Returns
    -------
    pandas.DataFrame
        The filtered catalogue with a fresh 0..n-1 index.
    """
    if dict_cut is None:
        dict_cut = {'z': [-np.inf, np.inf], 'M500': [-np.inf, np.inf],
                    'b': [-np.inf, np.inf]}

    # Work on a private copy: adding the 'b' column to a frame that is itself a
    # slice of another frame is what raised pandas' SettingWithCopyWarning, and
    # it silently modified the caller's data.
    df = df.copy()

    sc = SkyCoord(ra=np.array(df['RA'])*u.degree,
                  dec=np.array(df['DEC'])*u.degree, frame='icrs')
    df['b'] = sc.galactic.b.degree

    for prm, bounds in dict_cut.items():
        low, high = bounds[0], bounds[1]
        # Galactic latitude is cut symmetrically around the plane.
        values = df[prm].abs() if prm == 'b' else df[prm]
        df = df[(values >= low) & (values < high)].reset_index(drop=True)

    if big_pix is not None:
        pix2 = radec2pix(df['RA'], df['DEC'], 2)
        # np.isin is the supported replacement for the deprecated np.in1d.
        df = df[np.isin(pix2, big_pix)].reset_index(drop=True)

    return df

In [4]:
def gen_tables(det_cats_files, true_cats_files, dict_cut=None, big_pix=None,
               match_dist=5/60, n_try=200):
    """Build match-count and recall tables of detected vs. true catalogues.

    Parameters
    ----------
    det_cats_files : list of str
        CSV files with detected catalogues. Each needs ``'RA'``, ``'DEC'`` and
        ``'status'`` columns. The table row label is ``get_name`` applied to the
        file stem.
    true_cats_files : list of str
        CSV files with reference catalogues; the column label is the file stem.
    dict_cut : dict or None
        Cuts passed to ``cut_cat``. True catalogues receive all of them;
        detected catalogues receive only the ``'b'`` cut (if present).
        ``None`` means unbounded cuts on ``'z'``, ``'M500'`` and ``'b'``.
    big_pix : array-like or None
        Pixel selection forwarded to ``cut_cat`` for both kinds of catalogue.
    match_dist : float
        Maximum separation, in degrees, for a true object to count as matched.
    n_try : int
        Forwarded to ``calc_error``.

    Returns
    -------
    (comp_df, recall_df) : tuple of pandas.DataFrame
        ``comp_df`` has one row per detected catalogue with, for every true
        catalogue, the number of matched true objects plus the ``_err`` and
        ``_std`` values returned by ``calc_error``, and the columns ``'all'``
        (number of detections) and ``'fp'`` (detections with status ``'fp'``).
        A final row ``'all'`` holds the sizes of the true catalogues.
        ``recall_df`` has the matched fraction per true catalogue (NaN when the
        true catalogue is empty after the cuts) and the raw ``'fp'`` / ``'all'``
        counts.
    """
    if dict_cut is None:
        dict_cut = {'z': [-np.inf, np.inf], 'M500': [-np.inf, np.inf],
                    'b': [-np.inf, np.inf]}

    true_cats = {os.path.splitext(os.path.basename(file))[0]: pd.read_csv(file)
                 for file in true_cats_files}
    det_cats = {get_name(os.path.splitext(os.path.basename(file))[0]): pd.read_csv(file)
                for file in det_cats_files}

    # Detected catalogues are only cut in galactic latitude (and by pixel).
    det_cut = {'b': dict_cut['b']} if 'b' in dict_cut else {}
    for name in det_cats:
        # Rows with status 'fn' are dropped (presumably false negatives, i.e.
        # not real detections -- confirm against the catalogue writer).
        # .copy() detaches the slice so later column writes do not warn.
        df = det_cats[name][det_cats[name]['status'] != 'fn'].copy()
        df.index = np.arange(len(df))
        det_cats[name] = cut_cat(df, det_cut, big_pix)
    for name in true_cats:
        true_cats[name] = cut_cat(true_cats[name], dict_cut, big_pix)

    comp_rows = []
    recall_rows = []

    for det_name, det in det_cats.items():
        line = {}
        line_r = {}

        # An empty catalogue cannot be used as a matching target.
        det_sc = None
        if len(det) > 0:
            det_sc = SkyCoord(ra=np.array(det['RA'])*u.degree,
                              dec=np.array(det['DEC'])*u.degree, frame='icrs')

        for tr_name, tr in true_cats.items():
            if det_sc is not None and len(tr) > 0:
                tr_sc = SkyCoord(ra=np.array(tr['RA'])*u.degree,
                                 dec=np.array(tr['DEC'])*u.degree, frame='icrs')
                # For every true object: distance to the nearest detection.
                _, d2d, _ = tr_sc.match_to_catalog_sky(det_sc)
                n_matched = np.count_nonzero(d2d.degree <= match_dist)
            else:
                n_matched = 0

            line[tr_name] = n_matched
            # NOTE(review): calc_error is called unchanged; how it behaves on
            # empty catalogues is not visible from this file.
            line[tr_name+'_err'], line[tr_name+'_std'] = calc_error(det, tr, n_try=n_try)

            # Avoid ZeroDivisionError when the cuts leave no true objects.
            line_r[tr_name] = n_matched / len(tr) if len(tr) > 0 else np.nan

        line['all'] = len(det)
        line['fp'] = np.count_nonzero(det['status'] == 'fp')
        line_r['fp'] = line['fp']
        line_r['all'] = line['all']
        comp_rows.append(pd.DataFrame(line, index=[det_name]))
        recall_rows.append(pd.DataFrame(line_r, index=[det_name]))

    # Totals row: built in its own dict so it also works with no detected
    # catalogues (previously it reused a loop-local variable).
    totals = {}
    for tr_name, tr in true_cats.items():
        totals[tr_name] = len(tr)
        totals[tr_name+'_err'] = 0
    totals['fp'] = 0
    totals['all'] = 0
    comp_rows.append(pd.DataFrame(totals, index=['all']))

    comp_df = pd.concat(comp_rows)
    # pd.concat refuses an empty list; return an empty frame in that case.
    recall_df = pd.concat(recall_rows) if recall_rows else pd.DataFrame()
    return comp_df, recall_df

In [19]:
def extr_one_model(comp, recall, model, line_name):
    """Format the results of one model as a single-row table of strings.

    Parameters
    ----------
    comp, recall : pandas.DataFrame
        Tables in the layout produced by ``gen_tables``; ``comp`` must contain
        a row labelled ``'all'``.
    model : hashable or None
        Row label of the model to extract; ``None`` selects the first row of
        ``comp``.
    line_name : hashable
        Index label of the returned row.

    Returns
    -------
    pandas.DataFrame
        For every column of ``recall`` whose name contains neither ``'all'``
        nor ``'fp'``: a column with the recall to two decimals and a column
        ``<name>*`` formatted as ``matched/total/error``.
    """
    row_key = comp.index[0] if model is None else model

    cells = {}
    for col in recall.columns:
        # Skip the bookkeeping columns (detection and false-positive counts).
        if 'all' in col or 'fp' in col:
            continue
        found = comp.loc[row_key, col]
        total = comp.loc['all', col]
        err = comp.loc[row_key, col+'_err']
        cells[col] = '{:.2f}'.format(recall.loc[row_key, col])
        cells[col+'*'] = '{}/{}/{:.2f}'.format(found, total, err)

    return pd.DataFrame(cells, index=[line_name])

In [6]:
# Reference catalogues: every file directly inside tr_dir except those with
# 'RM' in the name (directory listing order, not sorted).
tr_dir = '/home/rt2122/Data/original_catalogs/csv/'
true_cats_files = [
    os.path.join(tr_dir, name)
    for name in next(os.walk(tr_dir))[2]
    if 'RM' not in name
]

# Detected catalogues: files directly inside det_dir with 'full' in the name,
# in alphabetical order.
det_dir = '/home/rt2122/Data/detected_cats/'
det_cats_files = [
    os.path.join(det_dir, name)
    for name in sorted(next(os.walk(det_dir))[2])
    if 'full' in name
]

In [20]:
# Recall table for full_pz14_thr0.1_step8.csv at |b| >= 20, one row per lower
# bound on M500. The row label says '>' although cut_cat keeps M500 >= thr.
cur_dcat = ['/home/rt2122/Data/detected_cats/full_pz14_thr0.1_step8.csv']
m500_table1 = []
for thr in (5, 4, 3, -np.inf):
    comp, recall = gen_tables(
        cur_dcat, true_cats_files,
        dict_cut={'b' : [20, np.inf], 'M500' : [thr, np.inf]})
    # The unbounded cut is reported as the row for all objects.
    name = 'all' if thr == -np.inf else 'M500 > {}'.format(thr)
    line = extr_one_model(comp, recall, None, name)
    m500_table1.append(line)
m500_table1 = pd.concat(m500_table1)
print(m500_table1.to_latex())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['b'] = sc.galactic.b.degree


\begin{tabular}{lllllll}
\toprule
{} &  PSZ2 &            PSZ2* &  MCXC &           MCXC* &   ACT &            ACT* \\
\midrule
M500 > 5 &  0.99 &     411/417/4.04 &  0.88 &    107/121/1.10 &  0.92 &    140/153/1.50 \\
M500 > 4 &  0.98 &     605/619/6.00 &  0.86 &    199/232/2.12 &  0.75 &    258/342/3.42 \\
M500 > 3 &  0.97 &     753/775/7.35 &  0.82 &    309/378/3.35 &  0.48 &    465/975/9.71 \\
all      &  0.91 &  1226/1342/12.09 &  0.40 &  647/1612/15.30 &  0.20 &  858/4195/42.53 \\
\bottomrule
\end{tabular}



In [21]:
# Same table as above for full_pz_act10_thr0.1_step8.csv: |b| >= 20, one row
# per lower bound on M500 (label says '>', cut_cat keeps M500 >= thr).
cur_dcat = ['/home/rt2122/Data/detected_cats/full_pz_act10_thr0.1_step8.csv']
m500_table2 = []
for thr in (5, 4, 3, -np.inf):
    comp, recall = gen_tables(
        cur_dcat, true_cats_files,
        dict_cut={'b' : [20, np.inf], 'M500' : [thr, np.inf]})
    # The unbounded cut is reported as the row for all objects.
    name = 'all' if thr == -np.inf else 'M500 > {}'.format(thr)
    line = extr_one_model(comp, recall, None, name)
    m500_table2.append(line)
m500_table2 = pd.concat(m500_table2)
print(m500_table2.to_latex())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['b'] = sc.galactic.b.degree


\begin{tabular}{lllllll}
\toprule
{} &  PSZ2 &           PSZ2* &  MCXC &           MCXC* &   ACT &            ACT* \\
\midrule
M500 > 5 &  0.95 &    397/417/2.71 &  0.88 &    107/121/0.84 &  0.86 &    132/153/1.09 \\
M500 > 4 &  0.93 &    575/619/3.92 &  0.84 &    196/232/1.57 &  0.73 &    248/342/2.33 \\
M500 > 3 &  0.91 &    709/775/4.80 &  0.78 &    295/378/2.49 &  0.48 &    470/975/6.57 \\
all      &  0.85 &  1134/1342/7.93 &  0.37 &  599/1612/10.07 &  0.22 &  940/4195/30.53 \\
\bottomrule
\end{tabular}



In [22]:
# full_pz14_thr0.1_step8.csv restricted to the pixels in val_pix: |b| >= 20,
# one row per lower bound on M500.
cur_dcat = ['/home/rt2122/Data/detected_cats/full_pz14_thr0.1_step8.csv']
m500_table1_val = []
for thr in (5, 4, 3, -np.inf):
    comp, recall = gen_tables(
        cur_dcat, true_cats_files,
        dict_cut={'b' : [20, np.inf], 'M500' : [thr, np.inf]},
        big_pix=val_pix)
    # The unbounded cut is reported as the row for all objects.
    name = 'all' if thr == -np.inf else 'M500 > {}'.format(thr)
    line = extr_one_model(comp, recall, None, name)
    m500_table1_val.append(line)
m500_table1_val = pd.concat(m500_table1_val)
print(m500_table1_val.to_latex())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['b'] = sc.galactic.b.degree
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['b'] = sc.galactic.b.degree


\begin{tabular}{lllllll}
\toprule
{} &  PSZ2 &         PSZ2* &  MCXC &        MCXC* &   ACT &          ACT* \\
\midrule
M500 > 5 &  0.95 &    42/44/0.23 &  0.82 &    9/11/0.07 &  1.00 &    24/24/0.11 \\
M500 > 4 &  0.96 &    72/75/0.35 &  0.84 &   16/19/0.10 &  0.78 &    36/46/0.17 \\
M500 > 3 &  0.97 &    92/95/0.42 &  0.84 &   26/31/0.14 &  0.50 &   66/133/0.54 \\
all      &  0.93 &  148/160/0.74 &  0.43 &  72/166/0.72 &  0.21 &  127/602/2.17 \\
\bottomrule
\end{tabular}



In [24]:
# full_pz_act10_thr0.1_step8.csv restricted to the pixels in val_pix:
# |b| >= 20, one row per lower bound on M500.
cur_dcat = ['/home/rt2122/Data/detected_cats/full_pz_act10_thr0.1_step8.csv']
m500_table2_val = []
for thr in (5, 4, 3, -np.inf):
    comp, recall = gen_tables(
        cur_dcat, true_cats_files,
        dict_cut={'b' : [20, np.inf], 'M500' : [thr, np.inf]},
        big_pix=val_pix)
    # The unbounded cut is reported as the row for all objects.
    name = 'all' if thr == -np.inf else 'M500 > {}'.format(thr)
    line = extr_one_model(comp, recall, None, name)
    m500_table2_val.append(line)
m500_table2_val = pd.concat(m500_table2_val)
print(m500_table2_val.to_latex())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['b'] = sc.galactic.b.degree


\begin{tabular}{lllllll}
\toprule
{} &  PSZ2 &         PSZ2* &  MCXC &        MCXC* &   ACT &         ACT* \\
\midrule
M500 > 5 &  0.93 &    41/44/0.22 &  0.82 &    9/11/0.09 &  0.88 &   21/24/0.08 \\
M500 > 4 &  0.93 &    70/75/0.32 &  0.79 &   15/19/0.12 &  0.63 &   29/46/0.13 \\
M500 > 3 &  0.93 &    88/95/0.40 &  0.74 &   23/31/0.17 &  0.37 &  49/133/0.45 \\
all      &  0.90 &  144/160/0.58 &  0.39 &  65/166/0.58 &  0.16 &  96/602/1.67 \\
\bottomrule
\end{tabular}



In [36]:
# full_pz14_thr0.1_step8.csv restricted to the pixels in test_pix, compared
# only with true catalogues whose path does not contain 'ACT'.
cur_dcat = ['/home/rt2122/Data/detected_cats/full_pz14_thr0.1_step8.csv']
cur_tcat = [name for name in true_cats_files if 'ACT' not in name]
m500_table1_test = []
for thr in (5, 4, 3, -np.inf):
    comp, recall = gen_tables(
        cur_dcat, cur_tcat,
        dict_cut={'b' : [20, np.inf], 'M500' : [thr, np.inf]},
        big_pix=test_pix)
    # The unbounded cut is reported as the row for all objects.
    name = 'all' if thr == -np.inf else 'M500 > {}'.format(thr)
    line = extr_one_model(comp, recall, None, name)
    m500_table1_test.append(line)
m500_table1_test = pd.concat(m500_table1_test)
m500_table1_test

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['b'] = sc.galactic.b.degree


Unnamed: 0,PSZ2,PSZ2*,MCXC,MCXC*
M500 > 5,1.0,9/9/0.01,1.0,2/2/0.0
M500 > 4,0.952381,20/21/0.065,0.909091,10/11/0.03
M500 > 3,0.928571,26/28/0.085,0.857143,12/14/0.055
all,0.896552,52/58/0.18,0.328571,23/70/0.165


In [37]:
# full_pz_act10_thr0.1_step8.csv restricted to the pixels in test_pix.
# Relies on cur_tcat (true catalogues without 'ACT') from the previous cell.
cur_dcat = ['/home/rt2122/Data/detected_cats/full_pz_act10_thr0.1_step8.csv']
m500_table2_test = []
for thr in (5, 4, 3, -np.inf):
    comp, recall = gen_tables(
        cur_dcat, cur_tcat,
        dict_cut={'b' : [20, np.inf], 'M500' : [thr, np.inf]},
        big_pix=test_pix)
    # The unbounded cut is reported as the row for all objects.
    name = 'all' if thr == -np.inf else 'M500 > {}'.format(thr)
    line = extr_one_model(comp, recall, None, name)
    m500_table2_test.append(line)
m500_table2_test = pd.concat(m500_table2_test)
m500_table2_test

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['b'] = sc.galactic.b.degree


Unnamed: 0,PSZ2,PSZ2*,MCXC,MCXC*
M500 > 5,0.888889,8/9/0.02,1.0,2/2/0.0
M500 > 4,0.857143,18/21/0.03,0.818182,9/11/0.015
M500 > 3,0.892857,25/28/0.035,0.785714,11/14/0.02
all,0.724138,42/58/0.125,0.3,21/70/0.045


In [18]:
# Scratch check of a format spec: ':2f' sets a minimum field width of 2 and
# keeps the default precision of six decimals, so the value is printed in full.
# Two decimals would be '{:.2f}' (note the dot).
'{:2f}'.format(0.314234)

'0.314234'