In [4]:
import numpy as np
import pandas as pd
from DS_detector import *
from DS_Planck_Unet import load_planck_model
from matplotlib import pyplot as plt
from skimage.filters import roberts 
from skimage.draw import circle



In [5]:
def pack_all_catalogs(cat_dir='/home/rt2122/Data/clusters/'):
    """Read every catalogue file found directly in `cat_dir` into one DataFrame.

    Each regular file in the directory (sub-directories are ignored) is parsed
    with ``pandas.read_csv``.  A ``'catalog'`` column holding the file name
    without its extension is added, so rows can be traced back to their
    source file.

    Parameters
    ----------
    cat_dir : str
        Directory containing the catalogue files.

    Returns
    -------
    pandas.DataFrame
        All catalogues stacked in file-name order, with a fresh 0..n-1 index.

    Raises
    ------
    FileNotFoundError
        If `cat_dir` does not exist (raised by ``os.listdir``).
    ValueError
        If `cat_dir` contains no files.
    """
    import os
    import pandas as pd

    # Sorted so the row order does not depend on the filesystem's listing order.
    files = sorted(name for name in os.listdir(cat_dir)
                   if os.path.isfile(os.path.join(cat_dir, name)))
    if not files:
        raise ValueError('No catalog files found in {!r}'.format(cat_dir))

    all_cats = []
    for file in files:
        df = pd.read_csv(os.path.join(cat_dir, file))
        # splitext copes with any extension length (the old file[:-4] only
        # worked for three-letter extensions such as ".csv").
        df['catalog'] = os.path.splitext(file)[0]
        all_cats.append(df)

    return pd.concat(all_cats, ignore_index=True)

In [6]:
def gen_pics_for_detection_new(ipix, model, nside=2, depth=10, step=64, size=64, 
        mask_radius=15/60, clusters_dir='/home/rt2122/Data/clusters/'):
    """Cut one large pixel into tiles and run the model on every tile that has mask.

    NOTE: a later cell of this notebook defines another function with the same
    name (different parameter order, plus per-pixel cluster filtering); once
    that cell has run it replaces this definition.

    Parameters
    ----------
    ipix : index of the large pixel, passed to one_pixel_fragmentation.
    model : object with a ``predict`` method that accepts the stacked tiles.
    nside, depth : passed to one_pixel_fragmentation together with `ipix`.
    step : stride in matrix cells between the top-left corners of tiles.
    size : side length of each square tile, in matrix cells.
    mask_radius : passed to draw_pic_with_mask.
    clusters_dir : directory read by pack_all_catalogs.

    Returns
    -------
    dict with keys 'true_clusters' (all catalogue rows, unfiltered), 'pics',
    'matrs', 'masks' (parallel lists, one entry per kept tile) and 'ans'
    (output of ``model.predict``).
    """
    # Function-local imports kept exactly as before (some are unused here) so
    # that import-time behaviour is unchanged.
    from DS_healpix_fragmentation import one_pixel_fragmentation, pix2radec, radec2pix
    from DS_Planck_Unet import draw_pic_with_mask, draw_pic
    import pandas as pd
    import numpy as np
    import healpy as hp
    import os

    true_clusters = pack_all_catalogs(clusters_dir)

    big_matr = one_pixel_fragmentation(nside, ipix, depth)
    cluster_coords = np.array(true_clusters[['RA', 'DEC']])
    big_pic, big_mask = draw_pic_with_mask(center=None, matr=big_matr,
                                           mask_radius=mask_radius,
                                           clusters_arr=cluster_coords)

    n_rows, n_cols = big_matr.shape[0], big_matr.shape[1]
    pics, matrs, masks = [], [], []
    for top in range(0, n_rows, step):
        rows = slice(top, top + size)
        for left in range(0, n_cols, step):
            cols = slice(left, left + size)

            tile_pic = big_pic[rows, cols, :]
            # Tiles clipped by the border of the big picture are dropped.
            if tile_pic.shape != (size, size, tile_pic.shape[-1]):
                continue

            tile_mask = big_mask[rows, cols, :]
            # Only tiles whose mask has at least one non-zero cell are kept.
            if np.count_nonzero(tile_mask) == 0:
                continue

            pics.append(tile_pic)
            matrs.append(big_matr[rows, cols])
            masks.append(tile_mask)

    predictions = model.predict(np.array(pics))
    result = {'true_clusters' : true_clusters,
              'pics' : pics, 'matrs' : matrs, 'masks' : masks}
    result['ans'] = predictions
    return result


In [7]:
def get_radius(figure, center):
    """Smallest and largest distance from `center` to the outline of `figure`.

    The outline is every position where the Roberts edge filter of `figure`
    is non-zero.

    Parameters
    ----------
    figure : 2-D array
        Image of a single figure.
    center : sequence of numbers
        Point in the same index coordinates as `figure` (one value per axis).

    Returns
    -------
    (min_rad, max_rad)
        Distances in array cells.  The search starts from
        ``(figure.shape[0], 0)``, so a figure without any edge cell returns
        exactly those starting values.
    """
    import numpy as np
    from skimage.filters import roberts

    center = np.array(center)
    # One row per edge cell, one column per axis.
    edge_points = np.argwhere(roberts(figure) != 0)

    min_rad = figure.shape[0]
    max_rad = 0
    if len(edge_points) > 0:
        # All distances at once instead of one np.linalg.norm call per cell.
        dists = np.linalg.norm(center - edge_points, axis=1)
        min_rad = min(min_rad, dists.min())
        max_rad = max(max_rad, dists.max())

    return min_rad, max_rad

In [29]:
def find_centers_on_mask_new(mask, thr, binary=True):
    """Split a thresholded prediction into figures and describe each of them.

    `mask` is binarised with ``mask >= thr`` and passed to ``divide_figures``;
    one entry is produced per returned figure.

    Parameters
    ----------
    mask : array
        Predicted mask.  It is indexed as ``figure[:, :, 0]`` below, so it is
        expected to be 3-D with the channel axis last.
    thr : float
        Cells with ``mask >= thr`` belong to a figure.
    binary : bool
        If True the centroid is computed from the binary figure, otherwise
        from the figure filled with the original `mask` values.

    Returns
    -------
    dict of numpy arrays, one element per figure:
        'centers'  - output of ``find_centroid``,
        'areas'    - number of non-zero cells of the figure,
        'min_rad', 'max_rad' - output of ``get_radius`` for the figure,
        'min_pred', 'max_pred' - smallest / largest `mask` value inside the figure.
    """
    import numpy as np

    mask_binary = np.array(mask >= thr, dtype=np.float32)

    figures = divide_figures(mask_binary)
    centers = []
    areas = []
    min_rad = []
    max_rad = []
    min_pred = []
    max_pred = []
    for figure in figures:
        inside = np.where(figure)
        values = mask[inside]

        # Figure filled with the original prediction values, zero elsewhere.
        f = np.zeros_like(mask)
        f[inside] = values

        centers.append(find_centroid(figure if binary else f))

        areas.append(np.count_nonzero(figure))
        rads = get_radius(figure[:,:,0], centers[-1])
        min_rad.append(rads[0])
        max_rad.append(rads[1])
        # Taken from the figure's own cells.  The previous "second smallest
        # distinct value of f" relied on a zero background being present and
        # gave a wrong value (or raised) when it was not.
        min_pred.append(values.min())
        max_pred.append(values.max())

    return {'centers' : np.array(centers), 'areas' : np.array(areas), 
            'min_rad' : np.array(min_rad), 'max_rad' : np.array(max_rad),
            'min_pred': np.array(min_pred), 'max_pred' : np.array(max_pred)}

In [38]:
def gen_pics_for_detection_new(ipix, model, big_nside=2, step=64, size=64, depth=10, 
        mask_radius=15/60, clusters_dir='/home/rt2122/Data/clusters/'):
    """Cut one large pixel into tiles and run the model on every tile that has mask.

    Parameters
    ----------
    ipix : index of the large pixel to process.
    model : object with a ``predict`` method that accepts the stacked tiles.
    big_nside : nside of the large pixel; passed to one_pixel_fragmentation and
        used to decide which catalogue clusters fall into `ipix`.
    step : stride in matrix cells between the top-left corners of tiles.
    size : side length of each square tile, in matrix cells.
    depth : passed to one_pixel_fragmentation.
    mask_radius : passed to draw_pic_with_mask.
    clusters_dir : directory read by pack_all_catalogs.

    Returns
    -------
    dict with keys
        'true_clusters' - catalogue rows that fall into `ipix` (index 0..n-1),
        'pics', 'matrs', 'masks' - parallel lists, one entry per kept tile,
        'ans' - output of ``model.predict`` for the kept tiles (an empty array
                when no tile was kept).
    """
    from DS_healpix_fragmentation import one_pixel_fragmentation, radec2pix
    from DS_Planck_Unet import draw_pic_with_mask
    import numpy as np

    true_clusters = pack_all_catalogs(clusters_dir)
    # Use the same nside as the fragmentation below; this was hard-coded to 2,
    # which selected the wrong clusters for any other big_nside.
    clusters_pix = radec2pix(true_clusters['RA'], true_clusters['DEC'], big_nside)
    # reset_index returns an independent frame, not a view-like slice.
    true_clusters = true_clusters[clusters_pix == ipix].reset_index(drop=True)

    big_matr = one_pixel_fragmentation(big_nside, ipix, depth)
    big_pic, big_mask = draw_pic_with_mask(center=None, matr=big_matr, 
                            mask_radius=mask_radius,
                            clusters_arr=np.array(true_clusters[['RA', 'DEC']]))

    pics, matrs, masks = [], [], []
    for i in range(0, big_matr.shape[0], step):
        for j in range(0, big_matr.shape[1], step):
            pic = big_pic[i:i+size,j:j+size,:]
            mask = big_mask[i:i+size,j:j+size,:]
            matr = big_matr[i:i+size,j:j+size]

            # Keep only full-size tiles whose mask has a non-zero cell.
            if pic.shape == (size, size, pic.shape[-1]) and np.count_nonzero(mask) > 0:
                pics.append(pic)
                matrs.append(matr)
                masks.append(mask)

    # Do not call the model with an empty batch.
    ans = model.predict(np.array(pics)) if pics else np.empty((0,))
    return {'true_clusters' : true_clusters,
            'pics' : pics, 'matrs' : matrs, 'masks' : masks, 'ans' : ans}


In [10]:
def detect_clusters_on_pic_new(ans, matr, thr, binary):
    """Detect figures on one predicted tile and look their centres up in `matr`.

    Runs find_centers_on_mask_new on `ans` and, when anything was found,
    replaces the 'centers' entry by the values of `matr` at the (integer
    truncated) centre positions.  All other entries are returned untouched.
    """
    import numpy as np

    found = find_centers_on_mask_new(ans, thr, binary)
    if len(found['centers']) == 0:
        return found

    # Casting to int32 truncates fractional centroid coordinates.
    positions = np.array(found['centers'], dtype=np.int32)
    rows, cols = positions[:, 0], positions[:, 1]
    found['centers'] = matr[rows, cols]
    return found

In [182]:
def detect_clusters_new(all_dict, thr, base_nside=2048, tp_dist=5/60, 
                        fp_dist=15/60, binary=False, ret_coords=True):
    """Build a detection catalogue from per-tile predictions and score it.

    Every prediction in ``all_dict['ans']`` is thresholded at `thr`; the
    detected centres are converted to sky coordinates with ``pix2radec``,
    merged across tiles and matched against ``all_dict['true_clusters']``.

    Parameters
    ----------
    all_dict : dict
        Must contain 'masks', 'ans', 'matrs' and 'true_clusters' (the
        structure returned by gen_pics_for_detection_new).  'true_clusters'
        needs 'RA', 'DEC' and 'catalog' columns.  It is not modified.
    thr : float
        Threshold passed on to detect_clusters_on_pic_new.
    base_nside : int
        nside passed to ``pix2radec`` when converting centres.
    tp_dist : float
        Largest separation, in degrees, between a detection and its nearest
        true cluster for the detection to count as 'tp'.
    fp_dist : float
        A detection from a later tile is kept only if it is farther than this
        many degrees from every detection collected so far.
    binary : bool
        Passed on to detect_clusters_on_pic_new.
    ret_coords : bool
        If True return the catalogue, otherwise return the counts.

    Returns
    -------
    pandas.DataFrame or dict
        With ``ret_coords=True``: one row per detection ('tp' or 'fp') followed
        by one row per unmatched true cluster ('fn'), with columns RA, DEC,
        area, min_rad, max_rad, min_pred, max_pred, status, catalog.
        Otherwise a dict with the keys 'tp', 'fp', 'tn' and 'fn'.
    """
    import numpy as np
    import pandas as pd
    from DS_healpix_fragmentation import pix2radec
    from astropy.coordinates import SkyCoord
    from astropy import units as u

    masks = all_dict['masks']
    ans = all_dict['ans']
    matrs = all_dict['matrs']
    # Independent copy with a positional index: the caller's frame is left
    # alone and the positions returned by match_to_catalog_sky can be used
    # directly.
    true_clusters = all_dict['true_clusters'].reset_index(drop=True)

    det_cols = ['RA', 'DEC', 'area', 'min_rad', 'max_rad', 'min_pred', 'max_pred']
    res_cat = pd.DataFrame({col: [] for col in det_cols})
    res_cat_sc = None

    stat_df = {'tp': 0, 'fp': 0, 'tn': 0, 'fn': 0}

    for i in range(len(ans)):
        dd_pic = detect_clusters_on_pic_new(ans[i], matrs[i], thr, binary)
        centers = dd_pic['centers']

        if len(centers) == 0:
            # NOTE(review): this counts a tile as 'tn' when its mask is
            # non-zero but nothing was detected; kept as in the original,
            # confirm that this is the intended definition.
            if np.count_nonzero(masks[i]):
                stat_df['tn'] += 1
            continue

        centers = pix2radec(centers, nside=base_nside)
        sc = SkyCoord(ra=centers[0]*u.degree, 
                      dec=centers[1]*u.degree, frame='icrs')
        pic_cat = pd.DataFrame({'RA' : sc.icrs.ra.degree,
                                'DEC' : sc.icrs.dec.degree,
                                'area' : dd_pic['areas'],
                                'min_rad' : dd_pic['min_rad'],
                                'max_rad' : dd_pic['max_rad'],
                                'min_pred' : dd_pic['min_pred'],
                                'max_pred' : dd_pic['max_pred']})

        if res_cat_sc is not None:
            # Drop detections that repeat something already in the catalogue.
            _, d2d, _ = sc.match_to_catalog_sky(res_cat_sc)
            pic_cat = pic_cat[d2d.degree > fp_dist]
            if len(pic_cat) == 0:
                continue
            pic_cat = pd.concat([res_cat, pic_cat], ignore_index=True)

        res_cat = pic_cat
        res_cat_sc = SkyCoord(ra=res_cat['RA'].to_numpy()*u.degree, 
                              dec=res_cat['DEC'].to_numpy()*u.degree, 
                              frame='icrs')

    n_det = len(res_cat)
    n_true = len(true_clusters)

    # Nearest true cluster for every detection.  With no detections or no
    # true clusters there is nothing to match, so everything stays unmatched.
    nearest = np.zeros(n_det, dtype=int)
    matched_idx = np.zeros(n_det, dtype=bool)
    if n_det > 0 and n_true > 0:
        true_clusters_sc = SkyCoord(ra=true_clusters['RA'].to_numpy()*u.degree, 
                                    dec=true_clusters['DEC'].to_numpy()*u.degree, 
                                    frame='icrs')
        nearest, d2d, _ = res_cat_sc.match_to_catalog_sky(true_clusters_sc)
        nearest = np.asarray(nearest)
        matched_idx = np.asarray(d2d.degree <= tp_dist)

    # Whole-column assignments instead of chained `.iloc` writes, which pandas
    # warns about and which may silently write to a temporary copy.
    res_cat['status'] = np.where(matched_idx, 'tp', 'fp')
    catalog = np.full(n_det, '', dtype=object)
    catalog[matched_idx] = true_clusters['catalog'].to_numpy()[nearest[matched_idx]]
    res_cat['catalog'] = catalog

    found = np.zeros(n_true, dtype=bool)
    found[nearest[matched_idx]] = True
    not_found = true_clusters[~found]

    fn = pd.DataFrame({'RA' : not_found['RA'], 'DEC' : not_found['DEC'], 
                       'catalog' : not_found['catalog'], 
                       'status' : ['fn'] * len(not_found)})

    out_cols = list(res_cat.columns)
    parts = [part for part in (res_cat, fn) if len(part) > 0]
    if parts:
        # reindex keeps the full column set even when there were no detections.
        res_cat = pd.concat(parts, ignore_index=True).reindex(columns=out_cols)

    if ret_coords:
        return res_cat
    stat_df['tp'] = np.count_nonzero(res_cat['status'] == 'tp')
    stat_df['fp'] = np.count_nonzero(res_cat['status'] == 'fp')
    stat_df['fn'] = np.count_nonzero(res_cat['status'] == 'fn')
    return stat_df

In [12]:
# Model file handed to load_planck_model; the result is used by the cells below.
MODEL_PATH = '/home/rt2122/Models/planck_z/f8d2.ep0014-vl0.006570-l0.004067.hdf5'
model = load_planck_model(MODEL_PATH)

In [58]:
# Tiles, masks and model predictions for large pixel 6, using the planck_z catalogues.
all_dict = gen_pics_for_detection_new(
    ipix=6,
    model=model,
    clusters_dir='/home/rt2122/Data/clusters_planck_z/',
)

In [183]:
# Detection catalogue (tp / fp / fn rows) at a prediction threshold of 0.4.
coords = detect_clusters_new(all_dict, thr=0.4)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


In [179]:
# (rows, columns) of the table returned by detect_clusters_new.
coords.shape

(49, 9)

In [180]:
# Number of 'tp' and 'fn' rows per source catalogue, followed by the overall
# number of 'fp' and 'tp' rows.
for status in ['tp', 'fn']:
    has_status = coords['status'] == status
    for cat in ['planck_z', 'planck_no_z', 'mcxcwp', 'act']:
        in_catalog = coords['catalog'] == cat
        print(status, cat, np.count_nonzero(in_catalog & has_status))
print(np.count_nonzero(coords['status'] == 'fp'))
print(np.count_nonzero(coords['status'] == 'tp'))

tp planck_z 29
tp planck_no_z 0
tp mcxcwp 0
tp act 0
fn planck_z 11
fn planck_no_z 0
fn mcxcwp 0
fn act 0
9
29


In [173]:
# Rows of the detection table attributed to the 'planck_z' catalogue.
planck_z = coords.loc[coords['catalog'] == 'planck_z']

In [174]:
# (rows, columns) of the planck_z subset.
planck_z.shape

(32, 9)

In [175]:
# Display the planck_z subset of the detection table.
planck_z

Unnamed: 0,RA,DEC,area,min_rad,max_rad,min_pred,max_pred,status,catalog
0,261.411216,85.87928,19.0,1.478872,3.613255,0.50819,0.998537,tp,planck_z
1,171.923132,71.105538,19.0,1.134615,3.556877,0.430976,0.922336,tp,planck_z
2,178.983605,73.566323,15.0,1.070389,3.143234,0.403708,0.760757,tp,planck_z
3,181.797649,71.68696,14.0,0.921796,3.343648,0.482473,0.81331,tp,planck_z
5,275.878223,78.388854,17.0,0.978495,3.508872,0.434119,0.984679,tp,planck_z
8,208.761411,77.263916,17.0,1.015839,3.472759,0.465352,0.990765,tp,planck_z
10,199.954068,70.0167,13.0,1.035318,3.262703,0.417573,0.812428,tp,planck_z
12,195.837588,67.458089,16.0,1.151331,3.340828,0.409737,0.939598,tp,planck_z
13,194.572598,65.373192,16.0,1.52936,3.576284,0.486278,0.973751,tp,planck_z
14,213.4853,71.292974,18.0,1.106321,3.376304,0.413182,0.974803,tp,planck_z


In [75]:
# Catalogue of true clusters stored by gen_pics_for_detection_new.
true_clusters = all_dict['true_clusters']

In [76]:
# Number of true clusters that come from the 'planck_z' catalogue.
is_planck_z = true_clusters['catalog'] == 'planck_z'
np.count_nonzero(is_planck_z)

40

In [74]:
# Check that the last index label equals rows - 1, i.e. the index was renumbered.
coords.shape, coords.index[-1]

((49, 10), 48)

In [91]:
# Distinct values present in the 'catalog' column.
set(coords['catalog'])

{nan, 'planck_z'}

In [126]:
# Toy detection table with three rows, used below to try out the labelling logic.
res_cat = pd.DataFrame({'RA' : [1, 2, 3], 'DEC' : [4, 5, 6], 'area' : [6, 6, 4]})

In [127]:
# Toy catalogue of two true clusters; note that this rebinds `true_clusters`.
true_clusters = pd.DataFrame({'RA' : [1, 2], 'DEC' : [4, 5], 'catalog' : ['planck_z'] * 2})

In [128]:
# Hand-made matching result for the toy tables: the first two detections are
# flagged as matched, and idx gives a true-cluster position per detection.
res_cat['tp_cat'] = ''
matched_idx = np.array([True, True, False])
idx = np.array([0, 1, 2])
res_cat['catalog'] = ''

In [135]:
# Label the toy detections.  `.loc` and whole-column assignments replace the
# chained `.iloc` writes, which triggered SettingWithCopyWarning and may write
# to a temporary copy instead of the frame.
res_cat['catalog'] = ''
res_cat.loc[matched_idx, 'catalog'] = (
    true_clusters['catalog'].to_numpy()[idx[matched_idx]])
res_cat['status'] = np.where(matched_idx, 'tp', 'fp')

# A true cluster is "found" when some matched detection points at its position.
true_clusters['found'] = np.isin(np.arange(len(true_clusters)), idx[matched_idx])
not_found = true_clusters[~true_clusters['found']]
print(np.count_nonzero(true_clusters['found']))

2


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


In [136]:
# Display the labelled toy detection table.
res_cat

Unnamed: 0,RA,DEC,area,tp_cat,catalog,status
0,1,4,6,,planck_z,tp
1,2,5,6,,planck_z,tp
2,3,6,4,,,fp


In [181]:
# Detections labelled 'tp'.
coords.loc[coords['status'] == 'tp']

Unnamed: 0,RA,DEC,area,min_rad,max_rad,min_pred,max_pred,status,catalog
0,261.411216,85.87928,19.0,1.478872,3.613255,0.50819,0.998537,tp,planck_z
1,171.923132,71.105538,19.0,1.134615,3.556877,0.430976,0.922336,tp,planck_z
2,178.983605,73.566323,15.0,1.070389,3.143234,0.403708,0.760757,tp,planck_z
3,181.797649,71.68696,14.0,0.921796,3.343648,0.482473,0.81331,tp,planck_z
4,284.353067,74.919484,11.0,1.08144,3.332216,0.425504,0.868978,tp,planck_z
5,275.878223,78.388854,17.0,0.978495,3.508872,0.434119,0.984679,tp,planck_z
8,208.761411,77.263916,17.0,1.015839,3.472759,0.465352,0.990765,tp,planck_z
10,199.954068,70.0167,13.0,1.035318,3.262703,0.417573,0.812428,tp,planck_z
12,195.837588,67.458089,16.0,1.151331,3.340828,0.409737,0.939598,tp,planck_z
13,194.572598,65.373192,16.0,1.52936,3.576284,0.486278,0.973751,tp,planck_z


In [184]:
# Same detection run, returning the tp / fp / tn / fn counts instead of the table.
stat = detect_clusters_new(all_dict, thr=0.4, ret_coords=False)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


In [185]:
# Display the counts returned by detect_clusters_new.
stat

{'tp': 29, 'fp': 9, 'tn': 15, 'fn': 11}