In [1]:
import pandas as pd
import math
import numpy as np
from astropy.coordinates import SkyCoord
from astropy.io import fits
import healpy as hp
import matplotlib.pyplot as plt
import time
import sys
from pixell import enmap, enplot, reproject, utils, curvedsky, wcsutils
from matplotlib import cm
from scipy.optimize import curve_fit
from scipy.linalg import sqrtm
from tqdm import tqdm

In [2]:
amf = pd.read_csv('data/dr9_main_complete.csv', header=None)
amf.columns = ['amf_no','amf_ra','amf_dec','amf_z','amf_lk','amf_rh','amf_r200','amf_rc','amf_c','amf_str_rh','bax_id','mcxc_id','red_id','whl_id']

c = SkyCoord(amf['amf_ra'],amf['amf_dec'],frame='icrs',unit='deg')
amf['glat'] = c.galactic.b.degree
amf['glon'] = c.galactic.l.degree
amf['cat'] = 0
amf_whl = amf[(amf.whl_id!='-9999')]

In [3]:
amf

Unnamed: 0,amf_no,amf_ra,amf_dec,amf_z,amf_lk,amf_rh,amf_r200,amf_rc,amf_c,amf_str_rh,bax_id,mcxc_id,red_id,whl_id,glat,glon,cat
0,4,23.9128,20.7465,0.0601,176.9125,219.3663,1.946,0.925,2.104,73.0001,-9999,-9999,-9999,-9999,-40.943468,136.662636,0
1,5,260.6324,32.1398,0.2252,222.1219,200.7117,1.784,0.625,2.856,35.0001,ABELL2261,J1722.4+3208,RMJ172227.2+320757.2,J172227.2+320757,31.846666,55.602221,0
2,6,197.8796,-1.3356,0.2042,171.6872,192.0829,1.773,0.227,7.792,9.0001,ABELL1689,J1311.5-0120,RMJ131129.5-012028.0,J131129.5-012028,61.128514,313.370385,0
3,7,250.1485,46.6917,0.2248,225.9874,182.3373,1.725,0.495,3.483,36.0001,ABELL2219,J1640.3+4642,RMJ164019.8+464241.5,J164019.8+464242,41.429880,72.599428,0
4,8,346.3402,21.0378,0.1453,121.0124,182.2769,1.775,0.440,4.034,72.0001,-9999,-9999,RMJ230520.1+210313.5,J230520.1+210313,-35.420653,92.175825,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
46474,54273,321.0614,-5.5885,0.4036,7.3319,20.0008,0.734,0.260,2.823,85.1216,-9999,-9999,-9999,J212412.8-053647,-36.312474,46.861993,0
46475,54274,161.7603,-2.9238,0.2064,11.9584,20.0006,0.790,0.090,8.781,9.1111,-9999,-9999,-9999,J104700.1-025524,47.546325,253.093638,0
46476,54275,324.7319,15.5898,0.1964,8.6750,20.0005,0.793,0.180,4.404,73.0886,-9999,-9999,-9999,J213855.7+153523,-26.720882,69.515189,0
46477,54276,132.8109,1.8870,0.2697,12.2539,20.0002,0.772,0.260,2.971,12.1012,-9999,-9999,-9999,J085052.3+015316,27.329967,225.825134,0


In [5]:
hdul = fits.open("data/galaxy_clusters_desdr2.fits")
data = hdul[1].data
zou1 = pd.DataFrame({'ra':data['RA_PEAK'],
                    'dec':data['DEC_PEAK'],
                    'z':data['PHOTO_Z_PEAK'],
                    'rh':data['RICHNESS']})
c = SkyCoord(zou1['ra'],zou1['dec'],frame='icrs',unit='deg')
zou1['glat'] = c.galactic.b.degree
zou1['glon'] = c.galactic.l.degree
zou1['cat'] = 1

In [6]:
hdul = fits.open("data/galaxy_clusters_desidr9.fits")
data = hdul[1].data
zou2 = pd.DataFrame({'ra':data['RA_PEAK'],
                    'dec':data['DEC_PEAK'],
                    'z':data['PHOTO_Z_PEAK'],
                    'rh':data['RICHNESS']})
c = SkyCoord(zou2['ra'],zou2['dec'],frame='icrs',unit='deg')
zou2['glat'] = c.galactic.b.degree
zou2['glon'] = c.galactic.l.degree
zou2['cat'] = 2

In [7]:
hdul = fits.open("data/galaxy_clusters_hscpdr3_wide.fits")
data = hdul[1].data
zou3 = pd.DataFrame({'ra':data['RA_PEAK'],
                    'dec':data['DEC_PEAK'],
                    'z':data['PHOTO_Z_PEAK'],
                    'rh':data['RICHNESS']})
c = SkyCoord(zou3['ra'],zou3['dec'],frame='icrs',unit='deg')
zou3['glat'] = c.galactic.b.degree
zou3['glon'] = c.galactic.l.degree
zou3['cat'] = 3

In [8]:
zou = pd.concat([zou1,zou2,zou3])

In [9]:
# Mass in 10^14
wh22 = pd.read_csv('data/cluster_DESunWISE.dat', sep='\s+', header=None, usecols=[3,4,5,10,11])
wh22.columns = ['ra','dec','z','rh','m']
c = SkyCoord(wh22['ra'],wh22['dec'],frame='icrs',unit='deg')
wh22['glat'] = c.galactic.b.degree
wh22['glon'] = c.galactic.l.degree
wh22['cat'] = 4

In [10]:
# dcl.z = np.array(dcl.z).byteswap().newbyteorder()

In [11]:
amf0 = amf.rename(columns={'amf_ra':'amf_ra','amf_dec':'amf_dec','amf_z':'z','amf_rh':'rh'})[['amf_ra','amf_dec','z','rh','cat']]
zou0 = zou.rename(columns={'ra':'amf_ra','dec':'amf_dec'})[['amf_ra','amf_dec','z','rh','cat']]
wh220 = wh22.rename(columns={'ra':'amf_ra','dec':'amf_dec'})[['amf_ra','amf_dec','z','rh','cat']]

union = pd.concat([amf0,zou0,wh220])#.to_csv('../../data/union.csv')

In [12]:
union

Unnamed: 0,amf_ra,amf_dec,z,rh,cat
0,23.91280,20.74650,0.0601,219.3663,0
1,260.63240,32.13980,0.2252,200.7117,0
2,197.87960,-1.33560,0.2042,192.0829,0
3,250.14850,46.69170,0.2248,182.3373,0
4,346.34020,21.03780,0.1453,182.2769,0
...,...,...,...,...,...
151239,359.98911,-6.40920,1.0678,68.5000,4
151240,359.99091,-44.66209,0.7684,16.3400,4
151241,359.99377,-49.09889,0.6708,21.8500,4
151242,359.99503,-37.71075,0.6743,45.1500,4


In [121]:
import pandas as pd
import numpy as np
from scipy.spatial import KDTree

# Assuming you already have a DataFrame named 'df' with columns 'x' and 'y'
df = union.copy()
coords = df[['amf_ra', 'amf_dec','z']].values

# Create a k-d tree from the coordinates
tree = KDTree(coords)

# Query the k-d tree for nearest neighbors within 0.5 distance
# Note that each point will find itself as the nearest neighbor with distance 0
distances, indices = tree.query(coords, k=2, distance_upper_bound=1e-2)

# Check if there's any point within 0.5 distance for each row, excluding the point itself (hence k=2)
# We can do this by checking if the second smallest distance is <= 0.5
df['Match'] = distances[:, 1] <= 1e-2

In [130]:
df[df['Match']].round(1).drop_duplicates(subset=['amf_ra','amf_dec','z'])

Unnamed: 0,amf_ra,amf_dec,z,rh,cat,Match
22,347.1,-2.2,0.3,149.3,0,True
44,326.5,20.5,0.3,133.4,0,True
58,341.4,28.1,0.4,128.8,0,True
62,192.4,49.9,0.3,126.5,0,True
63,16.2,0.1,0.3,126.5,0,True
...,...,...,...,...,...,...
150554,359.1,2.4,0.4,25.5,4,True
150617,359.2,2.5,0.4,16.9,4,True
150635,359.3,-42.2,0.9,44.9,4,True
150680,359.3,-0.5,1.0,44.8,4,True


In [125]:
deswh = union[(union.cat.isin([1,4]))]

import pandas as pd
import numpy as np
from scipy.spatial import KDTree

# Assuming you already have a DataFrame named 'deswh' with columns 'x' and 'y'
coords = deswh[['amf_ra', 'amf_dec','z']].values

# Create a k-d tree from the coordinates
tree = KDTree(coords)

# Query the k-d tree for nearest neighbors within 0.5 distance
# Note that each point will find itself as the nearest neighbor with distance 0
distances, indices = tree.query(coords, k=2, distance_upper_bound=1e-2)

# Check if there's any point within 0.5 distance for each row, excluding the point itself (hence k=2)
# We can do this by checking if the second smallest distance is <= 0.5
deswh['Match'] = distances[:, 1] <= 1e-2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  deswh['Match'] = distances[:, 1] <= 1e-2


In [131]:
deswh[deswh['Match']].round(1).drop_duplicates(subset=['amf_ra','amf_dec','z'])

Unnamed: 0,amf_ra,amf_dec,z,rh,cat,Match
3,0.8,5.3,0.2,112.9,1,True
44,12.7,4.9,0.5,127.8,1,True
97,26.6,4.9,0.7,134.4,1,True
132,32.5,5.0,0.6,151.1,1,True
141,35.4,5.1,0.8,132.8,1,True
...,...,...,...,...,...,...
145143,350.4,0.6,0.8,24.9,4,True
148011,355.2,-39.8,0.7,48.4,4,True
148139,355.4,-43.3,0.2,16.0,4,True
150516,359.1,-1.5,0.6,18.8,4,True


In [127]:
hscwh = union[(union.cat.isin([3,4]))]

import pandas as pd
import numpy as np
from scipy.spatial import KDTree

# Assuming you already have a DataFrame named 'hscwh' with columns 'x' and 'y'
coords = hscwh[['amf_ra', 'amf_dec','z']].values

# Create a k-d tree from the coordinates
tree = KDTree(coords)

# Query the k-d tree for nearest neighbors within 0.5 distance
# Note that each point will find itself as the nearest neighbor with distance 0
distances, indices = tree.query(coords, k=2, distance_upper_bound=1e-2)

# Check if there's any point within 0.5 distance for each row, excluding the point itself (hence k=2)
# We can do this by checking if the second smallest distance is <= 0.5
hscwh['Match'] = distances[:, 1] <= 1e-2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hscwh['Match'] = distances[:, 1] <= 1e-2


In [133]:
hscwh[hscwh['Match']].round(1).drop_duplicates(subset=['amf_ra','amf_dec','z'])

Unnamed: 0,amf_ra,amf_dec,z,rh,cat,Match
9012,15.7,2.6,0.5,188.7,3,True
9051,21.2,2.5,0.2,96.1,3,True
9101,32.2,3.0,0.4,55.6,3,True
9108,34.0,2.8,0.1,38.4,3,True
9171,38.9,2.9,0.9,135.0,3,True
...,...,...,...,...,...,...
58000,38.9,-1.9,0.7,49.0,4,True
136799,336.3,-0.4,0.8,41.1,4,True
137128,336.8,1.0,0.9,26.6,4,True
140503,342.6,0.8,0.2,17.4,4,True
