This notebook checks how many objects in my catalog of potential neighbors around the 9 spectroscopically confirmed QGs also appear in Marko's and Hollis's catalogs. The motivation is that there are very few matches between my catalog and Rafael's, which was compiled with CIGALE based on Olivier's catalog. There are a lot of names to keep track of, but they are all related.

(Binh, Sep 25, 2024)

In [2]:
# this sets up basic packages
import numpy as np
import pandas as pd
import astropy.units as u
import astropy.cosmology.units as cu

# this sets up matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

# this sets up astropy
from astropy.io import fits
from astropy.wcs import WCS
from astropy.wcs.utils import pixel_to_skycoord
from astropy.utils.data import get_pkg_data_filename
from astropy.coordinates import SkyCoord, Angle, match_coordinates_sky, Distance
from astropy.table import Table, unique

from regions import Regions, CircleSkyRegion

First, let's load in my catalog.

In [4]:
# Tab-separated table of potential neighbors; later cells read its 'id', 'ra' and 'dec' columns.
my_catalog = pd.read_csv('potential_neighbors.txt', delimiter='\t')

## CHECK 1

### Part 1: Marko's catalog vs. mine

In [7]:
# Read Marko's FITS catalog and build an astropy Table from the data in HDU 1.
# The context manager closes the file handle when the block exits; the Table is
# constructed inside the block, while the file is still open.
with fits.open('/Users/ngbinh/Downloads/check_data/COSMOSWeb_master_v3.1.0_assoc_cold+hot_sersic_cgs_err-calib.fits') as marko:
    marko_data = Table(marko[1].data)

In [8]:
### Cross-match on sky position
# Sky coordinates (in degrees) for every object in Marko's catalog ...
marko_ra = marko_data['RA_DETEC'].data * u.degree
marko_dec = marko_data['DEC_DETEC'].data * u.degree
marko_objects = SkyCoord(ra=marko_ra, dec=marko_dec)

# ... and for every row of my catalog.
my_ra = my_catalog['ra'].values * u.degree
my_dec = my_catalog['dec'].values * u.degree
my_objects = SkyCoord(ra=my_ra, dec=my_dec)

# Collect every (Marko object, my object) pair separated by at most 1 arcsec on the sky.
# The results are parallel arrays with one entry per pair: index into marko_objects,
# index into my_objects, on-sky separation, and 3D distance.
match_radius = 1 * u.arcsec
idx_marko, idx_mine, d2d, d3d = my_objects.search_around_sky(marko_objects, match_radius)

In [9]:
# One object in my catalog can be paired with several objects in Marko's catalog.
# For every object of mine that has at least one pair, keep only the pair with the
# smallest on-sky separation.
#
# The minimum is looked up only among the pairs that belong to the current object,
# so a pair of a different object that happens to have the same separation can never
# be picked by mistake. A single-pair object needs no special case: the minimum of
# one separation is that separation.
my_unique_ids = np.unique(idx_mine)
marko_matches = []            # index into marko_objects of the closest counterpart
my_matches_with_marko = []    # index into my_objects of the matched object (same order)
for match in my_unique_ids:
    # positions, within the pair arrays, of all pairs involving this object of mine
    pair_positions = np.flatnonzero(idx_mine == match)
    closest = pair_positions[d2d[pair_positions].argmin()]
    marko_matches.append(idx_marko[closest])
    my_matches_with_marko.append(idx_mine[closest])

In [10]:
# Report how many of my catalog objects have a counterpart in Marko's catalog.
n_marko_matched = len(marko_matches)
n_in_my_catalog = len(my_catalog['id'])
print("The number of unique matches with Marko's catalog is", n_marko_matched, 'out of', n_in_my_catalog)

The number of unique matches with Marko's catalog is 139 out of 155


In [11]:
# these are the objects (rows) in my catalog that aren't in Marko's catalog.
# my_matches_with_marko holds positions in my_objects, which was built row by row from
# my_catalog, so the unmatched rows are selected with a positional boolean mask.
# Unlike a concat + drop_duplicates(keep=False) approach, this does not depend on the
# DataFrame's index labels and does not discard rows that are genuinely duplicated
# inside my_catalog.
matched_in_marko = np.zeros(len(my_catalog), dtype=bool)
matched_in_marko[np.asarray(my_matches_with_marko, dtype=int)] = True
catalog_not_in_marko = my_catalog[~matched_in_marko]
catalog_not_in_marko

Unnamed: 0,id,ra,dec
14,780511,150.059862,2.377896
15,780592,150.060254,2.37831
16,780808,150.060603,2.379956
20,780995,150.060525,2.381155
21,781053,150.061446,2.381259
34,782969,150.112951,2.377312
37,783202,150.112071,2.379218
88,784663,150.087636,2.398937
95,642465,150.43679,2.466868
96,642503,150.436177,2.467482


### Part 2: Marko's catalog vs. Olivier's

In [13]:
# Read Olivier's space-separated photo-z output, then move every value one column to the
# right so that the values line up with the header names.
# NOTE(review): presumably the header line starts with a '#', which is parsed as an extra
# first column name (the table displayed further down has an empty '#' column) -- confirm
# against the file.
olivier_raw = pd.read_csv('/Users/ngbinh/Downloads/check_data/photoz_BC03_v3.1.0_cosmosweb.out', sep=' ')
olivier_data = olivier_raw.shift(periods=1, axis=1)

In [14]:
# Intersect Olivier's 'Id' column with the Marko-side match values collected earlier.
# np.intersect1d returns the sorted unique common values and, for each of them, the index
# of its first occurrence in the first input (olivier_idx) and in the second (marko_idx).
# NOTE(review): marko_matches holds the indices returned by search_around_sky, i.e. row
# positions in marko_objects, not values from an ID column of Marko's table. This
# comparison assumes Olivier's 'Id' equals that row position -- TODO confirm.
# NOTE(review): repeated values in marko_matches collapse to a single entry here.
olivier_ids = olivier_data['Id'].values
marko_match_values = np.array(marko_matches, dtype='object')
olivier_marko_matches, olivier_idx, marko_idx = np.intersect1d(
    olivier_ids, marko_match_values, return_indices=True
)

In [15]:
# Keep the rows of Olivier's catalog whose 'Id' is one of the values shared with the
# Marko matches.
id_is_shared = olivier_data['Id'].isin(olivier_marko_matches)
olivier_matching_rows = olivier_data.loc[id_is_shared]

In [16]:
### Tally the Olivier-Marko matches by the warning flag Olivier assigned to them
### (flag meanings are described in the README_flagging TXT file).
olivier_flags = olivier_matching_rows['warn_fl'].values

# One output line per distinct flag value: how many matched objects carry that flag.
for flag_value in np.unique(olivier_flags):
    has_this_flag = olivier_flags == flag_value
    flag_no = np.count_nonzero(has_this_flag)
    print(flag_no, 'objects are flagged', flag_value)

111 objects are flagged 0
25 objects are flagged 1
1 objects are flagged 3


In [17]:
# Ids of the matched objects whose warning flag equals 1.
is_flag_one = olivier_matching_rows['warn_fl'] == 1
flagged_ones = olivier_matching_rows.loc[is_flag_one, 'Id'].values

In [53]:
# Display the full rows of the matched objects whose warning flag equals 1.
olivier_matching_rows.loc[olivier_matching_rows['warn_fl'].eq(1)]

Unnamed: 0,#,Id,alpha,delta,zfinal,type,warn_fl,zPDF,zPDF_l68,zPDF_u68,zChi2,chi2_best,NbFilt,zp_AGN,chi2_agn,mod_agn,mod_star,chi_star
642709,,642709,150.05962,2.37923,-99.0,0,1,1.627,1.62,1.6431,1.629,521.811,31,2.4924,549.813,20.0,254.0,738.898
642860,,642860,150.06119,2.37984,-99.0,0,1,0.0917,0.03,0.2014,2.432,72.7762,31,0.0752,79.6577,3.0,196.0,85.4632
642890,,642890,150.06118,2.38007,-99.0,0,1,3.5078,1.6031,3.5986,1.636,197.194,31,2.795,222.187,20.0,155.0,268.521
643084,,643084,150.0591,2.38198,-99.0,0,1,0.9897,0.98,0.9986,0.9896,663.266,30,0.0,783.446,3.0,393.0,795.987
645848,,645848,150.08713,2.39217,-99.0,0,1,0.9937,0.9709,1.0,0.9953,324.266,32,0.0,426.227,3.0,427.0,396.39
645908,,645908,150.08484,2.39333,-99.0,0,1,0.9932,0.9533,1.0175,0.9973,46.2117,32,0.1209,66.802,3.0,200.0,127.29
646089,,646089,150.08726,2.39377,-99.0,0,1,0.9853,0.98,2.4897,0.9853,366.263,32,0.0,454.712,4.0,414.0,482.441
646139,,646139,150.08696,2.39423,-99.0,0,1,2.411,1.6375,2.4416,2.434,158.312,32,0.0658,193.753,4.0,198.0,226.398
646168,,646168,150.08713,2.39441,-99.0,0,1,0.9672,0.8752,1.6178,1.635,119.544,32,0.1321,147.523,3.0,361.0,174.851
646215,,646215,150.08469,2.3956,-99.0,0,1,0.6965,0.5148,1.6538,2.455,81.8831,32,0.1234,100.122,3.0,370.0,113.701


In [18]:
# Locate each flag-1 Id inside olivier_marko_matches (the array of shared Ids built earlier).
# marko_idx_narrowed gives the positions of those Ids within olivier_marko_matches, which is
# what lets the next step trace the flagged objects back to rows of my catalog.
flags, flagged_idx, marko_idx_narrowed = np.intersect1d(
    flagged_ones,
    olivier_marko_matches,
    return_indices=True,
)

In [19]:
# Rows of my catalog that correspond to the flag-1 objects.
# marko_idx maps each shared Id to its position in the list of Marko matches, and
# marko_idx_narrowed picks out the shared Ids that are flagged 1. my_matches_with_marko is
# ordered like the list of Marko matches, so the same positions give the rows of my catalog.
# The positions are converted to an integer array because .iloc expects a numeric indexer
# (an object-dtype array is not one).
flagged_match_positions = marko_idx[marko_idx_narrowed]
flagged_my_rows = np.asarray(my_matches_with_marko, dtype=int)[flagged_match_positions]
twenty_five_lephare = my_catalog.iloc[flagged_my_rows]

### Discussion
Based on the README_flagging.txt file, this means that of Marko's <b>139</b> objects that are matchable to my catalog:
- <b>111</b> objects look fine, according to Olivier (flag 0)
- <b>25</b> objects have an offset between ground and space, according to Olivier (flag 1)
- <b>1</b> object has hot pixels, i.e. it has a radius below 1.e-5, according to Olivier (flag 3)

Additionally, there are <b>2</b> objects that are NOT flagged by Olivier at all because they do not show up in Olivier's catalog.

## CHECK 2
### Hollis's catalog with mine

In [22]:
# Read Hollis's FITS catalog and build an astropy Table from the data in HDU 1.
# The context manager closes the file handle when the block exits; the Table is
# constructed inside the block, while the file is still open.
with fits.open('/Users/ngbinh/Downloads/check_data/COSMOS-Web_hot+cold_aperture_catalog_v1.3.fits') as hollis:
    hollis_data = Table(hollis[1].data)

In [23]:
### Cross-match on sky position
# Sky coordinates (in degrees) for every object in Hollis's catalog.
hollis_ra = hollis_data['ra'].data * u.degree
hollis_dec = hollis_data['dec'].data * u.degree
hollis_objects = SkyCoord(ra=hollis_ra, dec=hollis_dec)

# Collect every (Hollis object, my object) pair separated by at most 1 arcsec on the sky.
# The results are parallel arrays with one entry per pair: index into hollis_objects,
# index into my_objects, on-sky separation, and 3D distance.
hollis_match_radius = 1 * u.arcsec
idx_hollis, idx_mine2, d2d2, d3d2 = my_objects.search_around_sky(hollis_objects, hollis_match_radius)

In [24]:
# One object in my catalog can be paired with several objects in Hollis's catalog.
# For every object of mine that has at least one pair, keep only the pair with the
# smallest on-sky separation.
#
# The minimum is looked up only among the pairs that belong to the current object,
# so a pair of a different object that happens to have the same separation can never
# be picked by mistake. A single-pair object needs no special case: the minimum of
# one separation is that separation.
my_unique_ids_2 = np.unique(idx_mine2)
hollis_matches = []            # index into hollis_objects of the closest counterpart
my_matches_with_hollis = []    # index into my_objects of the matched object (same order)
for match in my_unique_ids_2:
    # positions, within the pair arrays, of all pairs involving this object of mine
    pair_positions = np.flatnonzero(idx_mine2 == match)
    closest = pair_positions[d2d2[pair_positions].argmin()]
    hollis_matches.append(idx_hollis[closest])
    my_matches_with_hollis.append(idx_mine2[closest])

In [25]:
# Report how many of my catalog objects have a counterpart in Hollis's catalog.
n_hollis_matched = len(hollis_matches)
n_in_my_catalog = len(my_catalog['id'])
print("The number of unique matches with Hollis's catalog is", n_hollis_matched, 'out of', n_in_my_catalog)

The number of unique matches with Hollis's catalog is 139 out of 155


In [26]:
# these are the objects (rows) in my catalog that aren't in Hollis's catalog.
# my_matches_with_hollis holds positions in my_objects, which was built row by row from
# my_catalog, so the unmatched rows are selected with a positional boolean mask.
# Unlike a concat + drop_duplicates(keep=False) approach, this does not depend on the
# DataFrame's index labels and does not discard rows that are genuinely duplicated
# inside my_catalog.
matched_in_hollis = np.zeros(len(my_catalog), dtype=bool)
matched_in_hollis[np.asarray(my_matches_with_hollis, dtype=int)] = True
catalog_not_in_hollis = my_catalog[~matched_in_hollis]
catalog_not_in_hollis

Unnamed: 0,id,ra,dec
14,780511,150.059862,2.377896
15,780592,150.060254,2.37831
16,780808,150.060603,2.379956
20,780995,150.060525,2.381155
21,781053,150.061446,2.381259
34,782969,150.112951,2.377312
37,783202,150.112071,2.379218
88,784663,150.087636,2.398937
95,642465,150.43679,2.466868
96,642503,150.436177,2.467482


## CONCLUSION
There are <b>16</b> objects in my catalog of potential neighbors that are in neither Marko's nor Hollis's catalog.

In [28]:
# Write the unmatched rows to a tab-separated text file, without the DataFrame index.
# NOTE(review): only the Hollis-side table is written; this equals the set of objects
# missing from both catalogs only if the Marko-side unmatched rows are the same -- confirm.
catalog_not_in_hollis.to_csv('16_outliers.txt', index=False, sep='\t')

In [29]:
# Write the rows of my catalog that correspond to the flag-1 objects to a tab-separated
# text file, without the DataFrame index.
twenty_five_lephare.to_csv('25_flag1.txt', index=False, sep='\t')