In [1]:
## imports
# astronomy
from astropy.io import fits
from astropy.io import ascii
from astropy.table import Table
from astropy.coordinates import SkyCoord
import astropy.units as u
# plotting
import matplotlib.pyplot as plt
# data 
import numpy as np
import pandas as pd 

# 1. Read Data

In [2]:
# file paths 
path_catalogs = 'C:\\Users\\polar\\OneDrive - The University of Kansas\\AGNerds\\Catalogs'

In [3]:
# open tphot
tphot_data = ascii.read(path_catalogs+'\\tphot.cat')
tphot_cols = tphot_data.colnames

# # show table
# tphot_data.show_in_notebook()
print(tphot_cols)

['field', 'ra', 'dec', 'f560w_uJy', 'f560w_uJy_err', 'f770w_uJy', 'f770w_uJy_err', 'f1000w_uJy', 'f1000w_uJy_err', 'f1280w_uJy', 'f1280w_uJy_err', 'f1500w_uJy', 'f1500w_uJy_err', 'f1800w_uJy', 'f1800w_uJy_err', 'f2100w_uJy', 'f2100w_uJy_err']


In [34]:
# open EGS F22
egs_f22_data = Table.read(path_catalogs+'\\EGS_F22.fits') # ??? something is wrong with the dimentsions... TODO just build new table manually 
egs_f22_cols = egs_f22_data.columns

# # print all columns
# for col in egs_f22_cols:
#     print(col)

In [37]:
# print(egs_f22_data)
# print('\n\n')
# print(egs_f22_data[0])

In [5]:
# open EGS FIR
egs_fir_data = Table.read(path_catalogs+'\\egs_FIR_photometry_catalog.fits')
egs_fir_cols = egs_fir_data.columns

# # print all columns
# for col in egs_fir_cols:
#     print(col)

# 2. Match EGS catalogs using RA/DEC

In [6]:
# Get RA/DEC
egs_f22_RA  = np.array(egs_f22_data['RA']  )[0]
egs_f22_DEC = np.array(egs_f22_data['DEC'] )[0]
egs_fir_RA  = np.array(egs_fir_data['RA']  )
egs_fir_DEC = np.array(egs_fir_data['Decl'])

In [7]:
# get coordinants 
egs_f22_coord = SkyCoord(ra=egs_f22_RA*u.deg, dec=egs_f22_DEC*u.deg)
egs_fir_coord = SkyCoord(ra=egs_fir_RA*u.deg, dec=egs_fir_DEC*u.deg)

In [8]:
print('Number of EGS f22 sources:\t', len(egs_f22_RA))
print('Number of EGS fir sources:\t', len(egs_fir_RA))

Number of EGS f22 sources:	 66783
Number of EGS fir sources:	 41656


In [9]:
# idx are indices into catalog that are the closest objects to each of the coordinates in c, 
# d2d are the on-sky distances between them, and 
# d3d are the 3-dimensional distances. 
idx, d2d, d3d = egs_fir_coord.match_to_catalog_sky(egs_f22_coord) # idx, d2d, d3d = c.match_to_catalog_sky(catalog)

# separation constraint
egs_max_sep = 1.0 * u.arcsec
egs_sep_constraint = d2d < egs_max_sep  # use on 'c' (egs_fir_coord)
egs_idx_sep = idx[egs_sep_constraint]   # use on 'catalog' (egs_f22_coord)

# get matches
egs_f22_coord_matches = egs_f22_coord[egs_idx_sep]
egs_fir_coord_matches = egs_fir_coord[egs_sep_constraint]

# print length  
print('Number of matches:\t', len(egs_f22_coord_matches))
print('Number of matches:\t', len(egs_fir_coord_matches))

Number of matches:	 40850
Number of matches:	 40850


In [10]:
# test match
i=4
print(egs_f22_coord_matches[i])
print(egs_fir_coord_matches[i])

<SkyCoord (ICRS): (ra, dec) in deg
    (215.278675, 53.035885)>
<SkyCoord (ICRS): (ra, dec) in deg
    (215.27872, 53.03587)>


In [11]:
# TEST IF THERE ARE DUPLICATES BETWEEN CATALOGS

# make mask of unique soruces 
mask = np.zeros(len(egs_idx_sep), dtype=bool)
mask[np.unique(egs_idx_sep, return_index=True)[1]] = True

# get value of duplicates sources 
duplicates = np.unique(egs_idx_sep[~mask])

# set all non-unique sources to False 
for dup in duplicates :
    mask[np.where(egs_idx_sep == dup)] = False

# apply mask to get unique and duplicate sources
idx_sep_unique = egs_idx_sep[mask]
idx_sep_duplicates = egs_idx_sep[~mask]

# print info
print('Number of unique:\t',        len(idx_sep_unique))
print('Number of duplicates:\t',    len(idx_sep_duplicates))    # ??? TODO just exclude these sorces...(keep list of them though)
# print('Duplicate sources:\n',       idx_sep_duplicates)

Number of unique:	 40571
Number of duplicates:	 279


# 3. Match tphot to EGS f22 using RA/DEC

In [12]:
## get RA and DEC from catalogs

# get RA and DEC from tphot and make array
tphot_RA  = np.array(tphot_data['ra'])
tphot_DEC = np.array(tphot_data['dec'])
# get RA and DEC from egs_f22 
egs_f22_RA  = np.array(egs_f22_data['RA'] )[0][egs_idx_sep]
egs_f22_DEC = np.array(egs_f22_data['DEC'])[0][egs_idx_sep]

# get coordinants 
tphot_coord = SkyCoord(ra=tphot_RA*u.deg, dec=tphot_DEC*u.deg)
egs_f22_coord = SkyCoord(ra=egs_f22_RA*u.deg, dec=egs_f22_DEC*u.deg)

In [13]:
print('Number of tphot sources:\t', len(tphot_RA))
print('Number of egs_f22 sources:\t', len(egs_f22_RA))

Number of tphot sources:	 1734
Number of egs_f22 sources:	 40850


In [14]:
## match RA and DEC between catalogs
# DOC: https://docs.astropy.org/en/stable/coordinates/matchsep.html

# idx are indices into catalog that are the closest objects to each of the coordinates in c, 
# d2d are the on-sky distances between them, and 
# d3d are the 3-dimensional distances. 
idx, d2d, d3d = egs_f22_coord.match_to_catalog_sky(tphot_coord) # idx, d2d, d3d = c.match_to_catalog_sky(catalog)

# separation constraint
max_sep = 1.0 * u.arcsec
# max_sep = 0.5 * u.arcsec
sep_constraint = d2d < max_sep  # use on 'c' (egs_f22_coord)
idx_sep = idx[sep_constraint]   # use on 'catalog' (tphot)

# get matches
egs_f22_coord_matches = egs_f22_coord[sep_constraint]
tphot_coord_matches = tphot_coord[idx_sep]

# print length  
print('Number of matches:\t', len(egs_f22_coord_matches))
print('Number of matches:\t', len(tphot_coord_matches))

Number of matches:	 1659
Number of matches:	 1659


In [15]:
# test match
i=12
print(egs_f22_coord_matches[i])
print(tphot_coord_matches[i])

<SkyCoord (ICRS): (ra, dec) in deg
    (215.054121, 52.898698)>
<SkyCoord (ICRS): (ra, dec) in deg
    (215.054131, 52.89869)>


# 4. Find Duplicate RA/DEC Matches

In [16]:
# make mask of unique soruces 
mask = np.zeros(len(idx_sep), dtype=bool)
mask[np.unique(idx_sep, return_index=True)[1]] = True

# get value of duplicates sources 
duplicates = np.unique(idx_sep[~mask])

# set all non-unique sources to False 
for dup in duplicates :
    mask[np.where(idx_sep == dup)] = False

# apply mask to get unique and duplicate sources
idx_sep_unique = idx_sep[mask]
idx_sep_duplicates = idx_sep[~mask]

# print info
print('Number of unique:\t',        len(idx_sep_unique))
print('Number of duplicates:\t',    len(idx_sep_duplicates))
# print('Duplicate sources:\n',       idx_sep_duplicates)

Number of unique:	 1608
Number of duplicates:	 51


In [17]:
# apply mask to egs sources
egs_f22_i    = np.where(sep_constraint)[0]
egs_f22_i_unique = egs_f22_i[mask]
egs_f22_i_duplicates = egs_f22_i[~mask]

# print info
print('Number of unique:\t', len(egs_f22_i_unique))
print('Number of duplicates:\t', len(egs_f22_i_duplicates))
# print('Duplicates:\n', egs_f22_i_duplicates)

Number of unique:	 1608
Number of duplicates:	 51


In [18]:
# get values 
egs_f22_coord_unique = egs_f22_coord[egs_f22_i_unique]
tphot_coord_unique = tphot_coord[idx_sep_unique]

# test match
i=1
print(egs_f22_coord_unique[i])
print(tphot_coord_unique[i])

<SkyCoord (ICRS): (ra, dec) in deg
    (215.061586, 52.901517)>
<SkyCoord (ICRS): (ra, dec) in deg
    (215.061606, 52.901508)>


In [19]:
# get values 
egs_f22_coord_duplicates = egs_f22_coord[egs_f22_i_duplicates]
tphot_coord_duplicates = tphot_coord[idx_sep_duplicates]

# test match
i=3
print(egs_f22_coord_duplicates[i])
print(tphot_coord_duplicates[i])

<SkyCoord (ICRS): (ra, dec) in deg
    (215.015158, 52.9125)>
<SkyCoord (ICRS): (ra, dec) in deg
    (215.015239, 52.912741)>


# 5. Build Catalog

In [20]:
# helper indexing
tphot_i     = idx_sep_unique
egs_f22_i   = egs_idx_sep[egs_f22_i_unique] 
egs_fir_i   = egs_sep_constraint[egs_f22_i_unique]

# verify that all lengths match
print(len(tphot_i))
print(len(egs_f22_i))
print(len(egs_fir_i))

# save number of matches 
n_matches = len(tphot_i)

1608
1608
1608


In [21]:
print(type(tphot_data))
print(type(egs_f22_data))
print(type(egs_fir_data))

<class 'astropy.table.table.Table'>
<class 'astropy.table.table.Table'>
<class 'astropy.table.table.Table'>


In [22]:
# convert astropy table to pandas dataframe
tphot_df   = tphot_data.to_pandas()
egs_f22_df = egs_f22_data.to_pandas() # ??? busted... TODO build table by processing columns individually 
egs_fir_df = egs_fir_data.to_pandas()

# setup table with column names 
# populate table row by row


ValueError: Cannot convert a table with multidimensional columns to a pandas DataFrame. Offending columns are: ['ID', 'RA', 'DEC', 'CANDELS_RA', 'CANDELS_DEC', 'F606W', 'F814W', 'F105W', 'F125W', 'F140W', 'F160W', 'F36', 'F45', 'DF606W', 'DF814W', 'DF105W', 'DF125W', 'DF140W', 'DF160W', 'DF36', 'DF45', 'ZA', 'ZL68', 'ZU68', 'ZPEAK', 'ZA_NOIRAC', 'ZL68_NOIRAC', 'ZU68_NOIRAC', 'ZPEAK_NOIRAC']
One can filter out such columns using:
names = [name for name in tbl.colnames if len(tbl[name].shape) <= 1]
tbl[names].to_pandas(...)