In [2]:
## imports
# astronomy
from astropy.io import fits
from astropy.io import ascii
from astropy.table import Table
from astropy.coordinates import SkyCoord
import astropy.units as u
# plotting
import matplotlib.pyplot as plt
# data 
import numpy as np
import pandas as pd 

# Read Data

In [3]:
# Directory that holds the input catalogs.
# NOTE: machine-specific absolute Windows path -- edit this to run elsewhere.
path_catalogs = r'D:\AstroCatalogs\AGNerds'

In [4]:
# Read the tphot catalog (plain-text table) and keep its column names.
tphot_data = ascii.read(path_catalogs+'\\tphot.cat')
tphot_cols = tphot_data.colnames

# list the column names
print(tphot_cols)

# Show the table. This has to be the LAST expression of the cell: Jupyter only
# renders the value of the final expression, so calling it mid-cell (as before)
# silently discarded the rendered table.
tphot_data.show_in_notebook()

['field', 'ra', 'dec', 'f560w_uJy', 'f560w_uJy_err', 'f770w_uJy', 'f770w_uJy_err', 'f1000w_uJy', 'f1000w_uJy_err', 'f1280w_uJy', 'f1280w_uJy_err', 'f1500w_uJy', 'f1500w_uJy_err', 'f1800w_uJy', 'f1800w_uJy_err', 'f2100w_uJy', 'f2100w_uJy_err']


In [5]:
# Load the EGS F22 catalog from its FITS file and grab the column container.
egs_f22_data = Table.read(path_catalogs+'\\EGS_F22.fits')
egs_f22_cols = egs_f22_data.columns

# One column name per line (iterating the column container yields the names).
for col_name in egs_f22_cols:
    print(col_name)

ID
RA
DEC
CANDELS_RA
CANDELS_DEC
F606W
F814W
F105W
F125W
F140W
F160W
F36
F45
DF606W
DF814W
DF105W
DF125W
DF140W
DF160W
DF36
DF45
ZA
ZL68
ZU68
ZPEAK
ZA_NOIRAC
ZL68_NOIRAC
ZU68_NOIRAC
ZPEAK_NOIRAC


In [6]:
# Load the EGS FIR photometry catalog from its FITS file and grab the column container.
egs_fir_data = Table.read(path_catalogs+'\\egs_FIR_photometry_catalog.fits')
egs_fir_cols = egs_fir_data.columns

# One column name per line (iterating the column container yields the names).
for col_name in egs_fir_cols:
    print(col_name)

ID_iau
ID
RA
Decl
zphot
zp_l68
zp_u68
zspec
Mstar
eMstar
SNR_IR
z_IR
ez_IR
SFR_IR
eSFR_IR
FK
dFK
Fch1
dFch1
Fch2
dFch2
Fch3
dFch3
Fch4
dFch4
F24
dF24
Q24
F100
dF100
Q100
F160
dF160
Q160
F250
dF250
F350
dF350
F450
dF450
F500
dF500
F850_sh
dF850_sh
F850_d
dF850_d
F850
dF850
F1100
dF1100
Q1100
F20cm
dF20cm
xfAGN
xeAGN
xfTOT
xeTOT
xf70
xe70
xf100
xe100
xf160
xe160
xf250
xe250
xf350
xe350
xf450
xe450
xf500
xe500
xf850
xe850
xf1100
xe1100
xf1200
xe1200
xf2000
xe2000
xf2250
xe2250
xf20cm
xe20cm
Ubest
chi2_min
S_chi2_min
R_chi2_min
Type_FIR
Type_SED
Type_AGN


# Functions

In [7]:
def RaDec_to_Coord(ra,dec):
    """Build a SkyCoord from right ascension and declination.

    Both inputs are multiplied by ``u.deg``, i.e. they are interpreted as
    plain numbers (scalars or arrays) expressed in degrees.
    """
    ra_deg = ra * u.deg
    dec_deg = dec * u.deg
    return SkyCoord(ra=ra_deg, dec=dec_deg)

In [8]:
def GetCoordMatches(coord_cat, coord_c, separation=1.0):
    """Cross-match ``coord_c`` against ``coord_cat`` within an on-sky radius.

    Every entry of ``coord_c`` is paired with its nearest neighbour in
    ``coord_cat``; pairs whose on-sky distance is not strictly below
    ``separation`` (a plain number, taken in arcseconds) are dropped.

    Returns a tuple ``(coordCat_matches, coordC_matches)`` of equal-length
    index arrays: element k of the first indexes ``coord_cat`` and element k
    of the second indexes ``coord_c`` for the k-th accepted pair.  Several
    ``coord_c`` entries may share the same nearest ``coord_cat`` entry, so the
    first array can contain repeated indices.
    """
    # Nearest catalog entry for each coord_c source, plus the on-sky distance.
    # The third return value (3-D distance) is not needed here.
    nearest_cat_idx, sky_dist, _ = coord_c.match_to_catalog_sky(coord_cat)

    # Boolean selection over coord_c: True where the neighbour is close enough.
    is_close = sky_dist < separation * u.arcsec

    coordC_matches = np.where(is_close)[0]      # positions in coord_c
    coordCat_matches = nearest_cat_idx[is_close]  # positions in coord_cat

    return (coordCat_matches, coordC_matches)

In [9]:
def TestCoord(c1,c2,i):
    """Print element ``i`` of both coordinate collections for a visual comparison."""
    labelled = (('Coordinant 1: ', c1), ('Coordinant 2: ', c2))
    for label, coords in labelled:
        print(label, coords[i])

In [10]:
def GetDuplicatesMask(arr):
    """Return a boolean mask that is True where a value occurs exactly once.

    Despite the function name, the mask selects the UNIQUE entries: every
    position whose value appears more than once in ``arr`` (including its
    first occurrence) is False.  Use ``arr[mask]`` for the singly-occurring
    values and ``arr[~mask]`` for all members of duplicated groups.

    Parameters
    ----------
    arr : 1-D array-like
        Values to inspect (here: arrays of catalog indices).

    Returns
    -------
    numpy.ndarray of bool, same length as ``arr``.
    """
    arr = np.asarray(arr)
    # One pass: `inverse` maps each element to its slot among the unique values,
    # `counts` holds how often each unique value occurs.
    _, inverse, counts = np.unique(arr, return_inverse=True, return_counts=True)
    # An element is kept only if its value was seen exactly once.
    return counts[inverse] == 1

# Match EGS catalogs using RA/DEC

In [11]:
# Sky coordinates for both EGS catalogs.
# The F22 columns are indexed with [0] before use -- presumably that table keeps
# each column as one array inside a single row (TODO confirm against the FITS file).
egs_f22_coord = RaDec_to_Coord(
    ra=np.array(egs_f22_data['RA'])[0],
    dec=np.array(egs_f22_data['DEC'])[0],
)
egs_fir_coord = RaDec_to_Coord(
    ra=np.array(egs_fir_data['RA']),
    dec=np.array(egs_fir_data['Decl']),
)

# catalog sizes
print('Number of EGS f22 sources:\t', len(egs_f22_coord))
print('Number of EGS fir sources:\t', len(egs_fir_coord))

Number of EGS f22 sources:	 66783
Number of EGS fir sources:	 41656


In [12]:
# Cross-match: each FIR source is paired with its nearest F22 source.
# The two results are aligned index arrays: the first indexes egs_f22_coord,
# the second indexes egs_fir_coord.
egs_f22_toFIR_match, egs_fir_toF22_match = GetCoordMatches(
    coord_cat=egs_f22_coord,
    coord_c=egs_fir_coord,
)

# coordinates of the matched pairs, row-aligned between the two catalogs
egs_fir_coord_matches = egs_fir_coord[egs_fir_toF22_match]
egs_f22_coord_matches = egs_f22_coord[egs_f22_toFIR_match]

# number of matched pairs (both counts are equal by construction)
print('Number of egsF22 matches:\t',  len(egs_f22_coord_matches))
print('Number of egsFIR matches:\t',  len(egs_fir_coord_matches))

Number of egsF22 matches:	 40850
Number of egsFIR matches:	 40850


In [13]:
# spot-check one matched pair: both printed positions should nearly coincide
TestCoord(c1=egs_f22_coord_matches, c2=egs_fir_coord_matches, i=1)

Coordinant 1:  <SkyCoord (ICRS): (ra, dec) in deg
    (215.267114, 53.027554)>
Coordinant 2:  <SkyCoord (ICRS): (ra, dec) in deg
    (215.26714, 53.02755)>


# Get Unique sources from EGS

In [14]:
# Mask over the matched pairs. Despite its name it is True where the F22 index
# was matched exactly once, and False for every pair whose F22 source was hit
# by more than one FIR source.
egs_duplicatesMask = GetDuplicatesMask(arr=egs_f22_toFIR_match)

In [15]:
# pairs whose F22 source was matched exactly once (mask True), for both catalogs
egs_f22_unique_i, egs_fir_unique_i = (
    egs_f22_toFIR_match[egs_duplicatesMask],
    egs_fir_toF22_match[egs_duplicatesMask],
)
# pairs whose F22 source was matched more than once (mask False), for both catalogs
egs_f22_duplicates_i, egs_fir_duplicates_i = (
    egs_f22_toFIR_match[~egs_duplicatesMask],
    egs_fir_toF22_match[~egs_duplicatesMask],
)

In [16]:
# how many matched pairs are unique vs. part of a duplicated group
for label, indices in (
    ('Number of unique:\t', egs_f22_unique_i),
    ('Number of duplicates:\t', egs_f22_duplicates_i),
):
    print(label, len(indices))

Number of unique:	 40571
Number of duplicates:	 279


# Match tphot to EGS f22 using RA/DEC

In [17]:
# Sky coordinates for tphot and for the uniquely matched EGS sources.
# egs_coord is built from the F22 positions restricted to egs_f22_unique_i, so
# indices into egs_coord are positions within egs_f22_unique_i, not F22 rows.
tphot_coord = RaDec_to_Coord(
    ra=np.array(tphot_data['ra']),
    dec=np.array(tphot_data['dec']),
)
egs_coord = RaDec_to_Coord(
    ra=np.array(egs_f22_data['RA'] )[0][egs_f22_unique_i],
    dec=np.array(egs_f22_data['DEC'])[0][egs_f22_unique_i],
)

# catalog sizes
print('Number of tphot sources:\t', len(tphot_coord))
print('Number of egs sources:\t\t',   len(egs_coord))

Number of tphot sources:	 1734
Number of egs sources:		 40571


In [18]:
# Cross-match: each EGS source is paired with its nearest tphot source.
# The two results are aligned index arrays: the first indexes tphot_coord,
# the second indexes egs_coord.
tphot_toEGS_match, egs_toTphot_match = GetCoordMatches(
    coord_cat=tphot_coord,
    coord_c=egs_coord,
)

# coordinates of the matched pairs, row-aligned between the two catalogs
egs_coord_matches = egs_coord[egs_toTphot_match]
tphot_coord_matches = tphot_coord[tphot_toEGS_match]

# number of matched pairs (both counts are equal by construction)
print('Number of tphot matches:\t', len(tphot_coord_matches))
print('Number of EGS matches:\t\t', len(egs_coord_matches))

Number of tphot matches:	 1655
Number of EGS matches:		 1655


In [19]:
# spot-check one matched pair: both printed positions should nearly coincide
TestCoord(c1=tphot_coord_matches, c2=egs_coord_matches, i=1)

Coordinant 1:  <SkyCoord (ICRS): (ra, dec) in deg
    (215.061606, 52.901508)>
Coordinant 2:  <SkyCoord (ICRS): (ra, dec) in deg
    (215.061586, 52.901517)>


# Find Duplicate RA/DEC Matches

In [20]:
# Mask over the matched pairs. Despite its name it is True where the tphot index
# was matched exactly once, and False for every pair whose tphot source was hit
# by more than one EGS source.
tphot_duplicateMask = GetDuplicatesMask(arr=tphot_toEGS_match)

In [21]:
# pairs whose tphot source was matched exactly once (mask True);
# tphot_* index tphot_coord, egs_* index egs_coord
tphot_unique_i, egs_unique_i = (
    tphot_toEGS_match[tphot_duplicateMask],
    egs_toTphot_match[tphot_duplicateMask],
)
# pairs whose tphot source was matched more than once (mask False)
tphot_duplicates_i, egs_duplicates_i = (
    tphot_toEGS_match[~tphot_duplicateMask],
    egs_toTphot_match[~tphot_duplicateMask],
)

In [22]:
# how many matched pairs are unique vs. part of a duplicated group
for label, indices in (
    ('Number of unique:\t', egs_unique_i),
    ('Number of duplicates:\t', egs_duplicates_i),
):
    print(label, len(indices))

Number of unique:	 1608
Number of duplicates:	 47


In [38]:
# coordinates of the duplicated tphot/EGS pairs, row-aligned
egs_coord_d = egs_coord[egs_duplicates_i]
tphot_coord_d = tphot_coord[tphot_duplicates_i]

# inspect one duplicated pair (change i to look at another)
i=1
TestCoord(c1=tphot_coord_d, c2=egs_coord_d, i=i)

Coordinant 1:  <SkyCoord (ICRS): (ra, dec) in deg
    (215.011364, 52.905286)>
Coordinant 2:  <SkyCoord (ICRS): (ra, dec) in deg
    (215.011755, 52.905199)>


# Write marked duplicate coordinates to file

In [37]:
# Dump the coordinates of every duplicated tphot/EGS pair to a CSV file.
with open('tphotCoord_egs_duplicate_galaxies.csv', 'w') as file:
    # tphot side of each duplicated pair
    field   = np.array(tphot_data['field'])[tphot_duplicates_i]
    ra_t    = np.array(tphot_data['ra'])[tphot_duplicates_i]
    dec_t   = np.array(tphot_data['dec'])[tphot_duplicates_i]
    # EGS side: egs_duplicates_i indexes the uniquely matched F22 subset,
    # hence the two-step lookup through egs_f22_unique_i
    ra_egs  = np.array(egs_f22_data['RA'][0])[egs_f22_unique_i][egs_duplicates_i]
    dec_egs = np.array(egs_f22_data['DEC'][0])[egs_f22_unique_i][egs_duplicates_i]
    # header row
    file.write('Field,RA_tphot,Dec_tphot,RA_egs,Dec_egs\n')
    # one comma-separated line per duplicated pair
    for row in zip(field, ra_t, dec_t, ra_egs, dec_egs):
        file.write(','.join(map(str, row)) + '\n')

# Build Catalog

In [26]:
# Row indices into each full catalog for the final, uniquely matched sample.
# (Alignment of these index arrays was checked by the author on 11/8/22.)
# tphot indices already refer to the full tphot catalog; the EGS ones need one
# more hop because egs_unique_i indexes the uniquely matched F22/FIR subset.
tphot_i = tphot_unique_i
egs_f22_i, egs_fir_i = (
    egs_f22_unique_i[egs_unique_i],
    egs_fir_unique_i[egs_unique_i],
)

# number of matched sources
n_matches = len(tphot_i)

# the three lengths must be identical
print(n_matches)
print(len(egs_f22_i))
print(len(egs_fir_i))

1608
1608
1608


In [27]:
# Astropy tables -> pandas DataFrames.
# egs_f22_data is handled separately: to_pandas() does not work on it
# (dimension issues, per the original author).
tphot_df, egs_fir_df = (table.to_pandas() for table in (tphot_data, egs_fir_data))

In [28]:
# Build the EGS F22 DataFrame by hand.
# column labels, in table order
egs_f22_cols_DFPREP = [name for name in egs_f22_cols]
# Row 0 yields one value per column; these are stacked into a single 2-D array
# and transposed so that each table column becomes a DataFrame column.
# NOTE: stacking into one array gives every column the same dtype.
egs_f22_data_DFPREP = np.array([values for values in egs_f22_data[0]]).T
# assemble the frame
egs_f22_df = pd.DataFrame(data=egs_f22_data_DFPREP, columns=egs_f22_cols_DFPREP)

In [29]:
# Both EGS catalogs carry an 'ID' column; give each a catalog-specific name so
# they stay distinguishable after the tables are joined side by side.
egs_f22_df = egs_f22_df.rename({'ID': 'ID_EGSF22'}, axis='columns')
egs_fir_df = egs_fir_df.rename({'ID': 'ID_EGSFIR'}, axis='columns')

In [30]:
# Select the matched sources from each frame by position; the three index
# arrays are row-aligned, so row k of each result is the same source.
tphot_df_matched   = tphot_df.iloc[tphot_i, :]
egs_f22_df_matched = egs_f22_df.iloc[egs_f22_i, :]
egs_fir_df_matched = egs_fir_df.iloc[egs_fir_i, :]

In [31]:
# Join the three matched frames side by side. Each index is reset first so the
# rows line up by position rather than by their original catalog row labels.
full_df = pd.concat(
    [
        frame.reset_index(drop=True)
        for frame in (tphot_df_matched, egs_f22_df_matched, egs_fir_df_matched)
    ],
    axis=1,
)

# sanity check: (matched sources, total columns)
print(full_df.shape)

(1608, 136)


In [32]:
# # print columns 
# for col in full_df.columns:
#     print(col)

In [33]:
# Columns kept in the final merged catalog, grouped by source catalog.

# ---- tphot: all columns (position, then flux / flux error per band)
_wanted_tphot = [
    'field', 'ra', 'dec',
    'f560w_uJy',  'f560w_uJy_err',
    'f770w_uJy',  'f770w_uJy_err',
    'f1000w_uJy', 'f1000w_uJy_err',
    'f1280w_uJy', 'f1280w_uJy_err',
    'f1500w_uJy', 'f1500w_uJy_err',
    'f1800w_uJy', 'f1800w_uJy_err',
    'f2100w_uJy', 'f2100w_uJy_err',
]

# ---- EGS F22
_wanted_egs_f22 = ['ID_EGSF22', 'F36', 'F45', 'ZA']

# ---- EGS FIR
# Quality flags (Q*): 3 = good (fitted with 0.5 source/beam prior cat);
#   2 = fair (fitted with 1 source/beam prior cat, SNR<3);
#   1 = poor (fitted with 2 source/beam prior cat, SNR<3) for Q24,
#       "not fitted" for Q100 and Q160.
_wanted_egs_fir = [
    'ID_EGSFIR',
    # lg(solMass), S17: stellar mass from the median of the linear stellar-mass
    # measurements obtained taking nebular emission contamination into account;
    # Chabrier IMF
    'Mstar',
    # uJy, S17: Spitzer IRAC 3.6 / 4.5 / 5.8 / 8.0 um flux and flux error
    'Fch1', 'dFch1',
    'Fch2', 'dFch2',
    'Fch3', 'dFch3',
    'Fch4', 'dFch4',
    # uJy: super-deblended Spitzer/MIPS 24um flux, flux error, fit quality
    'F24', 'dF24', 'Q24',
    # mJy: super-deblended Herschel/PACS 100um flux, flux error, fit quality
    'F100', 'dF100', 'Q100',
    # mJy: super-deblended Herschel/PACS 160um flux, flux error, fit quality
    'F160', 'dF160', 'Q160',
    # mJy: super-deblended Herschel/SPIRE 250um and 350um flux and flux error
    'F250', 'dF250',
    'F350', 'dF350',
    # mJy: super-deblended JCMT/SCUBA2 450um flux and flux error
    'F450', 'dF450',
    # mJy: super-deblended Herschel/SPIRE 500um flux and flux error
    'F500', 'dF500',
    # mJy: super-deblended JCMT/SCUBA2 850um flux and flux error from the
    # Geach+17 map (shallower), the Zavala+17 map (deeper), and their
    # weighted combination
    'F850_sh', 'dF850_sh',
    'F850_d', 'dF850_d',
    'F850', 'dF850',
    # mJy: super-deblended ASTE/AzTEC 1100um flux and flux error
    'F1100', 'dF1100',
]

wantedCols = _wanted_tphot + _wanted_egs_f22 + _wanted_egs_fir

In [34]:
# Keep only the wanted columns, in the order given by wantedCols.
reduced_df = full_df.loc[:, wantedCols]

# sanity check: (matched sources, kept columns)
print(reduced_df.shape)

# Write the merged catalog to CSV without the positional index.
# TODO: settle on a final file name for this table.
reduced_df.to_csv('Merged_JWST_and_EGS_photometry.csv', index=False)

(1608, 56)
