In [1]:
## imports
# astronomy
from astropy.io import fits
from astropy.io import ascii
from astropy.table import Table
from astropy.coordinates import SkyCoord
import astropy.units as u
# plotting
import matplotlib.pyplot as plt
# data 
import numpy as np
import pandas as pd 

# Read Data

In [2]:
# file paths
# Directory that holds every input catalog read below.
# NOTE(review): absolute Windows-style path tied to one machine -- edit before running elsewhere.
path_catalogs = 'D:\\AstroCatalogs\\AGNerds'

In [3]:
# open tphot
# Read the tphot catalog (ASCII format) and keep its list of column names.
tphot_data = ascii.read(path_catalogs+'\\tphot.cat')
tphot_cols = tphot_data.colnames

# show table
# NOTE(review): this call is not the last expression of the cell and its return value is
# discarded, so the table may not actually be rendered -- confirm.
tphot_data.show_in_notebook()
print(tphot_cols)

['field', 'ra', 'dec', 'f560w_uJy', 'f560w_uJy_err', 'f770w_uJy', 'f770w_uJy_err', 'f1000w_uJy', 'f1000w_uJy_err', 'f1280w_uJy', 'f1280w_uJy_err', 'f1500w_uJy', 'f1500w_uJy_err', 'f1800w_uJy', 'f1800w_uJy_err', 'f2100w_uJy', 'f2100w_uJy_err']


In [4]:
# open EGS F22
# Read the EGS F22 catalog (FITS) into an astropy Table.
egs_f22_data = Table.read(path_catalogs+'\\EGS_F22.fits')
# Column collection of the table; it is iterated later to obtain the DataFrame column labels.
egs_f22_cols = egs_f22_data.columns

# print all columns
# for col in egs_f22_cols:
#     print(col)

In [5]:
# open EGS FIR
# Read the EGS far-infrared photometry catalog (FITS) into an astropy Table.
egs_fir_data = Table.read(path_catalogs+'\\egs_FIR_photometry_catalog.fits')
# Column collection of the table (only referenced by the commented-out listing below).
egs_fir_cols = egs_fir_data.columns

# print all columns
# for col in egs_fir_cols:
#     print(col)

# Functions

In [6]:
def RaDec_to_Coord(ra,dec):
    """Build a SkyCoord from right ascension and declination values given in degrees.

    ra, dec : scalar or array of plain numbers; the degree unit is attached here.
    Returns the resulting astropy SkyCoord.
    """
    ra_deg = ra * u.deg
    dec_deg = dec * u.deg
    return SkyCoord(ra=ra_deg, dec=dec_deg)

In [7]:
def GetCoordMatches(coord_cat, coord_c, separation=1.0) : 
    """Match every coordinate in ``coord_c`` to its nearest neighbour in ``coord_cat``.

    Parameters
    ----------
    coord_cat : SkyCoord
        Catalog that is searched for neighbours.
    coord_c : SkyCoord
        Coordinates to be matched against the catalog.
    separation : float
        Maximum on-sky distance, in arcseconds, for a pair to count as a match.

    Returns
    -------
    (coordCat_matches, coordC_matches) : tuple of index arrays
        Parallel arrays: entry k of the first indexes ``coord_cat`` and entry k of the
        second indexes ``coord_c``. Several entries of ``coord_c`` may share the same
        nearest catalog object, so the first array can contain repeated indices.
    """
    # nearest catalog index and on-sky distance for each entry of coord_c;
    # the 3-dimensional distance that is also returned is not needed here
    nearest_idx, sky_dist, _ = coord_c.match_to_catalog_sky(coord_cat)

    # True for the entries of coord_c whose nearest neighbour is close enough
    is_close = sky_dist < separation * u.arcsec

    # catalog indices of the accepted pairs, and the positions of those pairs in coord_c
    return (nearest_idx[is_close], np.nonzero(is_close)[0])

In [8]:
def TestCoord(c1,c2,i):
    """Print element ``i`` of both coordinate sequences so a matched pair can be compared by eye."""
    labelled = (('Coordinant 1: ', c1), ('Coordinant 2: ', c2))
    for label, coords in labelled:
        print(label, coords[i])

In [9]:
def GetDuplicatesMask(arr): 
    """Return a boolean mask that is True where a value of ``arr`` occurs exactly once.

    Despite the name, the mask selects the entries that are NOT duplicated; use
    ``~mask`` to select every occurrence of a value that appears more than once.

    Parameters
    ----------
    arr : 1-D array-like
        Values to examine (e.g. an array of match indices).

    Returns
    -------
    numpy.ndarray of bool
        Same length as ``arr``; empty input gives an empty mask.
    """
    values = np.asarray(arr)  # also lets plain Python lists be passed in
    # `inverse` maps each element to its slot among the unique values and `counts`
    # holds how often each unique value occurs, so counts[inverse] is the number of
    # occurrences of every element -- a single vectorised pass instead of a Python
    # loop that rescans the array once per duplicated value
    _, inverse, counts = np.unique(values, return_inverse=True, return_counts=True)
    return(counts[inverse] == 1)

# Match EGS catalogs using RA/DEC

In [10]:
# Get EGS coordinates
# The F22 columns are indexed with [0] first: row 0 of 'RA' / 'DEC' holds the whole array of source positions.
# The FIR columns are used directly (one value per table row); its declination column is named 'Decl'.
egs_f22_coord = RaDec_to_Coord(np.array(egs_f22_data['RA'])[0], np.array(egs_f22_data['DEC'])[0])
egs_fir_coord = RaDec_to_Coord(np.array(egs_fir_data['RA']),    np.array(egs_fir_data['Decl'])  )

# print info
print('Number of EGS f22 sources:\t', len(egs_f22_coord))
print('Number of EGS fir sources:\t', len(egs_fir_coord))

Number of EGS f22 sources:	 66783
Number of EGS fir sources:	 41656


In [11]:
# match
# F22 is passed as the catalog being searched and FIR as the coordinates to match, so every FIR
# source is paired with its nearest F22 source if that lies within the default separation (1 arcsec).
# The two returned arrays are parallel: entry k of the first indexes egs_f22_coord,
# entry k of the second indexes egs_fir_coord.
egs_f22_toFIR_match, egs_fir_toF22_match = GetCoordMatches(egs_f22_coord,egs_fir_coord) # arrays of indices into the full egs catalogs

# get matches
egs_f22_coord_matches = egs_f22_coord[egs_f22_toFIR_match]
egs_fir_coord_matches = egs_fir_coord[egs_fir_toF22_match]

# print info 
print('Number of egsF22 matches:\t',  len(egs_f22_coord_matches))
print('Number of egsFIR matches:\t',  len(egs_fir_coord_matches))

Number of egsF22 matches:	 40850
Number of egsFIR matches:	 40850


In [12]:
# spot-check one matched pair (position 1 of the parallel match arrays): prints the F22 and FIR coordinates
TestCoord(egs_f22_coord_matches,egs_fir_coord_matches,1)

Coordinant 1:  <SkyCoord (ICRS): (ra, dec) in deg
    (215.267114, 53.027554)>
Coordinant 2:  <SkyCoord (ICRS): (ra, dec) in deg
    (215.26714, 53.02755)>


# Get Unique sources from EGS

In [13]:
# mask over the matched pairs: True where the matched F22 index occurs only once,
# False for every pair whose F22 source was matched by more than one FIR source
egs_duplicatesMask = GetDuplicatesMask(egs_f22_toFIR_match)

In [14]:
# apply mask to get unique and duplicate sources (F22 side of each pair)
# the mask is True for unique matches, so ~mask selects the duplicates
egs_f22_unique_i     = egs_f22_toFIR_match[egs_duplicatesMask]
egs_f22_duplicates_i = egs_f22_toFIR_match[~egs_duplicatesMask]
# apply the same mask to the FIR side so both index arrays stay parallel
egs_fir_unique_i     = egs_fir_toF22_match[egs_duplicatesMask]
egs_fir_duplicates_i = egs_fir_toF22_match[~egs_duplicatesMask]

In [15]:
# print info
# both counts are numbers of matched pairs; 'duplicates' counts every pair that shares its F22 source with another pair
print('Number of unique:\t', len(egs_f22_unique_i))
print('Number of duplicates:\t', len(egs_f22_duplicates_i))

Number of unique:	 40571
Number of duplicates:	 279


# Match tphot to EGS f22 using RA/DEC

In [16]:
# get coordinates
# tphot: every source of the tphot catalog
# egs:   only the F22 sources that were uniquely matched to FIR, so positions in egs_coord
#        index egs_f22_unique_i rather than the full F22 catalog
tphot_coord = RaDec_to_Coord(np.array(tphot_data['ra']), np.array(tphot_data['dec']) )
egs_coord   = RaDec_to_Coord(np.array(egs_f22_data['RA'])[0][egs_f22_unique_i], np.array(egs_f22_data['DEC'])[0][egs_f22_unique_i])

# print info 
print('Number of tphot sources:\t', len(tphot_coord))
print('Number of egs sources:\t\t',   len(egs_coord))

Number of tphot sources:	 1734
Number of egs sources:		 40571


In [17]:
# match
# egs_coord is the catalog being searched, so every tphot source is paired with its nearest EGS source.
# (earlier attempt with the roles swapped, kept for reference)
# tphot_toEGS_match, egs_toTphot_match = GetCoordMatches(tphot_coord,egs_coord) # array of indices to full catalog 
egs_toTphot_match, tphot_toEGS_match = GetCoordMatches(egs_coord, tphot_coord) # indices into egs_coord (reduced list) and tphot_coord

# get matches
tphot_coord_matches = tphot_coord[tphot_toEGS_match]
egs_coord_matches = egs_coord[egs_toTphot_match]

# print length  
print('Number of tphot matches:\t', len(tphot_coord_matches))
print('Number of EGS matches:\t\t', len(egs_coord_matches))

Number of tphot matches:	 1694
Number of EGS matches:		 1694


In [18]:
# spot-check one matched pair (position 1 of the parallel match arrays): prints the tphot and EGS coordinates
TestCoord(tphot_coord_matches,egs_coord_matches,1)

Coordinant 1:  <SkyCoord (ICRS): (ra, dec) in deg
    (215.132979, 53.056507)>
Coordinant 2:  <SkyCoord (ICRS): (ra, dec) in deg
    (215.132972, 53.056532)>


# Find Duplicate RA/DEC Matches

In [19]:
# mask over the tphot-EGS pairs: True where the matched EGS index occurs only once,
# False for every pair whose EGS source was matched by more than one tphot source
tphot_duplicateMask = GetDuplicatesMask(egs_toTphot_match) # mask for the parallel match arrays

In [20]:
# apply mask to get unique and duplicate sources (tphot side of each pair)
# the mask is True for unique matches, so ~mask selects the duplicates
tphot_unique_i     = tphot_toEGS_match[tphot_duplicateMask]     # array of indices to the full tphot catalog 
tphot_duplicates_i = tphot_toEGS_match[~tphot_duplicateMask]
# apply the same mask to the EGS side; these index egs_coord (the reduced EGS list)
egs_unique_i       = egs_toTphot_match[tphot_duplicateMask]
egs_duplicates_i   = egs_toTphot_match[~tphot_duplicateMask]

In [21]:
# print info
# both counts are numbers of matched pairs; 'duplicates' counts every pair that shares its EGS source with another pair
print('Number of unique:\t', len(egs_unique_i))
print('Number of duplicates:\t', len(egs_duplicates_i))

Number of unique:	 1569
Number of duplicates:	 125


In [22]:
# get coord values
# position within the duplicate pairs to inspect
i=0
# coordinates of both sides of every duplicate pair (parallel arrays)
tphot_coord_d = tphot_coord[tphot_duplicates_i]
egs_coord_d = egs_coord[egs_duplicates_i]
TestCoord(tphot_coord_d,egs_coord_d,i)

Coordinant 1:  <SkyCoord (ICRS): (ra, dec) in deg
    (215.135509, 53.055473)>
Coordinant 2:  <SkyCoord (ICRS): (ra, dec) in deg
    (215.135778, 53.055631)>


# Write marked duplicate coordinates to file

In [23]:
def PairUp(dup, set):
    """Group the entries of ``set`` by the value found at the same position in ``dup``.

    Parameters
    ----------
    dup : 1-D array-like
        Keys; may contain repeated values.
    set : 1-D array-like
        Values parallel to ``dup`` (index k of one belongs to index k of the other).
        Must have the same length as ``dup``.

    Returns
    -------
    dict
        Maps each distinct value of ``dup`` (in sorted order) to the list of the
        ``set`` entries that share it, in their original order.

    Raises
    ------
    ValueError
        If ``dup`` and ``set`` do not have the same shape.
    """
    # accept plain Python lists as well as numpy arrays: indexing a list with the
    # result of np.where raises TypeError
    keys = np.asarray(dup)
    values = np.asarray(set)
    if keys.shape != values.shape:
        raise ValueError('dup and set must be parallel sequences of the same length')

    pairs = {}
    for key in np.unique(keys) : 
        # boolean mask picks the values whose key equals the current one
        pairs[key] = list(values[keys == key])
    return pairs 


# group the duplicate pairs by EGS index: key = index into egs_coord, value = list of indices
# into the tphot catalog that were all matched to that EGS source
pairUp_duplicates = PairUp(egs_duplicates_i, tphot_duplicates_i) # egsKey, tphotVal
print(pairUp_duplicates)

{1424: [1732, 1733], 2566: [1664, 1670], 4585: [1508, 1510], 6144: [1582, 1584], 6530: [1616, 1618], 6981: [1436, 1438], 7131: [1462, 1464], 7336: [1567, 1571], 9218: [1487, 1491], 11843: [571, 577], 12129: [1382, 1383], 12176: [600, 604], 12381: [280, 282], 12477: [244, 246], 12738: [1323, 1327], 12804: [1214, 1215], 13173: [272, 274], 13290: [169, 171], 13354: [206, 208, 218], 13545: [502, 503], 13621: [307, 311], 13808: [101, 108], 13841: [619, 620], 13901: [1183, 1186], 14068: [415, 416], 14344: [1223, 1225], 14370: [81, 83], 14428: [1210, 1212], 14599: [535, 536], 15120: [444, 452], 15143: [198, 202], 15527: [586, 589], 15846: [185, 186], 16155: [1148, 1149], 16305: [357, 365], 16614: [131, 134], 16753: [458, 464], 17150: [496, 497], 17630: [93, 96], 17714: [370, 375], 17863: [891, 895], 18069: [192, 193], 18094: [707, 711], 18166: [425, 429], 18168: [1267, 1268], 18504: [1259, 1260], 18698: [859, 860], 18723: [54, 55, 57], 18826: [354, 356], 18965: [1159, 1163], 18975: [743, 747]

In [24]:
# tphot position and field label of every tphot source
ra_t  = np.array(tphot_data['ra'])
dec_t = np.array(tphot_data['dec'])
field = np.array(tphot_data['field'])
# egs position of every source in the reduced (uniquely FIR-matched) F22 list
ra_egs  = np.array(egs_f22_data['RA'][0])[egs_f22_unique_i]
dec_egs = np.array(egs_f22_data['DEC'][0])[egs_f22_unique_i]

# translate the index-based pairing into coordinates:
#   key   -> (ra, dec) of the EGS source
#   value -> list of (ra, dec, field) for each tphot source matched to it
pairUp_duplicates_coordinants = {
    (ra_egs[egs_idx], dec_egs[egs_idx]): [
        (ra_t[tphot_idx], dec_t[tphot_idx], field[tphot_idx])
        for tphot_idx in tphot_idxs
    ]
    for egs_idx, tphot_idxs in pairUp_duplicates.items()
}

print(pairUp_duplicates_coordinants)


{(215.061708, 52.900944): [(215.061722, 52.900933, 'miri6'), (215.061896, 52.900765, 'miri6')], (215.044522, 52.894596): [(215.044535, 52.894589, 'miri6'), (215.044768, 52.894784, 'miri6')], (215.027832, 52.894548): [(215.027718, 52.894445, 'miri6'), (215.027837, 52.894547, 'miri6')], (215.033691, 52.909238): [(215.033689, 52.909234, 'miri6'), (215.033922, 52.909357, 'miri6')], (215.037311, 52.913392): [(215.03731, 52.913383, 'miri6'), (215.037553, 52.913339, 'miri6')], (215.016974, 52.901142): [(215.01698, 52.901149, 'miri6'), (215.017125, 52.901013, 'miri6')], (215.021841, 52.905244): [(215.021603, 52.9052, 'miri6'), (215.021839, 52.905239, 'miri6')], (215.032796, 52.915581): [(215.032782, 52.915564, 'miri6'), (215.033068, 52.915457, 'miri6')], (215.025932, 52.919903): [(215.025918, 52.919893, 'miri6'), (215.026152, 52.920017, 'miri6')], (215.172194, 53.03765): [(215.17185, 53.037536, 'miri1'), (215.172196, 53.037648, 'miri1')], (215.001349, 52.918369): [(215.001375, 52.918359, 'miri

In [25]:
# write all duplicate coordinates to file
# The dictionary is written as its Python string representation (not a structured format).
# The file name is relative, so it lands in the current working directory; 'w' overwrites an existing file.
with open('tphotCoord_egs_duplicate_galaxies.txt', 'w') as file:
    file.write(str(pairUp_duplicates_coordinants))

# Build Catalog

In [26]:
# helper indexing --- yes I tested that these index arrays all align correctly (11/8/22)
# tphot_unique_i indexes the full tphot catalog directly.
# egs_unique_i indexes the reduced EGS list that was built from egs_f22_unique_i, so it is mapped
# back through egs_f22_unique_i / egs_fir_unique_i to obtain rows of the full F22 / FIR catalogs.
tphot_i     = tphot_unique_i
egs_f22_i   = egs_f22_unique_i[egs_unique_i] 
egs_fir_i   = egs_fir_unique_i[egs_unique_i]

# verify that all lengths match
print(len(tphot_i))
print(len(egs_f22_i))
print(len(egs_fir_i))

# save number of matches 
n_matches = len(tphot_i)

1569
1569
1569


In [27]:
# convert astropy table to pandas dataframe (does not work for egs_f22_data, dimension issues...)
# egs_f22_data is converted by hand in a separate step instead
tphot_df   = tphot_data.to_pandas()
egs_fir_df = egs_fir_data.to_pandas()

In [28]:
# # Make egs f22 into a dataframe
# column labels: iterating the table's column collection yields the column names
egs_f22_cols_DFPREP = list(egs_f22_cols)
# data: row 0 of the table holds one full array per column; stack them into a single
# array and transpose so that each source becomes a row
# NOTE(review): stacking all columns into one numpy array gives them a common dtype -- confirm this is intended
egs_f22_data_DFPREP = np.array(list(egs_f22_data[0])).T
# make dataframe
egs_f22_df = pd.DataFrame(data=egs_f22_data_DFPREP, columns=egs_f22_cols_DFPREP)

In [29]:
# rename columns
# both EGS tables have a column called 'ID'; give each a catalog-specific name
# so they stay distinguishable after the tables are joined side by side
egs_f22_df = egs_f22_df.rename(columns={'ID':'ID_EGSF22'})
egs_fir_df = egs_fir_df.rename(columns={'ID':'ID_EGSFIR'})

In [30]:
# get matched sources for dfs
# positional row selection with the three parallel index arrays, so row k of each
# result refers to the same matched source
tphot_df_matched   = tphot_df.iloc[tphot_i]
egs_f22_df_matched = egs_f22_df.iloc[egs_f22_i]
egs_fir_df_matched = egs_fir_df.iloc[egs_fir_i]

In [31]:
# concatenate tables horizontally
# every frame first gets a fresh 0..N-1 index so the rows are joined by position
# rather than by their original catalog row labels
full_df = pd.concat(
    [
        matched.reset_index(drop=True)
        for matched in (tphot_df_matched, egs_f22_df_matched, egs_fir_df_matched)
    ],
    axis=1,
)

print(full_df.shape) # verify shape

(1569, 136)


In [32]:
# # print columns 
# for col in full_df.columns:
#     print(col)

In [33]:
# Columns kept in the final table, grouped by the catalog they come from.
# 'ID_EGSF22' / 'ID_EGSFIR' are the catalogs' 'ID' columns after renaming.
# Flux units are not uniform: see the per-column notes (uJy in the tphot names, µJy or mJy for EGS FIR).
wantedCols = [
    # ========================  tphot (all cols)
    'field',                    
    'ra', 
    'dec', 
    'f560w_uJy', 
    'f560w_uJy_err', 
    'f770w_uJy', 
    'f770w_uJy_err', 
    'f1000w_uJy', 
    'f1000w_uJy_err', 
    'f1280w_uJy', 
    'f1280w_uJy_err', 
    'f1500w_uJy', 
    'f1500w_uJy_err', 
    'f1800w_uJy', 
    'f1800w_uJy_err', 
    'f2100w_uJy', 
    'f2100w_uJy_err',
    # ========================  egs f22
    'ID_EGSF22',
    'F36',
    'F45',
    'ZA',
    # ========================  egs fir
    'ID_EGSFIR',
    'Mstar',        # lg(solMass)   S17			Stellar mass measurement from the median of the linear value of the stellar mass measurements obtained taking into account nebular emission contamination; in Chabrier IMF 
    'Fch1',		    # µJy			S17			Spitzer IRAC 3.6µm flux
    'dFch1',        # µJy			S17			Spitzer IRAC 3.6µm flux error
    'Fch2',		    # µJy			S17			Spitzer IRAC 4.5µm flux
    'dFch2',		# µJy			S17			Spitzer IRAC 4.5µm flux error
    'Fch3',		    # µJy			S17			Spitzer IRAC 5.8µm flux
    'dFch3',		# µJy			S17			Spitzer IRAC 5.8µm flux error
    'Fch4',		    # µJy			S17			Spitzer IRAC 8.0µm flux
    'dFch4',		# µJy			S17			Spitzer IRAC 8.0µm flux error
    'F24',  		# µJy			...			Super-deblended Spitzer/MIPS 24µm flux
    'dF24',		    # µJy			...			Super-deblended Spitzer/MIPS 24µm flux error
    'Q24',  		# ...			...			Fit quality	3= good (fitted with 0.5 source/beam prior cat); 2=fair (fitted with 1 source/beam prior cat, SNR<3); 1=poor (fitted with 2 source/beam prior cat, SNR<3)
    'F100',		    # mJy			...			Super-deblended Herschel/PACS 100µm flux
    'dF100',		# mJy			...			Super-deblended Herschel/PACS 100µm flux error
    'Q100',		    # ...			...			Fit quality	3= good (fitted with 0.5 source/beam prior cat); 2=fair (fitted with 1 source/beam prior cat, SNR<3); 1=not fitted
    'F160',		    # mJy			...			Super-deblended Herschel/PACS 160µm flux
    'dF160',		# mJy			...			Super-deblended Herschel/PACS 160µm flux error
    'Q160',		    # ...			...			Fit quality	3= good (fitted with 0.5 source/beam prior cat); 2=fair (fitted with 1 source/beam prior cat, SNR<3); 1=not fitted
    'F250',		    # mJy			...			Super-deblended Herschel/SPIRE 250µm flux
    'dF250',		# mJy			...			Super-deblended Herschel/SPIRE 250µm flux error
    'F350',		    # mJy			...			Super-deblended Herschel/SPIRE 350µm flux
    'dF350',		# mJy			...			Super-deblended Herschel/SPIRE 350µm flux error
    'F450',		    # mJy			...			Super-deblended JCMT/SCUBA2 450µm flux
    'dF450',		# mJy			...			Super-deblended JCMT/SCUBA2 450µm flux error
    'F500', 		# mJy			...			Super-deblended Herschel/SPIRE 500µm flux
    'dF500',		# mJy			...			Super-deblended Herschel/SPIRE 500µm flux error
    'F850_sh',      # mJy			...			Super-deblended JCMT/SCUBA2 850µm flux; Geach+17 map (shallower)
    'dF850_sh',     # mJy			...			Super-deblended JCMT/SCUBA2 850µm flux error; Geach+17 map
    'F850_d',       # mJy			...			Super-deblended JCMT/SCUBA2 850µm flux; Zavala+17 map (deeper)
    'dF850_d',      # mJy			...			Super-deblended JCMT/SCUBA2 850µm flux error; Zavala+17 map
    'F850',		    # mJy			...			Super-deblended JCMT/SCUBA2 850µm flux; weighted combination
    'dF850',		# mJy			...			Super-deblended JCMT/SCUBA2 850µm flux error; weighted combination
    'F1100',		# mJy			...			Super-deblended ASTE/AzTEC 1100µm flux
    'dF1100'        # mJy			...			Super-deblended ASTE/AzTEC 1100µm flux error
]

In [34]:
# make table of only wanted columns (column order follows wantedCols)
reduced_df = full_df[wantedCols]
print(reduced_df.shape) # verify shape
# output table to csv file 
# written without the row index; the relative file name puts it in the current working directory
reduced_df.to_csv('Merged_JWST_and_EGS_photometry.csv', index=False) # what to title table?

(1569, 56)
