In [23]:
# TODO
# 1. open tphot, egs_candles, and egs_merged to read
# 2. match RA and DEC between egs_merged and tphot
# 3. get ID from egs_merged for matches (#2) and match to egs_candles
# 4. make catalog with all data
# note: there may be 2 matches in #2, make note of these sources. We will have to match by eye. 

In [24]:
## imports
# astronomy
from astropy.io import fits
from astropy.io import ascii
from astropy.table import Table
from astropy.coordinates import SkyCoord
import astropy.units as u
# plotting
import matplotlib.pyplot as plt
# data 
import numpy as np
import pandas as pd 

# 1. Read Data

In [25]:
# file paths 
path_catalogs = 'C:\\Users\\polar\\OneDrive - The University of Kansas\\AGNerds\\Catalogs'

In [48]:
# open tphot
tphot_data = ascii.read(path_catalogs+'\\tphot.cat')
tphot_cols = tphot_data.colnames

# show table
# tphot_data.show_in_notebook()
# print(tphot_cols)

# important columns:
# name = 'ra'
# name = 'dec'

In [45]:
# open egs_merged 
# egs_merged_inf = fits.open(path_catalogs+'\\egs_merged_v1.1.fits')
# egs_merged_data = egs_merged_inf[1].data
# egs_merged_cols = egs_merged_inf[1].columns
# egs_merged_inf.close()

egs_merged_data = Table.read(path_catalogs+'\\egs_merged_v1.1.fits')
egs_merged_cols = egs_merged_data.columns

# print all columns
# print(egs_merged_cols)

# important columns:
# name = 'ID'; format = 'K'
# name = 'RA'; format = 'D'; unit = 'deg'
# name = 'DEC'; format = 'D'; unit = 'deg'
# name = 'z_best'; format = 'D'

In [28]:
# open egs_candels 
egs_candels_data_bySrc= np.genfromtxt(path_catalogs+'\\egs_candels_checkage_official.ir_fitting')
egs_candels_data_byCol = egs_candels_data_bySrc.T # transpose 
# manually type column names
egs_candels_cols = ['Source','z','L(8-1000)','er','L(3-1100)','er','L1_1','L1_2','L1_3','L1_4','L2_1','L2_2','L2_3','L2_4','L_008','er','L_012','er','L_015','er','L_024','er','SFR_TIR','er','SFR_008','er','SFR_012','er','SFR_015','er','SFR_024','er','SFRonly24','SFR_R+09','SFR_R+13','SFR_E+11','er','SFR_W+11','SFR1600','SFR2800','UV_beta','A(V)','SFR1600c','SFR2800c','A(V)e','SFR1600ce','SFR2800ce','qPAH_DL+07','Umin_DL+07','gamma_DL+07','Mdust_DL+07','temp_1','temp_2','temp_3','temp_4','temp_only24','factor_1','factor_2','factor_3','factor_4','factor_only24','F(24)','R+09_valA','R+09_valB','R+13_valA','R+13_valB','W+11_val','l_IRAC_58','f_IRAC_58','l_IRAC_80','f_IRAC_80','l_MIPS24','f_MIPS24','l_MIPS70','f_MIPS70','l_PACS_100','f_PACS_100','l_PACS_160','f_PACS_160','l_SPIRE_250','f_SPIRE_250','l_SPIRE_350','f_SPIRE_350','l_SPIRE_500','f_SPIRE_500','plot_maxy','Nfit','Nobs']

# important columns (access by index)
EGSCAN_SOURCE = 0 # Source    1

# 2. Match RA and DEC 

In [29]:
## get RA and DEC from catalogs

# get RA and DEC from tphot and make array
tphot_RA  = np.array(tphot_data['ra'])
tphot_DEC = np.array(tphot_data['dec'])
# get RA and DEC from egs_merged (already an array)
egs_merged_RA  = np.array(egs_merged_data['RA'])
egs_merged_DEC = np.array(egs_merged_data['DEC'])

# get coordinants 
tphot_coord = SkyCoord(ra=tphot_RA*u.deg, dec=tphot_DEC*u.deg)
egs_merged_coord = SkyCoord(ra=egs_merged_RA*u.deg, dec=egs_merged_DEC*u.deg)

In [30]:
print('Number of tphot sources:\t', len(tphot_RA))
print('Number of egs_merged sources:\t', len(egs_merged_RA))

Number of tphot sources:	 1734
Number of egs_merged sources:	 41457


In [31]:
## match RA and DEC between catalogs
# DOC: https://docs.astropy.org/en/stable/coordinates/matchsep.html

# idx are indices into catalog that are the closest objects to each of the coordinates in c, 
# d2d are the on-sky distances between them, and 
# d3d are the 3-dimensional distances. 
idx, d2d, d3d = egs_merged_coord.match_to_catalog_sky(tphot_coord) # idx, d2d, d3d = c.match_to_catalog_sky(catalog)

# separation constraint
max_sep = 1.0 * u.arcsec
sep_constraint = d2d < max_sep  # use on 'c' (egs_merged_coord)
idx_sep = idx[sep_constraint]   # use on 'catalog' (tphot)

# get matches
egs_merged_coord_matches = egs_merged_coord[sep_constraint]
tphot_coord_matches = tphot_coord[idx_sep]

# print length  
print('Number of matches:\t', len(egs_merged_coord_matches))
print('Number of matches:\t', len(tphot_coord_matches))

Number of matches:	 1661
Number of matches:	 1661


In [32]:
# test match
i=11
print(egs_merged_coord_matches[i])
print(tphot_coord_matches[i])

<SkyCoord (ICRS): (ra, dec) in deg
    (215.0598187, 52.90257107)>
<SkyCoord (ICRS): (ra, dec) in deg
    (215.059807, 52.90258)>


# 3. Find Duplicate Matches

In [33]:
## determine duplicate sources

# make mask of unique soruces 
mask = np.zeros(len(idx_sep), dtype=bool)
mask[np.unique(idx_sep, return_index=True)[1]] = True
# apply mask to get unique sources and duplicates
idx_sep_unique = idx_sep[mask]
idx_sep_duplicates = idx_sep[~mask]
# print info
print('Number of unique:\t', len(idx_sep_unique))
print('Number of duplicates:\t', len(idx_sep_duplicates))
print('Duplicates:\n', idx_sep_duplicates)

Number of unique:	 1633
Number of duplicates:	 28
Duplicates:
 [1412 1427 1273  930 1389  791  771  942 1106 1204   38   34 1427 1397
  801  660  379  970  127 1279  983  973  719  789 1092 1140 1075 1058]


# 4. Match ID

In [34]:
# get IDs
egs_merged_ID = egs_merged_data['ID']
egs_merged_ID_tphotMatches = egs_merged_ID[sep_constraint]
egs_candels_ID = egs_candels_data_byCol[EGSCAN_SOURCE]

In [35]:
# Used to match by id (code from Connor Auge)
def match(a, b):
    b_set = set(b)
    b_match = [i for i, v in enumerate(a) if v in b_set]
    a_set = set(a)
    a_match = [i for i, v in enumerate(b) if v in a_set]
    a_match = np.asarray(a_match)
    b_match = np.asarray(b_match)
    a_match2 = np.argsort(a[b_match])
    b_match2 = np.argsort(b[a_match])
    return b_match[a_match2],a_match[b_match2]

In [36]:
# match 
key_merged, key_candels = match(egs_merged_ID_tphotMatches, egs_candels_ID)

# apply match key
egs_merged_ID_tphotMatches_egsMatches = egs_merged_ID_tphotMatches[key_merged]
egs_candels_ID_egsMatches = egs_candels_ID[key_candels]

In [37]:
# test
i=1
print(egs_merged_ID_tphotMatches_egsMatches[i])
print(egs_candels_ID_egsMatches[i])

1498
1498.0


# Build Catalog

In [58]:
# helper indexing
tphot_i         = idx_sep
egs_merged_i    = np.where(sep_constraint)[0]
egs_candles_i   = key_candels

# verify that all lengths match
print(len(tphot_i))
print(len(egs_merged_i))
print(len(egs_candles_i))

# save number of matches 
n_matches = len(tphot_i)

1661
1661
1661


In [59]:
# convert astropy table to pandas dataframe
tphot_df = tphot_data.to_pandas()
# get matched sources
tphot_df_matched = tphot_df.iloc[tphot_i]
print(tphot_df_matched.shape) # verify shape

(1661, 17)


In [60]:
# convert astropy table to pandas dataframe
egs_merged_df = egs_merged_data.to_pandas()
# get matched sources
egs_merged_df_matched = egs_merged_df.iloc[egs_merged_i]
print(egs_merged_df_matched.shape) # verify shape

(1661, 707)


In [61]:
# convert numpy array to pandas dataframe 
egs_candels_df = pd.DataFrame(egs_candels_data_bySrc, columns=egs_candels_cols)
egs_candels_df['Source'] = egs_candels_df['Source'].astype(int) # correct data type
# get matched sources
egs_candels_df_matched = egs_candels_df.iloc[egs_candles_i]
print(egs_candels_df_matched.shape) # verify shape

# # TODO fix this?
# # these temp columns did not read from file correctly...
# print(egs_candels_data_byCol[55])
# print(egs_candels_df['temp_only24'])

(1661, 88)


In [65]:
# # TODO remove duplicate sources

# final_table = pd.concat(
#         [egs_merged_df_matched, egs_candels_df_matched, tphot_df_matched],
#         axis=1,
#         ignore_index=True,
#         sort=False
#     )

InvalidIndexError: Reindexing only valid with uniquely valued Index objects