In [1]:
# this sets up basic packages
import numpy as np
import pandas as pd
import astropy.units as u
import astropy.cosmology.units as cu

# this sets up matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

# this sets up astropy
from astropy.io import fits
from astropy.wcs import WCS
from astropy.wcs.utils import pixel_to_skycoord
from astropy.utils.data import get_pkg_data_filename
from astropy.coordinates import SkyCoord, Angle, match_coordinates_sky, Distance
from astropy.cosmology import Planck15 as cosmo
from astropy.table import Table

from regions import Regions, CircleSkyRegion

In [2]:
# first, import the file of neighbor data, which is a csv
df = pd.read_csv('neighbor_data.csv')

In [3]:
# this reads in all the COSMOS-Web measurements in different bands
f_cweb_cols = [col for col in df.columns if 'f_auto' in col]
e_cweb_cols = [col for col in df.columns if 'e_auto' in col]

# this saves the names of the COSMOS-Web bands
band_names = []
for header in f_cweb_cols:
    name = header[-5:] # this gets the band name, which are the last 5 characters of the column header
    band_names.append(name)

# check how many bands we've had for the first time
print(band_names)

# this reads in all the PRIMER measurements in different bands
f_primer_cols = [col for col in df.columns if 'f_primer_auto' in col]
e_primer_cols = [col for col in df.columns if 'e_primer_auto' in col]

# this saves the names of the PRIMER bands
for header in f_primer_cols:
    name = header[-5:] # this gets the band name, which are the last 5 characters of the column header
    if name not in band_names:
        band_names.append(name)

# check how many bands we've had for the second time
print(band_names)

['f814w', 'f115w', 'f150w', 'f277w', 'f444w', 'f770w']
['f814w', 'f115w', 'f150w', 'f277w', 'f444w', 'f770w', 'f606w', 'f090w', 'f200w', 'f356w', 'f410m']


Now let's do the dreaded for-loop that goes through the data and gets the best fluxes and flux errors.

In [5]:
fluxes = []
errors = []

for name in band_names:    
    ### first, read in the headers of the bands that are available in both COSMOS-Web and PRIMER
    cweb_flux_header = [header for header in f_cweb_cols if name in header]
    cweb_err_header = [header for header in e_cweb_cols if name in header]
    primer_flux_header = [header for header in f_primer_cols if name in header]
    primer_err_header = [header for header in e_primer_cols if name in header]
    
    ### next, check to see if the bands with these headers actually exist for BOTH COSMOS-Web AND PRIMER
    if len(cweb_flux_header) != 0 and len(primer_flux_header) != 0:
        # next, check to see how much of the data under these headers is actually NaN.
        # if they're NaN, set them to -99.0, so CIGALE will ignore them later on.
        cweb_flux = df[cweb_flux_header[0]].replace(np.nan, -99.0)
        cweb_err = df[cweb_err_header[0]].replace(np.nan, -99.0)
        primer_flux = df[primer_flux_header[0]].replace(np.nan, -99.0)
        primer_err = df[primer_err_header[0]].replace(np.nan, -99.0)

        # next, check for values that are less than zero. these values will be set to -99.0 as well.
        cweb_flux[cweb_flux <= 0] = -99.0
        cweb_err[cweb_err <= 0] = -99.0
        primer_flux[primer_flux <= 0] = -99.0
        primer_err[primer_err <= 0] = -99.0
        
        # then we calculate and compare the signal-to-noise ratio (SNR), 
        # then take the one (PRIMER vs COSMOS-Web) with the greater SNR.
        cweb_snr = cweb_flux / cweb_err
        primer_snr = primer_flux / primer_err

        # create an empty array to save the flux and flux error with better SNR
        better_flux = np.zeros(np.size(cweb_snr))
        better_err = np.zeros(np.size(cweb_snr))
        better_snr = np.zeros(np.size(cweb_snr))
        for i in range(np.size(cweb_snr)):
            if primer_snr[i] > cweb_snr[i]: # if PRIMER's SNR is greater than CWeb's, then keep PRIMER's flux, error & SNR
                better_flux[i] = primer_flux[i]
                better_err[i] = primer_err[i]
                better_snr[i] = primer_snr[i]
            else: # else if PRIMER's SNR is smaller than CWeb's, then keep CWeb's flux, error & SNR
                better_flux[i] = cweb_flux[i]
                better_err[i] = cweb_err[i]
                better_snr[i] = cweb_snr[i]

    ### now, check the cases where there is CWeb data but NO PRIMER data
    elif len(cweb_flux_header) != 0:
        cweb_flux = df[cweb_flux_header[0]].replace(np.nan, -99.0)
        cweb_err = df[cweb_err_header[0]].replace(np.nan, -99.0)

        # then we calculate the SNR
        better_flux = cweb_flux
        better_err = cweb_err
        better_snr = cweb_flux / cweb_err

    ### finally, check the cases where there is PRIMER data but NO CWeb data
    else:
        primer_flux = df[primer_flux_header[0]].replace(np.nan, -99.0)
        primer_err = df[primer_err_header[0]].replace(np.nan, -99.0)

        # then we calculate the SNR
        better_flux = primer_flux
        better_err = primer_err
        better_snr = primer_flux / primer_err

    ### NOW, after gathering all this, we check for the final time to see if the SNR is less than or equal to 3.
    ### if yes, we'll set the error to be equal to the flux, and the flux to be equal to 0. we have to do this
    ### because CIGALE likes to make weird estimational violations when it comes to large upper limits.
    for i in range(np.size(better_snr)):
        SNR = better_snr[i]
        if SNR <= 3:
            better_err[i] = better_flux[i]
            better_flux[i] = 0

    ### finally, we convert all the fluxes and errors 
    ### from μJy (the unit used in Hollis's table) to mJy (the unit that CIGALE takes)
    better_flux = better_flux / 1e3
    better_err = better_err / 1e3

    ### at last, we can escape this hellscape of a for-loop.
    ### save the fluxes and errors into a list so we can read later.
    fluxes.append(better_flux)
    errors.append(better_err)

After this trial and tribulation of a for-loop, let's do a sanity check!

In [7]:
# let's find a random band, and check a random line in the array of flux values 
# that correspond to that band. assume this value to be from CWeb for simplicity.
print(band_names[2])
print(fluxes[2][19])

# then, let's retrieve that same CWeb flux value from the dataframe, knowing both the name 
# of the band and the index of the flux value in that band. we'll just use CWeb for this.
print(df['f_auto_f150w'][19] / 1e3)

# if these two flux values are the same, this subtraction should return 0.
print(fluxes[2][19] - df['f_auto_f150w'][19] / 1e3)

f150w
0.0003587962311750272
0.0003587962311750272
0.0


The subtraction DOES return 0, so we're good!

Now, let's start preparing the contents for the dataframe that we'll later turn into a .txt file for CIGALE to read.

In [10]:
### first, we need the column names. 
# let's make a list for this. we have two known names: id and redshift.
column_names = ['id', 'redshift']

# next, we retrieve all the band names and make column names with them as well
for name in band_names:
    column_names.append('jwst.nircam.' + name.upper()) # make a column name for the flux
    column_names.append('jwst.nircam.' + name.upper() + '_err') # make a column name for the error

print(column_names)

['id', 'redshift', 'jwst.nircam.F814W', 'jwst.nircam.F814W_err', 'jwst.nircam.F115W', 'jwst.nircam.F115W_err', 'jwst.nircam.F150W', 'jwst.nircam.F150W_err', 'jwst.nircam.F277W', 'jwst.nircam.F277W_err', 'jwst.nircam.F444W', 'jwst.nircam.F444W_err', 'jwst.nircam.F770W', 'jwst.nircam.F770W_err', 'jwst.nircam.F606W', 'jwst.nircam.F606W_err', 'jwst.nircam.F090W', 'jwst.nircam.F090W_err', 'jwst.nircam.F200W', 'jwst.nircam.F200W_err', 'jwst.nircam.F356W', 'jwst.nircam.F356W_err', 'jwst.nircam.F410M', 'jwst.nircam.F410M_err']


In [11]:
### finally, let's put everything into each of the columns! 
# to do this, we'll first create a dictionary.
mega_dict = {}

# next, we'll create an array of redshifts that are all set at -99.0.
# these are meant to be placeholders to CIGALE to fill in when it runs.
z_arr = np.full(np.size(fluxes[0]), -99.0)

# now we start populating the dictionary
for idx in range(len(column_names)-1):
    # FIRST, we add the id array, as defined above
    if idx == 0:
        mega_dict[column_names[idx]] = df['id']
    # NEXT, we add the redshift array, where all the values are set to -99.0
    elif idx == 1:
        mega_dict[column_names[idx]] = z_arr
    # FINALLY, we start adding the fluxes and errors, based on the band names. we know
    # the order of the band names in the band_names list and the column_names list are
    # the same, so one by one, we'll populate the next columns.
    else:
        if idx % 2 == 0:
            mega_dict[column_names[idx]] = fluxes[int(idx/2-1)]
            mega_dict[column_names[idx+1]] = errors[int(idx/2-1)]

cigale_df = pd.DataFrame(mega_dict)
print(cigale_df)

         id  redshift  jwst.nircam.F814W  jwst.nircam.F814W_err  \
0    756593     -99.0           0.000094               0.000009   
1    756631     -99.0           0.000746               0.000012   
2    756701     -99.0           0.000345               0.000023   
3    756746     -99.0           0.000451               0.000011   
4    780592     -99.0           0.000000               0.000005   
..      ...       ...                ...                    ...   
101  799686     -99.0           0.000159               0.000014   
102  823652     -99.0           0.000104               0.000014   
103  823655     -99.0           0.000000              -0.099000   
104  823728     -99.0           0.000000               0.000009   
105  823849     -99.0           0.000000              -0.099000   

     jwst.nircam.F115W  jwst.nircam.F115W_err  jwst.nircam.F150W  \
0             0.000159               0.000017           0.000164   
1             0.001323               0.000020           0.0

In [12]:
cigale_df.to_csv('cigale_data.txt', sep='\t', index=False)