# Prepare All Catalogs

This notebook contains a list of functions that prepare the complementary catalogs for future work.
It clears the catalogs of non-detections and flagged objects.

## Import libraries

In [2]:
# Astropy
from astropy.table import Table
from astropy.io import fits
from astropy.coordinates import SkyCoord
from astropy import units as u

import missingno
import numpy as np

In [11]:
desdr2_catalog_path = '/data/mfonseca/survey_data/DES_data/DES_in_EMU_0102-32_square.fits'
desy6gold_catalog_path = '/data/mfonseca/survey_data/DES_data/DESY6GOLD_in_EMU_0102-32_magauto.fits'

vikingdr5_catalog_path = '/data/mfonseca/survey_data/VIKING_data/VIKINGDR5_in_EMU_0102-32_allapermag3.fits'
catwise_catalog_path = '/data/mfonseca/survey_data/CATWISE_data/CATWISE2020_in_EMU_0102-32.fits'

## Convert VEGA magnitude to AB magnitude (VIKING DR5, CATWISE2020)

While the DES DR2 magnitudes are already in the AB system, the VIKING DR5 and CATWISE2020 magnitudes are in the Vega system and need to be converted.

In [None]:
def fix_ab_magnitude(file_path, ab_offset, mag_columns):
    '''
    Converts the vega magnitudes to ab magnitudes

    Args:
        file_path (str): Path to the FITS file.
        ab_offset (list): List of offsets for each magnitude column.
        mag_columns (list): List of magnitude column names to be corrected, same order as ab_offset.
    Returns:
        None: The function modifies the FITS file in place.
    
    '''

    catalog_table = Table.read(file_path)
    catalog_df = catalog_table.to_pandas()

    for col in mag_columns:
        # Apply the AB magnitude correction
        index = mag_columns.index(col)
        col_dered = [col+'_dered' for col in mag_columns]
        catalog_df[col_dered] = catalog_df[col] + ab_offset[index]


    # Save the updated DataFrame back to the original FITS file
    updated_catalog_table = Table.from_pandas(catalog_df)
    updated_catalog_table.write(file_path, overwrite=True)

For the ViKING DR5 we used the two magnitudes recomended in the webpage of VSA: AperMagNoAperCorr3 and AperMag3.

In [None]:
viking_AperMagNoAperCorr3_cols = ['zAperMagNoAperCorr3', 'yAperMagNoAperCorr3', 'jAperMagNoAperCorr3', 'hAperMagNoAperCorr3', 'ksAperMagNoAperCorr3']
viking_AperMag3_cols = ['zAperMag3', 'yAperMag3', 'jAperMag3', 'hAperMag3', 'ksAperMag3']


# http://casu.ast.cam.ac.uk/surveys-projects/vista/technical/filter-set
viking_ab_offset = [0.502, 0.600, 0.916, 1.366, 1.827]

fix_ab_magnitude(vikingdr5_catalog_path, viking_ab_offset, viking_AperMagNoAperCorr3_cols)
fix_ab_magnitude(vikingdr5_catalog_path, viking_ab_offset, viking_AperMag3_cols)

For the CATWISE2020 survey, given the big size of the aperture magnitudes, we use the PSF magnitudes.

In [None]:
# For CATWISE2020
catwise_psfmag_columns = ['w1mpro', 'w2mpro']

# https://wise2.ipac.caltech.edu/docs/release/allsky/expsup/sec4_4h.html
catwise_ab_offset = [2.699, 3.339]

fix_ab_magnitude(catwise_catalog_path, catwise_ab_offset, catwise_psfmag_columns)

## Correct for extinction (DESY6GOLD, VIKING DR5)

In [None]:
exctinction_columns = ['a_fiducial_g', 'a_fiducial_r', 'a_fiducial_i', 'a_fiducial_z', 'a_fiducial_y',]


In [None]:
def correct_extinction(file_path, filter_list, type='AperMag3'):
    '''
    Corrects the extinction in the magnitudes of the catalog when given the total extinction.

    Args:
        file_path (str): Path to the FITS file.
        filter_list (list): List of filters to be corrected, in lower case.
        type (str): Type of magnitude to be corrected. Default is 'AperMag3'.
    Returns:
        None: The function modifies the FITS file in place.
    
    '''

    catalog_table = Table.read(file_path)
    catalog_df = catalog_table.to_pandas()

    rows = catalog_df.shape[0]

    for filter in filter_list:
        mag_colum = filter + type
        excoef_column = 'a' + filter.upper()

        extinction = catalog_df[excoef_column]

        catalog_df[mag_colum] = catalog_df[mag_colum] - catalog_df[excoef_column]


    # Save the updated DataFrame back to the original FITS file
    updated_catalog_table = Table.from_pandas(catalog_df)
    updated_catalog_table.write(file_path, overwrite=True)

In [None]:
# For VIKING DR4
viking_filters = ['z', 'y', 'j', 'h', 'ks']

correct_extinction(vikingdr5_catalog_path, viking_filters, type='AperMag3')
correct_extinction(vikingdr5_catalog_path, viking_filters, type='AperMagNoAperCorr3')

In [None]:
def compute_dereddened_photometry(df, mag_cols, extinction_cols):
    """
    Adds dereddened magnitudes and colors to a DataFrame.

    Parameters:
    - df: pandas.DataFrame
    - mag_cols: list of strings, magnitude column names
    - extinction_cols: dict mapping band (str) to extinction column
    
    Returns:
    - df: DataFrame with added columns:
        - e.g., 'zPnt_dered', 'yPnt_dered', 'zmyPnt_dered'
    """
    # Dereddened magnitudes
    for band in extinction_cols:
        mag_col = next((col for col in mag_cols if col.lower().startswith(band.lower())), None)
        if mag_col:
            df[f"{mag_col}_dered"] = df[mag_col] - df[extinction_cols[band]]

    # Dereddened colors
    for i in range(len(mag_cols)):
        for j in range(i + 1, len(mag_cols)):
            band1, band2 = mag_cols[i], mag_cols[j]
            b1, b2 = band1[0].lower(), band2[0].lower()
            if b1 in extinction_cols and b2 in extinction_cols:
                color_name = f"{b1}m{b2}Pnt"
                dered_color = (df[band1] - df[band2]) - (df[extinction_cols[b1]] - df[extinction_cols[b2]])
                df[f"{color_name}_dered"] = dered_color

    return df

In [None]:
# For VIKING DR5
point_mag_cols = ['zmyPnt', 'yPnt', 'jPnt']
extinction_cols = {'z': 'aZ', 'y': 'aY', 'j': 'aJ'}

## Calculate Colors (DESY6GOLD, VIKING DR5, CATWISE2020)

In [None]:
# For DES DR2
desy6gold_catalog_table = Table.read(desy6gold_catalog_path)
desy6gold_catalog_df = desy6gold_catalog_table.to_pandas()

desy6gold_mag_cols = ['mag_auto_g', 'mag_auto_r', 'mag_auto_i', 'mag_auto_z', 'mag_auto_y']
desy6gold_catalog_df['g_r_dered'] = desy6gold_catalog_df['mag_auto_g_dered'] - desy6gold_catalog_df['mag_auto_r_dered']
desy6gold_catalog_df['r_i_dered'] = desy6gold_catalog_df['mag_auto_r_dered'] - desy6gold_catalog_df['mag_auto_i_dered']
desy6gold_catalog_df['i_z_dered'] = desy6gold_catalog_df['mag_auto_i_dered'] - desy6gold_catalog_df['mag_auto_z_dered']
desy6gold_catalog_df['z_y_dered'] = desy6gold_catalog_df['mag_auto_z_dered'] - desy6gold_catalog_df['mag_auto_y_dered']

updated_catalog_table = Table.from_pandas(desy6gold_catalog_df)
updated_catalog_table.write(desy6gold_catalog_path, overwrite=True)

In [None]:
# For the CATWISE catalog
catwise_catalog_table = Table.read(catwise_catalog_path)
catwise_catalog_df = catwise_catalog_table.to_pandas()

catwise_catalog_df['w1_w2_dered'] = catwise_catalog_df['w1mpro_dered'] - catwise_catalog_df['w2mpro_dered']

updated_catalog_table = Table.from_pandas(catwise_catalog_df)
updated_catalog_table.write(catwise_catalog_path, overwrite=True)

# Count non-detections (VIKING DR5, CATWISE2020)

In [10]:
catalog_table = Table.read(vikingdr5_catalog_path)
vikingdr5_catalog_df = catalog_table.to_pandas()

print(vikingdr5_catalog_df.columns)



Index(['sourceID', 'ra', 'dec', 'zAperMag3', 'zAperMagNoAperCorr3',
       'zAperMag3Err', 'yAperMag3', 'yAperMagNoAperCorr3', 'yAperMag3Err',
       'jAperMag3', 'jAperMagNoAperCorr3', 'jAperMag3Err', 'hAperMag',
       'hAperMagNoAperCorr3', 'hAperMag3Err', 'ksAperMag3',
       'ksAperMagNoAperCorr3', 'ksAperMag3Err', 'eBV', 'aZ', 'aY', 'aJ', 'aH',
       'aKs', 'zppErrBits', 'yppErrBits', 'ksErrBits', 'jErrBits', 'hErrBits',
       'mergedClassStat', 'pGalaxy', 'pStar', 'pNoise', 'pSaturated'],
      dtype='object')


In [None]:
print(f'the nan value is: {vikingdr5_catalog_df['zAperMag3'].tail(len(vikingdr5_catalog_df)).unique().min()}')

z_filter = vikingdr5_catalog_df[~np.isclose(vikingdr5_catalog_df['zAperMag3'], -999999488.0)]
y_filter = vikingdr5_catalog_df[~np.isclose(vikingdr5_catalog_df['yAperMag3'], -999999488.0)]
j_filter = vikingdr5_catalog_df[~np.isclose(vikingdr5_catalog_df['jAperMag3'], -999999488.0)]
#h_filter = viking_catalog_df[~np.isclose(viking_catalog_df['hAperMag3'], -999999488.0)]
ks_filter = vikingdr5_catalog_df[~np.isclose(vikingdr5_catalog_df['ksAperMag3'], -999999488.0)]


print(f'{len(z_filter)} ({(len(z_filter)/len(vikingdr5_catalog_df))*100:.2f}%) objects have a nan value in the z filter')
print(f'{len(y_filter)} ({(len(y_filter)/len(vikingdr5_catalog_df))*100:.2f}%) objects have a nan value in the y filter')
print(f'{len(j_filter)} ({(len(j_filter)/len(vikingdr5_catalog_df))*100:.2f}%) objects have a nan value in the j filter')
#print(f'{len(h_filter)} ({(len(h_filter)/len(viking_catalog_df))*100:.2f}%) objects have a nan value in the h filter')
print(f'{len(ks_filter)} ({(len(ks_filter)/len(vikingdr5_catalog_df))*100:.2f}%) objects have a nan value in the ks filter')

the nan value is: -999999488.0
863853 (90.25%) objects have a nan value in the z filter
859674 (89.81%) objects have a nan value in the y filter
957205 (100.00%) objects have a nan value in the j filter
957205 (100.00%) objects have a nan value in the ks filter


In [None]:
catalog_table = Table.read(catwise_catalog_path)
catwise_catalog_df = catalog_table.to_pandas()

In [None]:
# This shows us that catwise doesn't have nan values

# missingno.matrix(catwise_catalog_df)

print(f'the nan value is: {catwise_catalog_df['w1mpro'].tail(len(catwise_catalog_df)).unique().min()}')

# Remove EMU sources in empty VIKING spots

There are three rectangular patches in the VIKING area that don't have any objects, which is due to bad quality measurments.

In [4]:
emu_catalog_path = '/data/mfonseca/survey_data/EMU_data/EMU_0102-32/EMU_0102-32_1comp.fits'

In [27]:
emu_table = Table.read(emu_catalog_path)
emu_catalog_df = emu_table.to_pandas()

viking_table = Table.read(vikingdr5_catalog_path)
viking_catalog_df = viking_table.to_pandas()

emu_coords = SkyCoord(ra=emu_catalog_df['ra_deg_cont'].values * u.deg,
                      dec=emu_catalog_df['dec_deg_cont'].values * u.deg)

viking_coords = SkyCoord(ra=viking_catalog_df['ra'].values * u.deg,
                         dec=viking_catalog_df['dec'].values * u.deg)

# Perform many-to-many matching (find all EMU–VIKING pairs within 10 arcsec)
idx_viking, idx_emu, sep2d, _ = emu_coords.search_around_sky(viking_coords, 10 * u.arcsec)

# Get unique EMU indices that have at least one match
unique_matched_emu_indices = np.unique(idx_emu)

matched_emu_df = emu_catalog_df.iloc[unique_matched_emu_indices]

Table.from_pandas(matched_emu_df).write("/data/mfonseca/survey_data/EMU_data/EMU_0102-32/EMU_0102-32_1comp_filtered.fits", overwrite=True)



In [28]:
print(f'Number of original sources in EMU catalog: {len(emu_catalog_df)}')
print(f'Number of sources in EMU catalog after filtering: {len(matched_emu_df)}')

fraction = len(matched_emu_df) / len(emu_catalog_df)

print(f'Fraction of sources in EMU catalog after filtering: {fraction:.2%}')

Number of original sources in EMU catalog: 18736
Number of sources in EMU catalog after filtering: 11588
Fraction of sources in EMU catalog after filtering: 61.85%
