This notebook compiles redshift data (spectroscopic, grism, and photo-z) from a large number of public catalogs.

Creator: John Franklin Crenshaw  
Last run: June 18, 2025

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from astropy.table import Table
import astropy.constants as c
import astropy.units as u
from astropy.coordinates import SkyCoord

from pathlib import Path

sns.reset_orig()

In [None]:
def cone_mask(
    df: pd.DataFrame,
    ra: float = 53.13,
    dec: float = -28.10,
    radius: float = 0.72,
) -> pd.Series:
    """Mask dataframe rows inside a cone on the sky.

    The separation between every source and the cone center is computed as
    a great-circle (haversine) distance, so the selected region is a true
    circle on the sky: RA offsets are shrunk by cos(dec) and RA wrap-around
    at 0/360 degrees is handled. A plain Euclidean distance in (RA, DEC)
    would instead select a region that is squeezed along RA away from the
    equator.

    Parameters
    ----------
    df : pd.DataFrame
        Dataframe with RA and DEC columns, in decimal degrees.
    ra : float, optional
        Right ascension of the cone center, in degrees.
        Default is 53.13 (center of the ComCam ECDFS field).
    dec : float, optional
        Declination of the cone center, in degrees.
        Default is -28.10 (center of the ComCam ECDFS field).
    radius : float, optional
        Angular radius of the cone, in degrees. Default is 0.72.

    Returns
    -------
    pd.Series
        Boolean mask aligned with the index of ``df``. True indicates a
        source is inside the cone. Rows with missing coordinates are False.
    """
    # Work in radians
    ra_src = np.deg2rad(df["RA"].astype(float))
    dec_src = np.deg2rad(df["DEC"].astype(float))
    ra_cen = np.deg2rad(ra)
    dec_cen = np.deg2rad(dec)

    # Haversine formula for the angular separation; this form is
    # numerically stable for the small angles we care about here
    hav = (
        np.sin((dec_src - dec_cen) / 2) ** 2
        + np.cos(dec_cen) * np.cos(dec_src) * np.sin((ra_src - ra_cen) / 2) ** 2
    )
    # Clip protects arcsin from round-off pushing hav slightly above 1
    separation = np.rad2deg(2 * np.arcsin(np.sqrt(np.clip(hav, 0, 1))))

    # Mask where True indicates a source inside the cone
    mask = separation <= radius

    return mask

Below we load data from a variety of public catalogs. Each cell is pretty similar, with some specifics to each catalog. The top of each cell has links to documentation/data retrieval.

In [None]:
# Root of the shared data area; raw input catalogs and the compiled
# output catalogs live in sibling sub-directories beneath it
data_dir = Path("/sdf/data/rubin/shared/pz/users/crenshaw/comcam/data")
input_dir, output_dir = (
    data_dir / subdir for subdir in ("raw_redshift_catalogs", "catalogs")
)

In [None]:
# Load 2dFGRS catalog
# more info at http://www.2dfgrs.net
# https://arxiv.org/abs/astro-ph/0106498
tdFGRS_raw = np.genfromtxt(input_dir / "2dFGRS_best.idz")

# Convert RA from hh:mm:ss to decimal degrees
# (1 hour = 15 deg, 1 minute = 1/4 deg, 1 second = 1/240 deg)
tdFGRS_ra = (
    15 * tdFGRS_raw[:, 10] + 1/4 * tdFGRS_raw[:, 11] + 1/240 * tdFGRS_raw[:, 12]
)

# Convert Dec from dd:mm:ss to decimal degrees.
# The minus sign of a southern declination is carried by the degrees field,
# so it must be applied to the *whole* angle: -28:06:00 is -28.1 deg, whereas
# naively summing the fields gives -27.9 deg. signbit (rather than "< 0") is
# used so that a degrees field of "-00", which parses to -0.0, is still
# recognized as negative.
# NOTE(review): assumes the sign only appears on the degrees column; taking
# absolute values of all three fields makes this safe either way.
tdFGRS_dec_sign = np.where(np.signbit(tdFGRS_raw[:, 13]), -1.0, 1.0)
tdFGRS_dec = tdFGRS_dec_sign * (
    np.abs(tdFGRS_raw[:, 13])
    + np.abs(tdFGRS_raw[:, 14]) / 60
    + np.abs(tdFGRS_raw[:, 15]) / 3600
)

# Package important columns into a dataframe
tdFGRS = pd.DataFrame(
    {
        "RA": tdFGRS_ra,
        "DEC": tdFGRS_dec,
        "redshift": tdFGRS_raw[:, 24],
        "quality": tdFGRS_raw[:, 26],
    }
)

# Select galaxies with high redshift quality
# 3 = 0.90
# 4 = 0.99
# 5 = 1.00
mask = (tdFGRS["quality"] >= 3) & (tdFGRS["redshift"] > 0)
tdFGRS = tdFGRS[mask]
tdFGRS["quality"] = tdFGRS["quality"].replace({3: 0.90, 4: 0.99, 5: 1.00})

# Save redshift types (all spectroscopic)
tdFGRS["type"] = "s"

# Select cone around ECDFS
tdFGRS = tdFGRS[cone_mask(tdFGRS)]

# Save name of catalog
tdFGRS["source"] = "2dFGRS"

print(len(tdFGRS))

In [None]:
# 2dFLenS redshift catalog
# documentation: https://2dflens.swin.edu.au/data.html
# paper: https://arxiv.org/abs/1608.02668
tdflens = np.genfromtxt(input_dir / "2dflens_bestredshifts_goodz_withtypesandmags_final.dat")

# Only the first four columns are needed: position, redshift, quality flag
tdflens = pd.DataFrame(tdflens[:, :4], columns=["RA", "DEC", "redshift", "quality"])

# Keep positive redshifts whose quality flag is 3 or 4, then translate
# the flag into a confidence: 3 -> 0.97, 4 -> 1.00
reliable = tdflens["quality"].isin([3, 4])
tdflens = tdflens[reliable & (tdflens["redshift"] > 0)]
tdflens["quality"] = tdflens["quality"].replace({3: 0.97, 4: 1.00})

# Every redshift in this catalog is spectroscopic
tdflens["type"] = "s"

# Restrict to the cone around ECDFS and tag rows with the catalog name
tdflens = tdflens[cone_mask(tdflens)]
tdflens["source"] = "2dflens"

print(len(tdflens))

In [None]:
# 2MASS Redshift Survey (2MRS): main catalog plus the "extra" catalog
# more info at http://tdc-www.harvard.edu/2mrs/

# Speed of light in km/s, used to turn the V column into a redshift (z = V / c)
c_kms = c.c.to(u.km / u.s).value

# --- Main catalog ---
tmrs0 = Table.read(input_dir / "2mrs_v240/catalog/2mrs_1175_done.fits")
tmrs0 = tmrs0[["RA", "DEC", "V"]].to_pandas().rename(columns={"V": "redshift"})
tmrs0["redshift"] = tmrs0["redshift"] / c_kms

# Cone around ECDFS, keeping only the required columns
tmrs0 = tmrs0[cone_mask(tmrs0)][["RA", "DEC", "redshift"]]

# --- Extra catalog ---
tmrs1 = Table.read(input_dir / "2mrs_v240/extra/2mrs_extra_done.fits")
tmrs1 = (
    tmrs1[["RA", "DEC", "V", "DELRA", "DELDC"]]
    .to_pandas()
    .rename(columns={"V": "redshift"})
)
tmrs1["redshift"] = tmrs1["redshift"] / c_kms

# Only keep galaxies whose cross-match offset (DELRA, DELDC) is below 1
offset = np.sqrt(tmrs1.DELRA**2 + tmrs1.DELDC**2)
tmrs1 = tmrs1[offset < 1]

# Cone around ECDFS, keeping only the required columns
tmrs1 = tmrs1[cone_mask(tmrs1)][["RA", "DEC", "redshift"]]

# --- Stack both pieces and attach the bookkeeping columns ---
tmrs = pd.concat((tmrs0, tmrs1))

# The catalog provides no quality flag; this confidence is made up
# so that these redshifts supersede lower quality ones
tmrs["quality"] = 0.95

# All redshifts are spectroscopic
tmrs["type"] = "s"

# Name of catalog
tmrs["source"] = "2MRS"

print(len(tmrs))

In [None]:
# Load 6dFGS catalog
# more info at http://www.6dfgs.net
# https://arxiv.org/abs/0903.5451
sdFGS_raw = pd.read_csv(input_dir / "6dFGS_dr3.csv")

# Convert RA from hh:mm:ss to decimal degrees
# (1 hour = 15 deg, 1 minute = 1/4 deg, 1 second = 1/240 deg)
ra = []
for hms in sdFGS_raw.OBSRA:
    h, m, s = (float(field) for field in hms.split(":"))
    ra.append(15 * h + 1/4 * m + 1/240 * s)

# Convert Dec from dd:mm:ss to decimal degrees.
# The leading minus sign applies to the whole angle, not just the degrees
# field: "-28:06:00" is -28.1 deg, whereas summing the fields gives -27.9 deg.
# The sign is read from the string itself so that "-00:30:00" is handled too.
dec = []
for dms in sdFGS_raw.OBSDEC:
    d, m, s = (float(field) for field in dms.split(":"))
    sign = -1.0 if dms.strip().startswith("-") else 1.0
    dec.append(sign * (abs(d) + abs(m) / 60 + abs(s) / 3600))

# Repackage dataframe (all columns as floats)
sdFGS = pd.DataFrame(
    {
        "RA": ra,
        "DEC": dec,
        "redshift": sdFGS_raw.Z_HELIO.to_numpy(dtype=float),
        "quality": sdFGS_raw.QUALITY.to_numpy(dtype=float),
    }
)

# Select galaxies with high redshift quality
# 3 = 0.95
# 4 = 0.98
# Only flags 3 and 4 are accepted. A ">= 3" cut would also admit any code
# above 4, which the mapping below leaves untouched; the raw integer would
# then act as a confidence greater than 1 and win every duplicate match.
# NOTE(review): the 6dFGS DR3 paper appears to reserve Q=6 for Galactic
# sources -- confirm against the survey documentation.
mask = sdFGS["quality"].isin([3, 4]) & (sdFGS["redshift"] > 0)
sdFGS = sdFGS[mask]
sdFGS["quality"] = sdFGS["quality"].replace({3: 0.95, 4: 0.98})

# Save redshift types (all spectroscopic)
sdFGS["type"] = "s"

# Select cone around ECDFS
sdFGS = sdFGS[cone_mask(sdFGS)]

# Save name of catalog
sdFGS["source"] = "6dFGS"

print(len(sdFGS))

In [None]:
# VVDS (CDFS deep field) redshift catalog
# documentation: https://cesam.lam.fr/vvdspub/index.php
# paper: https://arxiv.org/abs/1307.0545
vvds = np.genfromtxt(input_dir / "cesam_vvds_spCDFS_DEEP.txt")

# Columns 2-5 hold position, redshift, and quality flag
vvds = pd.DataFrame(vvds[:, 2:6], columns=["RA", "DEC", "redshift", "quality"])

# Flag -> confidence (2 = 0.75, 3 = 0.95, 4 = 1.00).
# Only flags 3 and 4 with a positive redshift pass the selection.
reliable = vvds["quality"].isin([3, 4])
vvds = vvds[reliable & (vvds["redshift"] > 0)]
vvds["quality"] = vvds["quality"].replace({2: 0.75, 3: 0.95, 4: 1.00})

# Every redshift in this catalog is spectroscopic
vvds["type"] = "s"

# Restrict to the cone around ECDFS and tag rows with the catalog name
vvds = vvds[cone_mask(vvds)]
vvds["source"] = "VVDS"

print(len(vvds))

In [None]:
# Load JADES catalog
# more info at https://jades-survey.github.io/scientists/data.html
# https://arxiv.org/abs/2404.06531
jades = Table.read(input_dir / "jades_dr3_medium_gratings_public_gs_v1.1.fits").to_pandas()

# Select columns we want
jades = jades[["RA_NIRCam", "Dec_NIRCam", "z_Spec", "z_Spec_flag"]]
jades = jades.rename(
    columns={"RA_NIRCam": "RA", "Dec_NIRCam": "DEC", "z_Spec": "redshift", "z_Spec_flag": "quality"}
)

# Select galaxies with high redshift quality (numbers made up)
# A = 0.99
# B = 0.95
# C = 0.90
flag_confidence = {"A": 0.99, "B": 0.95, "C": 0.90}
flags = jades["quality"].astype(str)
mask = flags.isin(list(flag_confidence)) & (jades["redshift"] > 0)
jades = jades[mask]

# Map the letter flags to numbers and force a float column. Replacing
# strings in place would leave the dtype up to pandas' object handling,
# while the downstream duplicate removal compares these values numerically.
jades["quality"] = flags[mask].map(flag_confidence).astype(float)

# Save redshift types (all spectroscopic)
jades["type"] = "s"

# Select cone around ECDFS
jades = jades[cone_mask(jades)]

# Save name of catalog
jades["source"] = "JADES"

print(len(jades))

In [None]:
# MOSDEF redshift catalog
# documentation: https://mosdef.astro.berkeley.edu/for-scientists/data-releases/
mosdef = Table.read(input_dir / "mosdef_zcat.final_slitap.fits")

# Pull out position, redshift, and quality flag under our standard names
# (RA and DEC already have the right names)
mosdef = (
    mosdef[["RA", "DEC", "Z_MOSFIRE", "Z_MOSFIRE_ZQUAL"]]
    .to_pandas()
    .rename(columns={"Z_MOSFIRE": "redshift", "Z_MOSFIRE_ZQUAL": "quality"})
)

# Flag -> confidence (6 = 0.95, 7 = 0.99).
# Only flag 7 with a positive redshift passes the selection.
mosdef = mosdef[(mosdef["redshift"] > 0) & (mosdef["quality"] == 7)]
mosdef["quality"] = mosdef["quality"].replace({6: 0.95, 7: 0.99})

# Every redshift in this catalog is spectroscopic
mosdef["type"] = "s"

# Restrict to the cone around ECDFS and tag rows with the catalog name
mosdef = mosdef[cone_mask(mosdef)]
mosdef["source"] = "MOSDEF"

print(len(mosdef))

In [None]:
# OzDES redshift catalog
# documentation: https://docs.datacentral.org.au/ozdes/overview/dr2/
# paper: https://arxiv.org/abs/2006.00449
ozdes = pd.read_csv(input_dir / "ozdes.csv")

# Pull out position, redshift, and quality flag under our standard names
ozdes = ozdes[["Alpha_J2000", "Delta_J2000", "z", "qop"]].rename(
    columns={"Alpha_J2000": "RA", "Delta_J2000": "DEC", "z": "redshift", "qop": "quality"}
)

# Flag -> confidence (3 = 0.95, 4 = 0.99).
# Only flag 4 with a positive redshift passes the selection.
ozdes = ozdes[(ozdes["redshift"] > 0) & (ozdes["quality"] == 4)]
ozdes["quality"] = ozdes["quality"].replace({3: 0.95, 4: 0.99})

# Every redshift in this catalog is spectroscopic
ozdes["type"] = "s"

# Restrict to the cone around ECDFS and tag rows with the catalog name
ozdes = ozdes[cone_mask(ozdes)]
ozdes["source"] = "OzDES"

print(len(ozdes))


In [None]:
# PRIMUS redshift catalog
# documentation: https://primus.ucsd.edu/version1.html
# paper: https://arxiv.org/abs/1303.2672
primus = Table.read(input_dir / "PRIMUS_2013_zcat_v1.fits")

# Pull out position, redshift, and quality flag under our standard names
primus = (
    primus[["RA", "DEC", "Z", "ZQUALITY"]]
    .to_pandas()
    .rename(columns={"Z": "redshift", "ZQUALITY": "quality"})
)

# Flag -> confidence (3 = 0.85, 4 = 0.92).
# Flags of 3 and above with a positive redshift pass the selection.
primus = primus[(primus["redshift"] > 0) & (primus["quality"] >= 3)]
primus["quality"] = primus["quality"].replace({3: 0.85, 4: 0.92})

# These redshifts are recorded with type "g"
primus["type"] = "g"

# Restrict to the cone around ECDFS and tag rows with the catalog name
primus = primus[cone_mask(primus)]
primus["source"] = "PRIMUS"

print(len(primus))

In [None]:
# VANDELS redshift catalog
# documentation: http://vandels.inaf.it
# and https://www.eso.org/rm/api/v1/public/releaseDescriptions/168
vandels = Table.read(input_dir / "vandels_catalog.fits")

# Pull out position, redshift, and quality flag under our standard names
vandels = (
    vandels[["alpha", "delta", "zspec", "zflg"]]
    .to_pandas()
    .rename(columns={"alpha": "RA", "delta": "DEC", "zspec": "redshift", "zflg": "quality"})
)

# Flag -> confidence (2 = 0.75, 9 = 0.80, 3 = 0.95, 4 = 1.00).
# Only flag 4 with a positive redshift passes the selection.
vandels = vandels[(vandels["redshift"] > 0) & (vandels["quality"] == 4)]
vandels["quality"] = vandels["quality"].replace({2: 0.75, 9: 0.80, 3: 0.95, 4: 1.00})

# Every redshift in this catalog is spectroscopic
vandels["type"] = "s"

# Restrict to the cone around ECDFS and tag rows with the catalog name
vandels = vandels[cone_mask(vandels)]
vandels["source"] = "VANDELS"

print(len(vandels))

In [None]:
# VLT/VIMOS redshift catalog
# https://www.eso.org/rm/api/v1/public/releaseDescriptions/55
vimos = Table.read(input_dir / "ADP.2014-12-12T08_28_53.490.fits").to_pandas()
vimos["QF"] = vimos["QF"].astype(str)
vimos["ID_VIMOS"] = vimos["ID_VIMOS"].astype(str)

# Only positive redshifts are of interest
vimos = vimos[vimos["SPEC_Z"] > 0]

# Translate quality flags into a confidence:
#   flag A (LR-Blue or VIMOS MR)        -> 1.00
#   flag B, only when the ID contains MR -> 0.95
#   anything else                        -> 0.00
flag_a = vimos["QF"] == "A"
flag_b_mr = (vimos["QF"] == "B") & vimos["ID_VIMOS"].str.contains("MR", regex=False)
vimos["quality"] = np.select([flag_a, flag_b_mr], [1.00, 0.95], default=0.00)

# Drop everything that did not receive a high confidence
vimos = vimos[vimos["quality"] > 0.90]

# Pull out position, redshift, and confidence under our standard names
vimos = vimos[["RAJ2000_WFI", "DEJ2000_WFI", "SPEC_Z", "quality"]].rename(
    columns={"RAJ2000_WFI": "RA", "DEJ2000_WFI": "DEC", "SPEC_Z": "redshift"}
)

# Every redshift in this catalog is spectroscopic
vimos["type"] = "s"

# Restrict to the cone around ECDFS and tag rows with the catalog name
vimos = vimos[cone_mask(vimos)]
vimos["source"] = "VIMOS"

print(len(vimos))

In [None]:
# NED spectroscopic redshift catalog
# documentation: https://ned.ipac.caltech.edu/Documents/Guides/Database
ned = pd.read_csv(input_dir / "ned_specz.txt", sep="|")

# Keep objects of type "G" with a positive redshift and redshift flag "SLS "
# (the trailing space in the flag is part of the value being matched)
is_galaxy = ned["Type"] == "G"
is_sls = ned["Redshift Flag"] == "SLS "
ned = ned[is_galaxy & (ned["Redshift"] > 0) & is_sls]

# Pull out position and redshift under our standard names
ned = ned[["RA", "DEC", "Redshift"]].rename(columns={"Redshift": "redshift"})

# Restrict to the cone around ECDFS
ned = ned[cone_mask(ned)]

# No quality flag is used here; this confidence is made up
# so that these redshifts supersede lower quality ones
ned["quality"] = 0.98

# Every redshift in this catalog is spectroscopic
ned["type"] = "s"

# Tag rows with the catalog name
ned["source"] = "NED"

print(len(ned))

In [None]:
# CANDELS GOODS-S redshift catalog
# documentation: https://archive.stsci.edu/hlsps/candels/goods-s/catalogs/v2/hlsp_candels_hst_wfc3_goodss_multi_v2_redshift-cat-readme.pdf
# paper: https://arxiv.org/abs/2210.01140
candels = pd.read_csv(input_dir / "hlsp_candels_hst_wfc3_goodss_multi_v2_redshift-cat.txt", sep=" ")

# Keep positive redshifts that are either spec-z's ("s") or photo-z's ("p").
# Grism redshifts are skipped because they match 3D-HST, which we already have.
wanted_type = candels["z_best_type"].isin(["s", "p"])
candels = candels[(candels["z_best"] > 0) & wanted_type]

# Confidence and redshift type follow the catalog's own type label:
#   s -> quality 1.00, type "s"
#   p -> quality 0.93, type "p"
is_photoz = candels["z_best_type"] == "p"
candels["quality"] = np.where(is_photoz, 0.93, 1.00)
candels["type"] = np.where(is_photoz, "p", "s")

# Pull out the columns we need under our standard names
candels = candels[["RA", "DEC", "z_best", "quality", "type"]].rename(
    columns={"z_best": "redshift"}
)

# Restrict to the cone around ECDFS and tag rows with the catalog name
candels = candels[cone_mask(candels)]
candels["source"] = "CANDELS"

print(len(candels))

In [None]:
# Load all 3D-HST tables
# More info in https://archive.stsci.edu/prepds/3d-hst/#observations
# and https://arxiv.org/abs/1510.02106
# and https://arxiv.org/abs/1403.3689
hst3D_phot = Table.read(input_dir / "goodss_3dhst.v4.1.cat.FITS").to_pandas().set_index("id")
hst3D_zfit = Table.read(input_dir / "goodss_3dhst.v4.1.5.zfit.concat.fits").to_pandas().set_index("phot_id")
hst3D_zbest = Table.read(input_dir / "goodss_3dhst.v4.1.5.zbest.fits").to_pandas().set_index("phot_id")

# Join the three tables on the photometric ID
hst3D = hst3D_phot.join(hst3D_zfit, how="inner", rsuffix="_zfit").join(hst3D_zbest, how="inner", lsuffix="_zfit")

# Select spec-z's and good grisms
mask = (hst3D.z_best > 0)
mask &= (hst3D.z_spec > 0) | ((hst3D.use_zgrism == 1) & (hst3D.use_phot == 1) & (hst3D.flag1 == 0) & (hst3D.flag2 == 0))
mask &= (hst3D.z_best_s == 1) | (hst3D.z_best_s == 2)
hst3D = hst3D[mask]

# Save quality and redshift types
# Grisms   (z_best_s == 2): quality 0.95, type "g"
# Spec-z's (z_best_s == 1): quality 0.99, type "s"
# The type must be set here, while z_best_s is still available; labelling
# every row "g" would misreport the spec-z's as grism redshifts.
is_specz = hst3D.z_best_s == 1
hst3D["quality"] = np.where(is_specz, 0.99, 0.95)
hst3D["type"] = np.where(is_specz, "s", "g")

# Keep only required columns
hst3D = hst3D[["ra", "dec", "z_best", "quality", "type"]]
hst3D = hst3D.rename({"ra": "RA", "dec": "DEC", "z_best": "redshift"}, axis=1)

# Select cone around ECDFS
hst3D = hst3D[cone_mask(hst3D)]

# Save name of catalog
hst3D["source"] = "3D-HST"

# Drop duplicates
hst3D = hst3D.drop_duplicates()

print(len(hst3D))

In [None]:
# Load VUDS catalog
# more info at https://data.lam.fr/vuds/home
# https://arxiv.org/abs/1602.01842
vuds = pd.read_csv(input_dir / "cesam_vuds_spectra_dr1_ecdfs_catalog_1668100230.csv")

# Select columns we want
vuds = vuds[["alpha", "delta", "z_spec", "zflags"]]
vuds = vuds.rename({"alpha": "RA", "delta": "DEC", "z_spec": "redshift", "zflags": "quality"}, axis=1)

# Select galaxies with high redshift quality
# 9 = 0.80
# 3 = 0.95
# 4 = 1.00
# The redshift > 0 requirement mirrors the other spectroscopic catalogs
# in this notebook, so that a missing or placeholder redshift can never
# slip through on the strength of its flag alone.
mask = vuds["quality"].isin([9, 3, 4]) & (vuds["redshift"] > 0)
vuds = vuds[mask]
vuds["quality"] = vuds["quality"].replace({9: 0.80, 3: 0.95, 4: 1.00})

# Save redshift types (all spectroscopic)
vuds["type"] = "s"

# Select cone around ECDFS
vuds = vuds[cone_mask(vuds)]

# Save name of catalog
vuds["source"] = "VUDS"

print(len(vuds))

In [None]:
# Load ASTRODEEP catalog
# more info at http://www.astrodeep.eu/astrodeep-gs43-catalogue/
# http://arxiv.org/abs/2103.09246
astrodeep_phot = np.genfromtxt(input_dir / "ASTRODEEP-GS43_phot.cat")
astrodeep_phys = np.genfromtxt(input_dir / "ASTRODEEP-GS43_phys.cat")

# The two files are combined row by row, so they must describe the same
# sources in the same order
# NOTE(review): only the row counts are checked here -- confirm the IDs match
assert len(astrodeep_phot) == len(astrodeep_phys), "ASTRODEEP phot/phys catalogs differ in length"

# Package important columns into a dataframe
# (position from the photometric file, redshift from the physical file)
astrodeep = pd.DataFrame(
    np.hstack((astrodeep_phot[:, 1:3], astrodeep_phys[:, 1].reshape(-1, 1))),
    columns=["RA", "DEC", "redshift"],
)

# Keep only positive redshifts, as is done for the other catalogs in this
# notebook; this drops any non-positive placeholder values
# NOTE(review): check the catalog README for the exact sentinel used
astrodeep = astrodeep[astrodeep["redshift"] > 0]

# Set quality
astrodeep["quality"] = 0.97

# Save redshift types (all photometric)
astrodeep["type"] = "p"

# Select cone around ECDFS
astrodeep = astrodeep[cone_mask(astrodeep)]

# Save name of catalog
astrodeep["source"] = "astrodeep"

print(len(astrodeep))

In [None]:
# ASTRODEEP-JWST photo-z catalogs (JADES-GS and NGDEEP fields)
# documentation: http://www.astrodeep.eu/astrodeep-jwst-catalogs/
# paper: https://arxiv.org/abs/2409.00169
jades_gs, jades_ngdeep = (
    Table.read(input_dir / filename).to_pandas()
    for filename in ("JADES-GS_photoz.fits", "NGDEEP_photoz.fits")
)

# Select high-quality galaxies
def create_mask(cat):
    """Select high-quality sources from an ASTRODEEP-JWST catalog.

    A source is kept when both conditions hold:

    * the tens digit of its ``flag`` is at most 3, i.e. it is missing
      photometry in no more than 3 JWST bands;
    * its ``flag`` is below 400, which rejects point sources and
      spurious objects.

    The tens digit is extracted arithmetically rather than by indexing the
    decimal string, so the result does not depend on whether the flag column
    is stored as an integer or a float (the string "40.0" has "." in its
    second-to-last position), and an empty catalog is handled.

    Parameters
    ----------
    cat : pd.DataFrame
        Catalog with a numeric, non-negative ``flag`` column.

    Returns
    -------
    pd.Series
        Boolean mask aligned with the index of ``cat``.
        True indicates the source passes both cuts.
    """
    flag = cat["flag"]

    # Tens digit of the flag = number of JWST bands without photometry
    n_missing_bands = (flag // 10) % 10

    # Reject galaxies missing photometry in more than 3 JWST bands,
    # as well as point sources and spurious objects (flag >= 400)
    return (n_missing_bands <= 3) & (flag < 400)

# Apply the quality cuts to both fields
jades_gs = jades_gs[create_mask(jades_gs)]
jades_ngdeep = jades_ngdeep[create_mask(jades_ngdeep)]

# Set quality and redshift types
# Photo-z's get a field-dependent confidence (JADES-GS = 0.92, NGDEEP = 0.90);
# sources that come with a spec-z (zspec > 0) get confidence 1.00 and type "s"
gs_has_specz = jades_gs["zspec"] > 0
jades_gs["quality"] = np.where(gs_has_specz, 1.00, 0.92)
jades_gs["type"] = np.where(gs_has_specz, "s", "p")

ngdeep_has_specz = jades_ngdeep["zspec"] > 0
jades_ngdeep["quality"] = np.where(ngdeep_has_specz, 1.00, 0.90)
jades_ngdeep["type"] = np.where(ngdeep_has_specz, "s", "p")

# Combine catalogs
astrodeep_jwst = pd.concat((jades_gs, jades_ngdeep), ignore_index=True)

# Set redshift: the spec-z where one exists, otherwise the photo-z
astrodeep_jwst["redshift"] = np.where(
    astrodeep_jwst["zspec"] > 0, astrodeep_jwst["zspec"], astrodeep_jwst["zphot"]
)

# Select columns we want
astrodeep_jwst = astrodeep_jwst[["RA", "DEC", "redshift", "quality", "type"]]

# Keep only positive redshifts, as is done for the other catalogs in this
# notebook; this drops any non-positive placeholder photo-z values
# NOTE(review): check the catalog README for the exact sentinel used
astrodeep_jwst = astrodeep_jwst[astrodeep_jwst["redshift"] > 0]

# Select cone around ECDFS
astrodeep_jwst = astrodeep_jwst[cone_mask(astrodeep_jwst)]

# Save name of catalog
astrodeep_jwst["source"] = "astrodeep_jwst"

print(len(astrodeep_jwst))

Now let's combine everything into a single catalog

In [None]:
def combine_without_duplicates(cat1: pd.DataFrame, cat2: pd.DataFrame, max_sep: float = 0.75) -> pd.DataFrame:
    """This function combines two redshift catalogs while removing duplicates.

    Every source in cat1 is paired with its nearest neighbor on the sky in
    cat2. If the pair is separated by no more than ``max_sep``, the two are
    considered duplicates and the entry with the lower quality is dropped.
    If there is a tie, the redshift from cat1 is kept.

    Parameters
    ----------
    cat1 : pd.DataFrame
        First dataframe of redshift data, with RA and DEC columns in degrees
        and a numeric quality column. Note redshifts from this catalog
        will win ties in redshift confidence.
    cat2 : pd.DataFrame
        Second dataframe of redshift data, with the same columns.
    max_sep : float, optional
        Max separation, in arcseconds, to consider for duplicates.
        Default is 0.75.

    Returns
    -------
    pd.DataFrame
        The concatenated catalogs with duplicates removed and a fresh
        integer index. Surviving rows keep their original relative order,
        cat1 rows first.
    """
    # With an empty catalog there is nothing to cross-match (and the sky
    # matching below needs a non-empty reference catalog), so just stack
    if len(cat1) == 0 or len(cat2) == 0:
        return pd.concat((cat1, cat2), ignore_index=True)

    # Create coordinates from each pair of catalogs
    coord1 = SkyCoord(ra=cat1.RA.values*u.degree, dec=cat1.DEC.values*u.degree)
    coord2 = SkyCoord(ra=cat2.RA.values*u.degree, dec=cat2.DEC.values*u.degree)

    # Find closest matches: idx[i] is the row of cat2 nearest to row i of cat1
    idx, d2d, _ = coord1.match_to_catalog_sky(coord2)
    d2d = d2d.to(u.arcsec).value

    # A pair is a duplicate unless it is farther apart than max_sep
    is_duplicate = ~(d2d > max_sep)

    # Qualities of each cat1 source and of its matched cat2 source
    quality1 = cat1["quality"].to_numpy(dtype=float)
    quality2 = cat2["quality"].to_numpy(dtype=float)[idx]

    # For every duplicate pair, drop the entry with the lower quality;
    # cat1 survives ties (and any comparison that is not strictly "less")
    cat1_loses = is_duplicate & (quality1 < quality2)
    cat2_loses = is_duplicate & ~(quality1 < quality2)

    # Convert rows to remove into boolean masks of rows to keep.
    # Several cat1 sources may point at the same cat2 row; it is removed
    # if it loses against any of them.
    keep1 = ~cat1_loses
    keep2 = np.ones(len(cat2), dtype=bool)
    keep2[idx[cat2_loses]] = False

    # Return concatenated catalog, keeping only specified rows
    return pd.concat((cat1[keep1], cat2[keep2]), ignore_index=True)

In [None]:
# Merge all catalogs into one table, folding them in one at a time and
# removing duplicates at every step. Note the order below is arbitrary.
catalogs = [
    astrodeep_jwst, jades, vvds, vandels, vuds, vimos, candels, ozdes,
    ned, tdFGRS, tdflens, tmrs, sdFGS, mosdef, hst3D, primus,
]
combined = astrodeep
for cat in catalogs:
    combined = combine_without_duplicates(combined, cat)

# The per-catalog "quality" column is published as "confidence"
combined = combined.rename(columns={"quality": "confidence"})

print(len(combined))

# Write the compiled catalog to disk
combined.to_parquet(output_dir / "comcam_ecdfs_redshift_catalog_20250618.parquet")

In [None]:
# Reload the compiled catalog from disk and display it
catalog_path = output_dir / "comcam_ecdfs_redshift_catalog_20250618.parquet"
combined = pd.read_parquet(catalog_path)
combined

In [None]:
# Summary figure: sky footprint, redshift distribution per survey
# (linear and log scale), and redshift distribution per redshift type
fig, (ax1, ax2, ax3, ax4) = plt.subplots(1, 4, figsize=(14, 2.5), dpi=200)

# Names and colors of surveys
# Names are sorted so that they line up with the columns of the pivoted
# tables used for the histograms, giving each survey the same color in
# every panel
names = np.sort(combined.source.unique())
colors = list(sns.color_palette("tab20", n_colors=len(names)))[::-1]

# Panel 1: plot footprint of each survey
for name, color in zip(names, colors):
    subset = combined[combined.source == name]
    ax1.scatter(subset.RA, subset.DEC, color=color, s=0.01)

# Axis setup happens once, after the loop. invert_xaxis() flips the axis on
# every call, so calling it once per survey would leave the orientation
# dependent on how many surveys are plotted; set_inverted(True) always
# leaves RA increasing to the left.
ax1.set(xlabel="RA (deg)", ylabel="Dec (deg)", aspect="equal")
ax1.xaxis.set_inverted(True)

# One redshift column per survey / per redshift type
z_by_source = combined.pivot(columns="source").redshift
z_by_type = combined.pivot(columns="type").redshift

# Panel 2: stacked redshift histogram per survey (linear scale)
z_by_source.plot.hist(stacked=True, bins=64, range=(0, 8), ax=ax2, color=colors)
ax2.set(
    xlabel="Redshift",
    xlim=(0, 8),
)
ax2.legend(fontsize=5, ncols=2, loc="upper right", frameon=False)

# Panel 3: same histogram on a log scale
z_by_source.plot.hist(stacked=True, bins=64, range=(0, 8), ax=ax3, color=colors, legend=False)
ax3.set(
    xlabel="Redshift",
    yscale="log",
    xlim=(0, 8),
    ylabel=None,
)

# Panel 4: redshift histogram per redshift type (log scale)
z_by_type.plot.hist(stacked=False, bins=64, range=(0, 8), ax=ax4, histtype="step")
ax4.set(
    xlabel="Redshift",
    yscale="log",
    xlim=(0, 8),
    ylabel=None,
)
ax4.legend(fontsize=8, ncols=3, loc="upper right", handlelength=1, frameon=False, borderpad=0, columnspacing=1)

fig.subplots_adjust(wspace=0.25)