# 01 - ComCam x Milliquas
Authors: Olivia Lynn & Neven Caplar
Last Run: June 16, 2025

## Set up
Imports and Dask client setup

In [None]:
import lsdb
print(lsdb.__version__)
import hats
print(hats.__version__)
import astropy.units as u
import matplotlib.pyplot as plt
from matplotlib.patches import Circle
import numpy as np
import pandas as pd

from lsdb.core.search import ConeSearch

from dask.distributed import Client
from nested_pandas import NestedDtype

In [None]:
# Local Dask cluster used for the lazy catalog operations below:
# four single-threaded workers, each limited to 24GB of memory.
client = Client(
    n_workers=4,
    threads_per_worker=1,
    memory_limit="24GB",
)
client

## Get catalogs
Load the ComCam object catalog and the Milliquas catalog, which we will crossmatch.



In [None]:
# Open the full ComCam object catalog and its "lite" counterpart in one go;
# the generator yields them in the same order as the two targets.
comcam_cat, comcam_cat_lite = (
    lsdb.open_catalog(catalog_path)
    for catalog_path in (
        '/sdf/data/rubin/shared/lsdb_commissioning/hats/v29_0_0/object_collection/object_lc',
        '/sdf/data/rubin/shared/lsdb_commissioning/hats/v29_0_0/object_collection_lite/object_lc',
    )
)

# Display the full catalog
comcam_cat

In [None]:
# Open the Milliquas catalog together with its margin cache, using the same
# `lsdb.open_catalog` entry point as the ComCam catalogs above.
# The margin directory is named `..._10arcs` (presumably a 10-arcsecond margin).
# NOTE(review): the catalog path goes through ".../rubin/user/olynn/" while the
# margin path goes through ".../rubin/u/olynn/" - confirm both resolve.
miliquas_cat = lsdb.open_catalog(
    "/sdf/data/rubin/user/olynn/AGNs/hats/Milliquas_v8",
    margin_cache="/sdf/data/rubin/u/olynn/AGNs/hats/Milliquas_v8_margin/Milliquas_v8_10arcs",
)

# Display the catalog
miliquas_cat

## Do crossmatch
ComCam x MilliQuas

In [None]:
# Crossmatch the lite ComCam catalog against Milliquas with a 0.1 arcsecond
# radius; columns from each side get the "_COM" / "_MIL" suffix.
comcam_x_milliquas = comcam_cat_lite.crossmatch(
    miliquas_cat,
    radius_arcsec=.1,
    suffixes=["_COM", "_MIL"],
)

# Materialize the crossmatch result in memory
comcam_x_milliquas_computed = comcam_x_milliquas.compute()

In [None]:
# Save the computed crossmatch table as a Parquet file.
comcam_x_milliquas_computed.to_parquet("/sdf/home/n/ncaplar/AGN/comcam_x_milliquas_computed.parquet") # The output file path


## Verify crossmatch


In [None]:
# Plot the pixels of the (lazy) crossmatched catalog.
comcam_x_milliquas.plot_pixels(plot_title="COMCAM x Milliquas")


In [None]:
# Shut down the Dask client; the cells below only use the already computed
# `comcam_x_milliquas_computed` table.
client.close()


# Science 

In [None]:
# Number of rows in the computed crossmatch
len(comcam_x_milliquas_computed)

In [None]:
# Centers (RA, Dec in degrees) of the six Data Preview 1 fields used here
fields = dict([
    ("ECDFS", (53.13, -28.10)),            # Extended Chandra Deep Field South
    ("EDFS", (59.10, -48.73)),             # Euclid Deep Field South
    ("Rubin_SV_38_7", (37.86, 6.98)),      # Low Ecliptic Latitude Field
    ("Rubin_SV_95_-25", (95.00, -25.00)),  # Low Galactic Latitude Field
    ("47_Tuc", (6.02, -72.08)),            # 47 Tuc Globular Cluster
    ("Fornax_dSph", (40.00, -34.45)),      # Fornax Dwarf Spheroidal Galaxy
])

# Plot style per field as (color, linestyle):
# red marks the extragalactic fields, orange the dense fields.
field_styles = dict([
    ("ECDFS", ("red", "solid")),
    ("EDFS", ("red", "dashed")),
    ("Rubin_SV_38_7", ("red", "dotted")),
    ("Rubin_SV_95_-25", ("orange", "solid")),
    ("47_Tuc", ("orange", "dashed")),
    ("Fornax_dSph", ("orange", "dotted")),
])

# Cone-search radius: 2 degrees expressed in arcseconds
radius_arcsec = 2 * 3600

# One cone search per field, keyed by field name
cones = {
    field_name: ConeSearch(ra=center[0], dec=center[1], radius_arcsec=radius_arcsec)
    for field_name, center in fields.items()
}

In [None]:
# Sky map of the crossmatched sources with the survey fields outlined
fig, ax = plt.subplots(figsize=(10, 7))

# ComCam positions of the crossmatched sources
ax.scatter(
    comcam_x_milliquas_computed['coord_ra_COM'],
    comcam_x_milliquas_computed['coord_dec_COM'],
    label='COMCAM',
    c='blue',
    s=1,
    alpha=0.5,
)

# Outline every field with a circle in its assigned color / linestyle
field_radius = 1.5  # degrees
for name, (ra, dec) in fields.items():
    color, linestyle = field_styles[name]
    circle = Circle(
        (ra, dec),
        radius=field_radius,
        label=name,
        edgecolor=color,
        linestyle=linestyle,
        facecolor='none',
        linewidth=2,
        alpha=0.8,
    )
    ax.add_patch(circle)

# Axis labels, title and general formatting
ax.set(
    xlabel="RA [deg]",
    ylabel="Dec [deg]",
    title="COMCAM Detections with Data Preview 1 Field Overlays",
)
ax.legend(fontsize=9)
ax.set_aspect('equal', adjustable='box')
ax.grid(True, linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()


In [None]:
# Preview the ComCam position (RA and Dec) and the nested forced-source table
# of the first five matches.
comcam_x_milliquas_computed[['coord_ra_COM', 'coord_dec_COM', 'objectForcedSource_COM']].head(5)


In [None]:
bands = ["u", "g", "r", "i", "z", "y"]


def count_bands(entry, band_names=tuple(bands)):
    """Count how many rows of a nested light-curve table fall in each band.

    Parameters
    ----------
    entry : table-like or None
        Object indexable with ``entry["band"]`` (e.g. a DataFrame holding one
        source's forced photometry). Anything that cannot be indexed that way
        (``None``, NaN, a table without a ``band`` column) yields all zeros.
    band_names : sequence of str, optional
        Bands to report. Bound to the contents of ``bands`` when the function
        is defined, so rebinding the global ``bands`` later does not change
        the result.

    Returns
    -------
    dict
        ``{band: count}`` with plain ``int`` counts, in ``band_names`` order.
    """
    # Only the lookup is guarded: a missing/invalid entry means "no data",
    # while any other error is allowed to surface instead of becoming a 0.
    try:
        band_column = entry["band"]
    except (KeyError, TypeError, IndexError):
        return {band: 0 for band in band_names}

    # Single pass over the column instead of one comparison per band
    counts = pd.Series(band_column).value_counts()
    return {band: int(counts.get(band, 0)) for band in band_names}


# Per-row band counts computed from the nested forced-source tables
band_counts = comcam_x_milliquas_computed["objectForcedSource_COM"].apply(count_bands)

# Expand the per-row dicts into one integer column per band
band_counts_df = pd.DataFrame(band_counts.tolist())
band_counts_df = band_counts_df.fillna(0).astype(int)

# Attach the counts to the crossmatch table as n_<band> columns
# (assigned positionally, via the underlying arrays)
for band in bands:
    comcam_x_milliquas_computed[f"n_{band}"] = band_counts_df[band].to_numpy()

In [None]:

# Per-band count column and the color used for its histogram.
# Stored under its own name so the plain band list `bands` (used when the
# n_<band> columns are built) is not overwritten by these tuples.
band_styles = [
    ('n_u', 'blue'),
    ('n_g', 'green'),
    ('n_r', 'red'),
    ('n_i', 'brown'),
    ('n_z', 'purple'),
    ('n_y', 'orange'),
]

# Determine a bin range shared by all panels
all_counts = pd.concat([comcam_x_milliquas_computed[column] for column, _ in band_styles])
bin_min, bin_max = int(all_counts.min()), int(all_counts.max()) + 1
bins = list(range(bin_min, bin_max + 10, 10))  # Bin width = 10

# Create 2x3 subplots, one panel per band
fig, axes = plt.subplots(2, 3, figsize=(15, 8))
axes = axes.flatten()

for ax, (column, color) in zip(axes, band_styles):
    data = comcam_x_milliquas_computed[column].dropna()
    total_detections = int(data.sum())

    ax.hist(data, bins=bins, color=color, edgecolor='black', alpha=0.7)
    ax.set_xlabel("Detection Count")
    ax.set_ylabel("Number of Sources")
    ax.grid(True, linestyle='--', alpha=0.5)
    ax.set_xlim(bin_min, bin_max)

    # Add the summed count for this band above its panel
    ax.text(0.5, 1.05, f"Total detections: {total_detections:,}",
            transform=ax.transAxes, ha='center', va='bottom',
            fontsize=10, color='black')

plt.tight_layout()
plt.suptitle("Detection Counts per Band (COMCAM x Milliquas)", fontsize=16, y=1.05)
plt.show()

## Plot lightcurves
Magnitude light curves

In [None]:
# Keep only the matches whose r-band count (n_r) exceeds 220
comcam_AGN_many_r = comcam_x_milliquas_computed[comcam_x_milliquas_computed['n_r']>220]


In [None]:
# Show the nested forced-source table of the first selected object.
# NOTE(review): this previews row 0, while the light-curve plots below use
# row 1 (`iloc[1]`) - confirm which object is intended.
comcam_AGN_many_r.iloc[0]['objectForcedSource_COM']

In [None]:

# Nested forced-source table of the second selected object
df_lc = comcam_AGN_many_r.iloc[1]['objectForcedSource_COM']

# Plot color assigned to each band
band_colors = dict(
    u='blue',
    g='green',
    r='red',
    i='brown',
    z='purple',
    y='orange',
)

plt.figure(figsize=(10, 6))

# One errorbar series per band; bands with no rows are left out
for band, color in band_colors.items():
    df_band = df_lc[df_lc['band'] == band]
    if not df_band.empty:
        plt.errorbar(
            df_band['midpointMjdTai'],
            df_band['psfMag'],
            yerr=df_band['psfMagErr'],
            fmt='o',
            color=color,
            alpha=0.7,
            label=band,
        )

# Magnitude axis is drawn inverted
plt.gca().invert_yaxis()
plt.title("Lightcurve in All Bands")
plt.xlabel("MJD (midpoint)")
plt.ylabel("PSF Magnitude")
plt.legend(title="Band")
plt.grid(True, linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()

In [None]:
# Identify all flag columns. The match is case-insensitive so that columns
# spelled with a lowercase "flag" (e.g. a "..._flag" suffix) are picked up
# alongside those containing "Flag"/"Flags".
# NOTE(review): assumes every column with "flag" in its name is a boolean
# quality flag - confirm against the forced-source schema.
flag_cols = [col for col in df_lc.columns if "flag" in col.lower()]

# Keep only rows where no flag column is set (False or NaN everywhere)
df_lc_clean = df_lc[~df_lc[flag_cols].any(axis=1)]

# Create figure
plt.figure(figsize=(10, 6))

# Plot each band that still has measurements after cleaning
for band, color in band_colors.items():
    df_band = df_lc_clean[df_lc_clean['band'] == band]
    if df_band.empty:
        continue
    plt.errorbar(
        df_band['midpointMjdTai'],
        df_band['psfMag'],
        yerr=df_band['psfMagErr'],
        fmt='o',
        label=band,
        color=color,
        alpha=0.7
    )

# Configure plot
plt.gca().invert_yaxis()  # Magnitude scale
plt.xlabel("MJD (midpoint)")
plt.ylabel("PSF Magnitude")
plt.title("Lightcurve in All Bands")
plt.legend(title="Band")
plt.grid(True, linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()

In [None]:
# Plot color assigned to each band
band_colors = dict(
    u='blue',
    g='green',
    r='red',
    i='brown',
    z='purple',
    y='orange',
)

# Two side-by-side panels sharing the time axis:
# left shows psfFlux, right shows psfDiffFlux
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6), sharex=True)

# For every band with data, draw the same epochs on both panels
for band, color in band_colors.items():
    df_band = df_lc_clean[df_lc_clean['band'] == band]
    if df_band.empty:
        continue

    for panel, flux_col, err_col in (
        (ax1, 'psfFlux', 'psfFluxErr'),
        (ax2, 'psfDiffFlux', 'psfDiffFluxErr'),
    ):
        panel.errorbar(
            df_band['midpointMjdTai'],
            df_band[flux_col],
            yerr=df_band[err_col],
            fmt='o',
            color=color,
            alpha=0.7,
            label=band,
        )

# Left panel formatting
ax1.set_title("PSF Flux")
ax1.set_xlabel("MJD (midpoint)")
ax1.set_ylabel("Flux [nJy]")
ax1.grid(True, linestyle='--', alpha=0.5)
ax1.legend(title="Band")

# Right panel formatting
ax2.set_title("PSF Diff Flux")
ax2.set_xlabel("MJD (midpoint)")
ax2.grid(True, linestyle='--', alpha=0.5)
ax2.legend(title="Band")

plt.tight_layout()
plt.show()

# Structure Function

In [None]:
# Display the flag-cleaned light-curve table
df_lc_clean

In [None]:
from numba import njit

@njit
def calcSF(taumin, taumax, tclip, fclip, nstep):
    """Compute a binned structure function over logarithmic time-lag bins.

    The range [taumin, taumax] is split into ``nstep`` bins of equal width in
    log10. For every bin, all pairs (i, j >= i) whose lag
    ``tclip[j] - tclip[i]`` lies in [tau1, tau2) contribute the squared
    difference ``(fclip[j] - fclip[i])**2``; the bin value is the square root
    of the mean of those squared differences.

    Only lags computed as later-index minus earlier-index are considered, so
    this presumably expects ``tclip`` sorted in increasing order.

    Returns
    -------
    tau1, tau2 : arrays of length ``nstep`` with lower / upper bin edges
    SF : array of length ``nstep``; NaN for bins that received no pairs
    N : array of length ``nstep`` with the number of pairs per bin
    """
    log_taumin = np.log10(taumin)
    dtau = (np.log10(taumax) - log_taumin) / nstep

    tau1 = np.zeros(nstep)
    tau2 = np.zeros(nstep)
    N = np.zeros(nstep)
    SF = np.zeros(nstep)

    n_epochs = tclip.size
    for k in range(nstep):
        # Edges of the k-th logarithmic bin
        lower = 10**(log_taumin + k*dtau)
        upper = 10**(np.log10(lower) + dtau)
        tau1[k] = lower
        tau2[k] = upper

        n_pairs = 0.0
        sq_sum = 0.0
        for i in range(n_epochs):
            # Lags from epoch i to itself and every later entry
            lags = tclip[i:] - tclip[i]
            in_bin = (lags >= lower) & (lags < upper)
            n_in_bin = np.sum(in_bin)
            if n_in_bin > 0:
                n_pairs += n_in_bin
                sq_sum += np.sum((fclip[i:][in_bin] - fclip[i])**2)

        N[k] = n_pairs
        if n_pairs > 0:
            SF[k] = np.sqrt(sq_sum / n_pairs)
        else:
            SF[k] = np.nan

    return tau1, tau2, SF, N

In [None]:
def SF_analysis(df, nstep=50, Nmin=2, id_col='Object_ID', filter_col='Filter',
                time_col='Epoch', mag_col='Mag'):
    """Plot the structure function of every object in ``df``, one curve per filter.

    For each object and filter the light curve is de-duplicated on epoch,
    sorted in time, iteratively 5-sigma clipped in magnitude, converted to
    flux normalized by the median magnitude, and passed to ``calcSF`` with
    lags between twice the smallest sampling interval and half the total
    baseline.

    Parameters
    ----------
    df : DataFrame
        Long-format photometry with one row per measurement.
    nstep : int
        Number of logarithmic lag bins handed to ``calcSF``.
    Nmin : int
        Only bins with strictly more than ``Nmin`` pairs are plotted.
    id_col, filter_col, time_col, mag_col : str
        Names of the object-id, filter, epoch and magnitude columns. The
        defaults match the column names this function has always expected.

    Raises
    ------
    KeyError
        If any of the four required columns is missing from ``df``.
    """
    required = [id_col, filter_col, time_col, mag_col]
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise KeyError(
            f"SF_analysis: missing column(s) {missing}; "
            f"available columns: {list(df.columns)}"
        )

    # A NaN magnitude would turn the median normalization (and so the whole
    # structure function) into NaN, so such rows are dropped up front.
    df = df.dropna(subset=[time_col, mag_col])
    filters = sorted(df[filter_col].unique())

    for obj in df[id_col].unique():
        obj_data_all = df[df[id_col] == obj]

        fig_sf, ax_sf = plt.subplots(figsize=(7, 3))
        ax_sf.set_title(f"Structure Function: {obj}")

        for filt in filters:
            filt_data = obj_data_all[obj_data_all[filter_col] == filt]
            filt_data = filt_data.drop_duplicates(subset=time_col)
            filt_data = filt_data.sort_values(by=time_col)
            mag = filt_data[mag_col]

            # Iterative 5-sigma clipping: repeat until a pass removes nothing.
            while True:
                mag_std = mag.std(ddof=1)
                # NaN std (fewer than 2 points) or zero scatter: nothing to
                # clip. Without this guard a zero std would flag every point.
                if not mag_std > 0:
                    break
                outliers = np.abs(mag - mag.mean()) >= 5 * mag_std
                if not outliers.any():
                    break
                filt_data = filt_data[~outliers]
                mag = filt_data[mag_col]

            # At least two epochs are needed to define any time lag
            if len(filt_data) < 2:
                continue

            # Convert mag -> flux normalized to the median magnitude
            flux = (10**(-0.4 * (mag - mag.median()))).to_numpy(dtype=float)
            time = filt_data[time_col].to_numpy(dtype=float)

            # SF time limits
            dtmin = np.min(np.diff(time))
            dtmax = time[-1] - time[0]
            taumin = 2 * dtmin
            taumax = 0.5 * dtmax
            # Baseline too short for even one lag bin: nothing to compute
            if not 0 < taumin < taumax:
                continue

            tau1, tau2, SF, N = calcSF(taumin, taumax, time, flux, nstep)

            well_sampled = N > Nmin
            ax_sf.loglog(tau1[well_sampled], SF[well_sampled],
                         label=filt, marker="o", linestyle="")

        ax_sf.set_xlabel("Time lag (days)")
        ax_sf.set_ylabel("Structure Function")
        ax_sf.grid(True)
        # Only draw a legend when at least one filter produced a curve
        if ax_sf.get_legend_handles_labels()[0]:
            ax_sf.legend()
        plt.show()

In [None]:
# SF_analysis reads the columns Object_ID / Filter / Epoch / Mag, whereas the
# light-curve table uses band / midpointMjdTai / psfMag, so adapt it first.
# Rows without a magnitude are dropped, and the single object is labelled
# with the index of the row the light curve was taken from (`iloc[1]`).
sf_input = (
    df_lc_clean
    .rename(columns={"band": "Filter", "midpointMjdTai": "Epoch", "psfMag": "Mag"})
    .dropna(subset=["Epoch", "Mag"])
    .assign(Object_ID=comcam_AGN_many_r.index[1])
)
SF_analysis(sf_input, nstep=50, Nmin=2)

In [None]:
# Display the flag-cleaned light-curve table again
df_lc_clean