In [1]:
import astropy.units as u
import lsdb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import dask
import scipy
import astropy.units as u
import warnings

from dask.distributed import Client
from nested_pandas.utils import count_nested
from dask.distributed import print as dask_print
from lsdb.core.search.pixel_search import PixelSearch
from lsdb.core.search.order_search import OrderSearch
from lsdb.core.search import ConeSearch
from io import StringIO
from nested_pandas import NestedDtype
from pathlib import Path

In [2]:
# Release tag and the HATS tree that holds its catalogs.
drp_release = "w_2025_11"
hats_dir = Path("/sdf/data/rubin/shared/lsdb_commissioning/hats")
hats_path = hats_dir.joinpath(drp_release)

# Root of the per-band catalogs that `big_ol_hists` reads (one "<band>_band" subdirectory each).
output_path = Path("/sdf/data/rubin/shared/lsdb_commissioning/science_projects/06_uncertainty/object_whiten")

# Directory where `big_ol_hists` saves the aggregated histogram figures.
hist_output_path = Path("/sdf/data/rubin/shared/lsdb_commissioning/science_projects/06_uncertainty/object_whiten_hists")

There's not that much going on here! We did all the work on all the objects and lightcurves in the previous notebook, so this is just aggregating the histograms over all lightcurves.

In [3]:
## Useful constants

# Photometric bands; each one is processed independently.
bands = ["u", "g", "r", "i", "z", "y"]

# Output schema hint passed as `meta` to `map_partitions`.
# NOTE(review): `dev_histogram` stores one count *array* per row under "hists";
# confirm that "int64" is the intended meta dtype for that column.
partial_meta = {
    "mag_bin": "float64",
    "hists": "int64",
}

# Histogram bin edges for the whitened deviations: 200 bins of width 0.1
# covering [-10, 10] symmetrically. `np.linspace` is used instead of
# `np.arange(-10, 10, 0.1)` because the latter stops at 9.9 (making the range
# asymmetric) and accumulates floating-point drift in the edges.
bins = np.linspace(-10, 10, 201)

fields = {
    "ECDFS": (53.13, -28.10),  # Extended Chandra Deep Field South
    "EDFS": (59.10, -48.73),  # Euclid Deep Field South
    "Rubin_SV_38_7": (37.86, 6.98),  # Low Ecliptic Latitude Field
    "Rubin_SV_95_-25": (95.00, -25.00),  # Low Galactic Latitude Field
    "47_Tuc": (6.02, -72.08),  # 47 Tuc Globular Cluster
    "Fornax_dSph": (40.00, -34.45),  # Fornax Dwarf Spheroidal Galaxy
}

# Define the radius for selecting sources
selection_radius_arcsec = 2.0 * 3600  # 2-degree radius

In [4]:
def dev_histogram(frame, pixel, band, plot=False):
    """Histogram the whitened deviations of one partition, per magnitude bin.

    Each row's ``wmean_flux`` is converted from nJy to AB magnitude and floored
    to assign the row to a magnitude bin. The ``whitened_data`` arrays of all
    rows sharing a bin are concatenated and counted against the module-level
    ``bins`` edges.

    Parameters
    ----------
    frame
        Partition providing the ``whitened_data`` and ``wmean_flux`` columns.
        (Assumes ``whitened_data`` holds one array of deviations per row --
        TODO confirm against the notebook that wrote the catalog.)
    pixel
        Identifier of the partition; only used in the figure title.
    band
        Band label; only used in the figure title.
    plot : bool, optional
        When True, also draw one histogram panel per magnitude bin. The figure
        is left open in pyplot's state; it is neither saved nor returned.

    Returns
    -------
    dict
        ``"mag_bin"``: the sorted unique floored magnitudes, and ``"hists"``: a
        list with one array of counts per magnitude bin, in the same order.
    """
    deviations = frame["whitened_data"].values.to_numpy()

    with warnings.catch_warnings():
        # Silence warnings raised during the flux -> magnitude conversion
        # (presumably from non-positive fluxes -- confirm).
        warnings.simplefilter("ignore")
        mags = u.nJy.to(u.ABmag, frame["wmean_flux"].values)
        mag_bins = np.floor(mags)

    unique_mags, unique_inverse = np.unique(mag_bins, return_inverse=True)
    result = {"mag_bin": unique_mags}
    hists = []
    for unique_index in range(len(unique_mags)):
        # Pool every deviation belonging to rows that fall in this magnitude bin.
        mag_devs = np.concatenate(deviations[unique_inverse == unique_index])
        counts, _ = np.histogram(mag_devs, bins=bins)
        hists.append(counts)

    # Guard against an empty partition: plt.subplots rejects zero rows.
    if plot and len(unique_mags) > 0:
        # squeeze=False keeps `axs` a 2-D array even when there is a single
        # magnitude bin; otherwise a bare Axes is returned and `.flat` fails.
        fig, axs = plt.subplots(
            len(unique_mags),
            1,
            sharex=True,
            figsize=(10, 3 * len(unique_mags)),
            layout="constrained",
            squeeze=False,
        )
        for ax, mag_bin, histo in zip(axs.flat, unique_mags, hists):
            # Draw pre-binned counts: one sample per bin, weighted by its count.
            ax.hist(bins[:-1], bins, weights=histo)
            ax.set_title(f"{mag_bin} mag bin")
        fig.suptitle(f"{pixel} {band} band")

    result["hists"] = hists
    return result

In [5]:
def _save_hist_grid(mag_bins, hists, title, out_file, density=False):
    """Save a one-column grid of pre-binned histograms, one panel per magnitude bin."""
    # squeeze=False keeps `axs` a 2-D array even for a single magnitude bin;
    # otherwise a bare Axes is returned and `.flat` fails.
    fig, axs = plt.subplots(
        len(mag_bins),
        1,
        sharex=True,
        figsize=(10, 3 * len(mag_bins)),
        layout="constrained",
        squeeze=False,
    )
    try:
        for ax, mag_bin, histo in zip(axs.flat, mag_bins, hists):
            # Draw pre-binned counts: one sample per bin, weighted by its count.
            ax.hist(bins[:-1], bins, weights=histo, density=density)
            if density:
                # Overlay the standard normal PDF for comparison.
                ax.plot(bins, scipy.stats.norm().pdf(bins))
            ax.set_title(f"{mag_bin} mag bin")
        fig.suptitle(title)
        fig.savefig(out_file)
    finally:
        # Always release the figure, even if drawing or saving fails.
        plt.close(fig)


def big_ol_hists(band, plot=False):
    """Aggregate the per-partition deviation histograms of one band and save figures.

    Reads the ``{band}_band`` catalog under ``output_path``, runs
    ``dev_histogram`` on every partition, sums the resulting histograms per
    magnitude bin (NaN bins are kept), and writes two figures into
    ``hist_output_path``: ``{band}band_counts.png`` (raw counts) and
    ``{band}band_density.png`` (normalized, with a standard normal PDF overlaid).

    Parameters
    ----------
    band : str
        Band label selecting the catalog and naming the output files.
    plot : bool, optional
        Accepted for backward compatibility; currently unused (it is not
        forwarded to ``dev_histogram``).

    Returns
    -------
    str
        The ``band`` argument, unchanged.
    """
    print("    ", band, "band")
    # Turn off interactive mode so figures are only written to disk.
    plt.ioff()
    stats_catalog = lsdb.read_hats(output_path / f"{band}_band")

    pixel_results = stats_catalog.map_partitions(
        dev_histogram, meta=partial_meta, include_pixel=True, band=band
    ).compute()

    # dropna=False keeps the NaN magnitude bin as its own group.
    all_sky = pixel_results.groupby("mag_bin", dropna=False).sum().reset_index()

    unique_mags = all_sky["mag_bin"].values
    hists = all_sky["hists"].values

    if len(unique_mags) == 0:
        # plt.subplots rejects zero rows; skip the figures instead of crashing
        # so the remaining bands can still be processed.
        print("    ", band, "band: no magnitude bins, skipping figures")
        return band

    # savefig does not create missing directories.
    hist_output_path.mkdir(parents=True, exist_ok=True)

    suptitle = f"All fields {band} band"
    _save_hist_grid(unique_mags, hists, suptitle, hist_output_path / f"{band}band_counts.png")
    _save_hist_grid(
        unique_mags,
        hists,
        suptitle,
        hist_output_path / f"{band}band_density.png",
        density=True,
    )

    return band

In [6]:
# Process the bands one after another on a local cluster with a single
# single-threaded worker limited to 10GB of memory.
with Client(n_workers=1, threads_per_worker=1, memory_limit="10GB") as client:
    dask.config.set({"dataframe.convert-string": False})

    # `big_ol_hists` returns its band label, so this ends up listing the bands processed.
    all_sums = list(map(big_ol_hists, bands))

     u band
     g band


     r band


     i band


     z band


     y band
