In [None]:
# Standard library
import os

# Core scientific computing
import numpy as np
import pandas as pd
from tqdm import tqdm
from numba import njit
import random

# Visualization
import matplotlib.pyplot as plt

# Astronomy-specific
import astropy.units as u
from astropy.coordinates import SkyCoord

# Distributed computing
from dask.distributed import Client

# Nested pandas and HATS/LSDB
import nested_pandas as npd
from nested_pandas.utils import count_nested
import lsdb
from lsdb.core.search import ConeSearch
import hats

import lsst.daf.butler as dafButler
from lsst.analysis.ap import apdb
from lsst.ap.association import AssociationTask, AssociationConfig
from lsst.dax.apdb import Apdb, ApdbCassandra, ApdbTables
import lsst.geom as geom
from lsst.afw import image as afwImage

# Filesystem / cloud paths
from upath import UPath

In [None]:
# Butler configuration: repository alias, run collection and instrument that
# every dataset query / load in this notebook goes through.
repo = "embargo"
collection = "LSSTCam/runs/DRP/20250501_20250604/w_2025_23/DM-51284"
instrument = "LSSTCam"

# Shared Butler handle; later cells call butler.query_datasets() and butler.get() on it.
butler = dafButler.Butler(repo, collections=collection, instrument=instrument)

In [None]:
# Query the dataset references available in the configured collection for the
# four dataset types of interest.
# NOTE(review): `lc_refs` is queried again in a later cell, and `diaobj_refs` /
# `dialc_refs` are only counted here -- confirm whether they are still needed.
obj_refs = butler.query_datasets("object")
lc_refs = butler.query_datasets("object_forced_source")
diaobj_refs = butler.query_datasets("dia_object")
dialc_refs = butler.query_datasets("dia_source")

# Report how many datasets of each type were found.
print("Number of 'object' datasets:", len(obj_refs))
print("Number of 'object_forced_source' datasets:", len(lc_refs))
print("Number of 'dia_object' datasets:", len(diaobj_refs))
print("Number of 'dia_source' datasets:", len(dialc_refs))

In [None]:
# Draw a reproducible random subset of the "object" dataset refs and record the
# tracts they belong to.
# NOTE: the name `obj_ref_3163` is historical -- it holds several sampled refs,
# not only tract 3163 -- and is kept because later cells refer to it.
N_SAMPLED_OBJECT_REFS = 10
OBJECT_REF_SAMPLE_SEED = 42

# A private Random instance keeps the draw repeatable without touching the
# global RNG state; min() avoids the ValueError random.sample raises when the
# collection holds fewer refs than requested.
obj_ref_3163 = random.Random(OBJECT_REF_SAMPLE_SEED).sample(
    list(obj_refs), min(N_SAMPLED_OBJECT_REFS, len(obj_refs))
)

# Tract ids of the sampled refs; used below to filter the other dataset types.
obj_tracts = [ref.dataId["tract"] for ref in obj_ref_3163]
obj_tracts


In [None]:
# All "isolated_star" dataset refs in the collection, narrowed to the refs whose
# data ID tract is one of the sampled `obj_tracts`.
is_refs = butler.query_datasets("isolated_star")
is_refs_in_tracts = [ref for ref in is_refs if ref.dataId.get("tract") in obj_tracts]
# Display the selected refs.
is_refs_in_tracts

In [None]:
# Forced-source (light-curve) dataset refs, restricted to the sampled tracts.
lc_refs = butler.query_datasets("object_forced_source")
lc_refs_in_tracts = [ref for ref in lc_refs if ref.dataId.get("tract") in obj_tracts]

# Keep at most MAX_FORCED_SOURCE_REFS of them, chosen at random but reproducibly.
# min() avoids the ValueError random.sample raises when fewer refs are available.
MAX_FORCED_SOURCE_REFS = 500
FORCED_SOURCE_SAMPLE_SEED = 42
sample_size = min(MAX_FORCED_SOURCE_REFS, len(lc_refs_in_tracts))

# NOTE: `lc_refs_3163` is a historical name; it holds refs from every sampled tract.
lc_refs_3163 = random.Random(FORCED_SOURCE_SAMPLE_SEED).sample(lc_refs_in_tracts, sample_size)

print(f"Sampled {len(lc_refs_3163)} object_forced_source refs from selected tracts.")

In [None]:
# Read the "object" table behind every sampled ref and stack the results.
object_frames = []

for object_ref in tqdm(obj_ref_3163):
    try:
        # Fetch the table for this data ID and turn it into a pandas DataFrame.
        object_table = butler.get("object", dataId=object_ref.dataId)
        object_frame = pd.DataFrame(object_table.to_pandas())
    except FileNotFoundError:
        # A missing file is reported and that ref is skipped.
        print(f"Warning: objectTable_tract not found for tract {object_ref.dataId['tract']}. Skipping.")
    else:
        object_frames.append(object_frame)

# One combined frame (original indices kept), or an empty frame when nothing loaded.
if object_frames:
    object_list = pd.concat(object_frames, ignore_index=False)
else:
    object_list = pd.DataFrame()

In [None]:
# Collect one DataFrame per isolated_star dataset in the sampled tracts.
isolated_star_list = []

for forsor_ref in tqdm(is_refs_in_tracts):
    try:
        # Load the isolated_star table for this data ID and convert it to pandas.
        table = butler.get("isolated_star", dataId=forsor_ref.dataId)
        table = pd.DataFrame(table.to_pandas())
        isolated_star_list.append(table)
    except FileNotFoundError:
        # Name the dataset that is actually being loaded so the warning is not
        # mistaken for a missing object table.
        print(f"Warning: isolated_star not found for tract {forsor_ref.dataId['tract']}. Skipping.")

# Concatenate into one DataFrame (empty frame when nothing could be loaded).
isolated_star_list = pd.concat(isolated_star_list, ignore_index=False) if isolated_star_list else pd.DataFrame()

In [None]:
# Display the combined isolated-star table (notebook rich repr).
isolated_star_list

In [None]:
# Load the sampled object_forced_source tables and stack them into one frame.
# The band listing that used to sit in a separate cell *before* this load is
# placed after it, so the notebook survives Restart & Run All.
object_forced_source_list = []

for forsor_ref in tqdm(lc_refs_3163):
    try:
        # Load the forced-source table for this data ID and convert it to pandas.
        table = butler.get("object_forced_source", dataId=forsor_ref.dataId)
        table = pd.DataFrame(table.to_pandas())
        object_forced_source_list.append(table)
    except FileNotFoundError:
        # Name the dataset that is actually being loaded.
        print(f"Warning: object_forced_source not found for tract {forsor_ref.dataId['tract']}. Skipping.")

# Concatenate into one DataFrame (empty frame when nothing could be loaded).
object_forced_source = pd.concat(object_forced_source_list, ignore_index=False) if object_forced_source_list else pd.DataFrame()

# Bands present in the loaded forced photometry.
object_forced_source['band'].unique()

In [None]:
# Add ".consdb" to the proxy bypass list. `no_proxy` may be unset (a plain `+=`
# would raise KeyError) and this cell may be re-run, so build the list
# defensively and append the entry only once.
_no_proxy_hosts = [host for host in os.environ.get("no_proxy", "").split(",") if host]
if ".consdb" not in _no_proxy_hosts:
    _no_proxy_hosts.append(".consdb")
os.environ["no_proxy"] = ",".join(_no_proxy_hosts)

from getpass import getpass
from lsst.summit.utils import ConsDbClient

# SECURITY: access tokens must never be stored in the notebook. Read the token
# from the CONSDB_TOKEN environment variable, or prompt for it interactively.
# Any token that was previously committed here should be revoked.
token = os.environ.get("CONSDB_TOKEN") or getpass("ConsDB access token: ")
client = ConsDbClient(f"https://user:{token}@usdf-rsp.slac.stanford.edu/consdb")

# Visit metadata for the BLOCK-365 science program in the selected day_obs range.
visits = client.query(
    "SELECT * FROM cdb_lsstcam.visit1 WHERE day_obs >= 20250430 AND day_obs <= 20250605 and science_program = 'BLOCK-365'"
).to_pandas()

# Expose the visit id under the column name used as merge key by the forced-source table.
visits['visit'] = visits['visit_id']

In [None]:
# Attach the exposure mid-point MJD of each visit to the forced photometry,
# drop rows without a matching time, and order the result chronologically.
visit_times = visits[['visit', 'exp_midpt_mjd']]
timed_sources = (
    object_forced_source
    .merge(visit_times, on='visit', how='left')
    .dropna(subset=['exp_midpt_mjd'])
    .sort_values('exp_midpt_mjd')
)

# Every column whose name contains 'Flag' is treated as a quality flag;
# a row is discarded as soon as any of them is set.
flag_columns = [col for col in timed_sources.columns if 'Flag' in col]
has_any_flag = timed_sources[flag_columns].any(axis=1)
object_forced_source = timed_sources[~has_any_flag]

In [None]:
# Duplicate the coordinate columns under the names 'ra' / 'dec'
# (presumably the defaults lsdb looks for -- TODO confirm).
# .assign() returns a new frame, which avoids the SettingWithCopyWarning that
# column assignment raises on a frame produced by boolean filtering.
object_forced_source = object_forced_source.assign(
    ra=object_forced_source['coord_ra'],
    dec=object_forced_source['coord_dec'],
)

In [None]:
# Number of forced-source rows that survived the time merge and flag filtering.
len(object_forced_source)

In [None]:
# Wrap the forced-source frame in an lsdb catalog so it can be cross-matched.
# NOTE(review): highest_order=10 is presumably the finest HEALPix order allowed
# for the partitions -- confirm against the lsdb.from_dataframe documentation.
object_forced_source_cat = lsdb.from_dataframe(object_forced_source, highest_order=10)

In [None]:
# Cross-match the forced sources against the isolated-star table within
# 0.1 arcsec; columns coming from the isolated-star side get the "_is" suffix.
# NOTE(review): `isolated_star_list` is a pandas DataFrame here -- confirm it
# carries the coordinate columns lsdb.crossmatch expects.
object_forced_source_cat_isolated =lsdb.crossmatch(object_forced_source_cat, isolated_star_list, radius_arcsec=.1, suffixes=("", "_is"))
# Materialise the lazy result into an in-memory frame.
object_forced_source_cat_isolated = object_forced_source_cat_isolated.compute()




In [None]:
# Keep only the columns needed for the scatter analysis; `.copy()` detaches the
# result from the cross-match output.
# NOTE(review): this rebinds `object_forced_source`, so re-running earlier cells
# after this one operates on the reduced frame.
object_forced_source = object_forced_source_cat_isolated[['objectId', 'coord_ra', 'exp_midpt_mjd', 'coord_dec', 'psfFlux', 'psfFluxErr', 'psfDiffFlux', 'psfDiffFluxErr', 'band']].copy()

In [None]:
from tqdm import tqdm
import pandas as pd
import numpy as np

# Columns of every per-band result table (also used for the empty fallback).
BAND_DELTA_COLUMNS = [
    'objectId',
    'median_flux',
    'mean_psfDiffFluxError',
    'delta_psfDiffFlux',
    'mean_psfFluxError',
    'delta_psfFlux',
    'delta_exp_midpt_mjd',
]


def compute_band_deltas(forced_sources, objects, band, min_epochs=26, n_sample=10,
                        max_objects=10000, random_state=42):
    """Build the table of consecutive-epoch flux differences for one band.

    For every point source (``r_extendedness == False`` in ``objects``) with at
    least ``min_epochs`` forced measurements in ``band``, ``n_sample`` epochs
    are drawn at random, ordered in time, and differenced pairwise.

    Parameters
    ----------
    forced_sources : pandas.DataFrame
        Forced photometry with columns ``objectId``, ``band``,
        ``exp_midpt_mjd``, ``psfFlux``, ``psfFluxErr``, ``psfDiffFlux`` and
        ``psfDiffFluxErr``.
    objects : pandas.DataFrame
        Object table with columns ``objectId`` and ``r_extendedness``.
    band : str
        Band label to select from ``forced_sources['band']``.
    min_epochs : int, optional
        Minimum number of measurements in ``band`` an object needs. Raised to
        ``n_sample`` if smaller, so sampling without replacement always works.
    n_sample : int, optional
        Number of epochs drawn per object (gives ``n_sample - 1`` differences).
    max_objects : int, optional
        Upper limit on the number of objects processed.
    random_state : int, optional
        Seed passed to ``DataFrame.sample`` for every object.

    Returns
    -------
    pandas.DataFrame
        One row per consecutive pair of sampled epochs with the columns listed
        in ``BAND_DELTA_COLUMNS``. The two ``mean_*Error`` columns hold the
        per-object *median* single-epoch error multiplied by sqrt(2), i.e. the
        expected error of a difference of two measurements. The frame is empty
        (same columns) when no object qualifies.
    """
    min_epochs = max(min_epochs, n_sample)
    band_sources = forced_sources[forced_sources['band'] == band]

    # Objects with enough measurements in this band.
    epochs_per_object = band_sources['objectId'].value_counts()
    well_observed_ids = epochs_per_object[epochs_per_object >= min_epochs].index

    # Of those, keep the ones classified as point sources in the object table.
    is_well_observed = objects['objectId'].isin(well_observed_ids)
    is_point_source = objects['r_extendedness'] == False  # elementwise; NaN rows drop out
    star_ids = objects.loc[is_well_observed & is_point_source, 'objectId'].unique()

    # Rows of *this band* for the selected objects, capped at max_objects objects.
    star_sources = band_sources[band_sources['objectId'].isin(star_ids)]
    selected_ids = star_sources['objectId'].unique()[:max_objects]
    star_sources = star_sources[star_sources['objectId'].isin(selected_ids)]
    print(f"{band}-band: {len(band_sources)} rows, processing {len(selected_ids)} unique objectIds...")

    per_object = []
    # A single groupby pass replaces a full boolean scan of the frame per object.
    for obj_id, epochs in tqdm(star_sources.groupby('objectId', sort=False), desc=f"{band}-band"):
        # The object's brightness is taken from all of its epochs.
        median_flux = epochs['psfFlux'].median()

        # DataFrame.sample returns rows in random order, so sort by time before
        # differencing; otherwise the time deltas are not chronological.
        sampled = epochs.sample(n=n_sample, random_state=random_state).sort_values('exp_midpt_mjd')

        per_object.append(pd.DataFrame({
            'objectId': obj_id,
            'median_flux': median_flux,
            'mean_psfDiffFluxError': sampled['psfDiffFluxErr'].median() * np.sqrt(2),
            'delta_psfDiffFlux': np.diff(sampled['psfDiffFlux']),
            'mean_psfFluxError': sampled['psfFluxErr'].median() * np.sqrt(2),
            'delta_psfFlux': np.diff(sampled['psfFlux']),
            'delta_exp_midpt_mjd': np.diff(sampled['exp_midpt_mjd']),
        }))

    if not per_object:
        # pd.concat raises on an empty list; return an empty, well-formed table.
        return pd.DataFrame(columns=BAND_DELTA_COLUMNS)
    return pd.concat(per_object, ignore_index=True)


# Minimum number of epochs per object and band. u has far fewer visits, so its
# threshold is lower (it still needs the 10 epochs that get sampled).
BAND_MIN_EPOCHS = {'u': 10, 'g': 26, 'r': 26, 'i': 26, 'z': 26, 'y': 26}

# Every band is built from its own rows (the z and y tables used to be built
# from i-band rows by mistake).
final_df_u = compute_band_deltas(object_forced_source, object_list, 'u', min_epochs=BAND_MIN_EPOCHS['u'])
final_df_g = compute_band_deltas(object_forced_source, object_list, 'g', min_epochs=BAND_MIN_EPOCHS['g'])
final_df_r = compute_band_deltas(object_forced_source, object_list, 'r', min_epochs=BAND_MIN_EPOCHS['r'])
final_df_i = compute_band_deltas(object_forced_source, object_list, 'i', min_epochs=BAND_MIN_EPOCHS['i'])
final_df_z = compute_band_deltas(object_forced_source, object_list, 'z', min_epochs=BAND_MIN_EPOCHS['z'])
final_df_y = compute_band_deltas(object_forced_source, object_list, 'y', min_epochs=BAND_MIN_EPOCHS['y'])

In [None]:
# Number of flux-difference rows available per band, in u, g, r, i, z, y order.
print(len(final_df_u), len(final_df_g), len(final_df_r), len(final_df_i), len(final_df_z), len(final_df_y))

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import astropy.units as u

def process_band_data_dia(final_df_band, label, max_dt_days=0.5, min_per_bin=100):
    """Summarise same-night difference-flux scatter as a function of magnitude.

    Parameters
    ----------
    final_df_band : pandas.DataFrame
        Per-band table with columns ``median_flux``, ``delta_psfDiffFlux``,
        ``mean_psfDiffFluxError`` and ``delta_exp_midpt_mjd``. Fluxes are passed
        to ``u.nJy.to(u.ABmag, ...)``, i.e. they are interpreted as nJy.
    label : str
        Band label; returned unchanged as the last tuple element.
    max_dt_days : float, optional
        Largest absolute time separation of a pair kept as "same night".
    min_per_bin : int, optional
        Magnitude bins with fewer pairs than this are skipped.

    Returns
    -------
    tuple
        ``(short_diff, bin_centers, low_vals, med_vals, high_vals, err_vals,
        label)``: the filtered per-pair frame, followed by per-bin lists of the
        bin centre, the 16th / 50th / 84th percentile of the magnitude
        difference, and the claimed error converted to magnitudes.
    """
    final_df_band = final_df_band.copy()

    # Magnitude of the object, taken from its median flux.
    final_df_band['median_psfMag'] = u.nJy.to(u.ABmag, final_df_band['median_flux'])

    # Magnitude change implied by adding the difference-flux delta to the median flux.
    flux_delta = final_df_band['delta_psfDiffFlux'] + final_df_band['median_flux']
    final_df_band['mag_total_delta'] = u.nJy.to(u.ABmag, flux_delta)
    final_df_band['mag_Diff_delta'] = final_df_band['mag_total_delta'] - final_df_band['median_psfMag']

    # Same-night pairs. Use the absolute separation: a signed `< max_dt_days`
    # test would also accept arbitrarily large negative time deltas.
    same_night = final_df_band['delta_exp_midpt_mjd'].abs() < max_dt_days
    short_diff = final_df_band.loc[
        same_night, ['median_psfMag', 'mag_Diff_delta', 'mean_psfDiffFluxError', 'median_flux']
    ].dropna().copy()

    # Bin by object magnitude in 0.5 mag steps from 16 to 24.5.
    bins = np.arange(16, 24.51, 0.5)
    short_diff['mag_bin'] = pd.cut(short_diff['median_psfMag'], bins)

    bin_centers, low_vals, med_vals, high_vals, err_vals = [], [], [], [], []

    # observed=True iterates only over populated bins (empty ones were skipped anyway).
    for bin_interval, group in short_diff.groupby('mag_bin', observed=True):
        if len(group) < min_per_bin:
            continue
        q16, q50, q84 = np.percentile(group['mag_Diff_delta'], [16, 50, 84])

        # Convert the mean claimed flux error of the bin into magnitudes by
        # evaluating the magnitude at (mean flux +/- error) and halving the gap.
        mean_err_flux = group['mean_psfDiffFluxError'].mean()
        mean_flux = group['median_flux'].mean()
        mag_plus_err = u.nJy.to(u.ABmag, mean_flux + mean_err_flux)
        mag_minus_err = u.nJy.to(u.ABmag, mean_flux - mean_err_flux)
        mean_err_mag = float(abs(mag_plus_err - mag_minus_err) / 2)

        bin_centers.append((bin_interval.left + bin_interval.right) / 2)
        low_vals.append(q16)
        med_vals.append(q50)
        high_vals.append(q84)
        err_vals.append(mean_err_mag)

    return short_diff, bin_centers, low_vals, med_vals, high_vals, err_vals, label

# All six bands are listed, but only the first four (u, g, r, i) are plotted.
bands = [
    (final_df_u, 'u-band'),
    (final_df_g, 'g-band'),
    (final_df_r, 'r-band'),
    (final_df_i, 'i-band'),
    (final_df_z, 'z-band'),
    (final_df_y, 'y-band')
][:4]

# 2x2 layout, flattened row by row: axes[0], axes[2] form the left column.
fig, axes = plt.subplots(2, 2, figsize=(21, 12), sharey=True)
axes = axes.flatten()

for ax, (df, label) in zip(axes, bands):
    short_diff, bin_centers, low_vals, med_vals, high_vals, err_vals, title = process_band_data_dia(df, label)

    # Individual same-night pairs plus the binned 16/50/84 percentile curves.
    ax.scatter(short_diff['median_psfMag'], short_diff['mag_Diff_delta'], s=5, alpha=0.5, color='tab:blue', label='Data')
    ax.plot(bin_centers, high_vals, color='red', linestyle='--', label='84th percentile')
    ax.plot(bin_centers, med_vals, color='black', linestyle='-', label='50th percentile')
    ax.plot(bin_centers, low_vals, color='red', linestyle='--', label='16th percentile')

    # Band expected from the claimed errors alone, centred on the median curve.
    upper_err_line = [m + e for m, e in zip(med_vals, err_vals)]
    lower_err_line = [m - e for m, e in zip(med_vals, err_vals)]

    ax.plot(bin_centers, upper_err_line, color='orange', linestyle=':', lw=4, label='+median flux error')
    ax.plot(bin_centers, lower_err_line, color='orange', linestyle=':', lw=4, label='–median flux error')

    # Observed scatter: half the 16th-84th percentile width.
    actual_spread = (np.array(high_vals) - np.array(low_vals)) / 2
    # Error that must be added in quadrature to the claimed one to reach the
    # observed scatter. Clip at zero so bins where the claimed error already
    # exceeds the scatter give 0 instead of NaN (same as the summary figure).
    delta_84_vs_err = np.sqrt(np.clip(actual_spread**2 - np.array(err_vals)**2, 0, None))

    inset_ax = ax.inset_axes([0.05, 0.05, 0.4, 0.3])
    inset_ax.plot(bin_centers, delta_84_vs_err, color='purple', marker='o', markersize=3, linewidth=1)
    inset_ax.axhline(0, color='gray', linestyle='--', linewidth=0.5)
    inset_ax.set_title("additional err needed", fontsize=8)
    inset_ax.tick_params(labelsize=7)
    inset_ax.set_xlim(16, 24)
    inset_ax.set_ylim(0.0, 0.05)
    inset_ax.grid(True, linestyle='--', linewidth=0.3, alpha=0.6)

    ax.axhline(0, color='gray', linestyle='--', linewidth=1)
    ax.set_title(title, fontsize=13)
    # The x value is the magnitude of the object's *median* flux.
    ax.set_xlabel("Median PSF Magnitude", fontsize=12)
    ax.set_xlim(16, 24)
    ax.set_ylim(-0.2, 0.2)
    ax.grid(True, linestyle='--', linewidth=0.5, alpha=0.7)

# y-labels on the left column only (axes[0] top-left, axes[2] bottom-left).
axes[0].set_ylabel("Δ Magnitude (psfDiffFlux)", fontsize=12)
axes[2].set_ylabel("Δ Magnitude (psfDiffFlux)", fontsize=12)

# Legend in the bottom-right plot
axes[3].legend(loc='upper right', fontsize=9)

plt.suptitle("r_extendedness=0, forced observations, psfDiffFlux Δmag vs. Median Mag (< 0.5 days)", fontsize=15)
plt.tight_layout(rect=[0, 0, 1, 0.95])
plt.show()

In [None]:
# Scratch check: half-width of the 16th-84th percentile band, computed from the
# `high_vals` / `med_vals` / `low_vals` left behind by the last plotting loop.
# NOTE(review): depends on leaked loop variables, i.e. on which cell ran last.
actual_spread = (np.array(high_vals) - np.array(med_vals) + np.array(med_vals) - np.array(low_vals))/2

In [None]:
# Scratch check: quadrature difference between the observed spread and the
# claimed error; yields NaN wherever the claimed error exceeds the spread.
np.sqrt(actual_spread**2 - np.array(err_vals)**2)

In [None]:
# Scratch check: claimed variance plus a 0.01 term added in quadrature
# (presumably a trial error floor in magnitudes -- TODO confirm). Note that this
# is a variance; no square root is taken.
np.array(err_vals)**2 + 0.01**2

In [None]:
def process_band_data(final_df_band, label, max_dt_days=0.5, min_per_bin=100):
    """Summarise same-night direct-flux (psfFlux) scatter as a function of magnitude.

    Parameters
    ----------
    final_df_band : pandas.DataFrame
        Per-band table with columns ``median_flux``, ``delta_psfFlux``,
        ``mean_psfFluxError`` and ``delta_exp_midpt_mjd``. Fluxes are passed to
        ``u.nJy.to(u.ABmag, ...)``, i.e. they are interpreted as nJy.
    label : str
        Band label; returned unchanged as the last tuple element.
    max_dt_days : float, optional
        Largest absolute time separation of a pair kept as "same night".
    min_per_bin : int, optional
        Magnitude bins with fewer pairs than this are skipped.

    Returns
    -------
    tuple
        ``(short_diff, bin_centers, low_vals, med_vals, high_vals, err_vals,
        label)``: the filtered per-pair frame, followed by per-bin lists of the
        bin centre, the 16th / 50th / 84th percentile of the magnitude
        difference, and the claimed error converted to magnitudes.
    """
    final_df_band = final_df_band.copy()

    # Magnitude of the object, taken from its median flux.
    final_df_band['median_psfMag'] = u.nJy.to(u.ABmag, final_df_band['median_flux'])

    # Magnitude change implied by adding the flux delta to the median flux.
    flux_delta = final_df_band['delta_psfFlux'] + final_df_band['median_flux']
    final_df_band['mag_total_delta'] = u.nJy.to(u.ABmag, flux_delta)
    final_df_band['mag_delta'] = final_df_band['mag_total_delta'] - final_df_band['median_psfMag']

    # Same-night pairs. Use the absolute separation: a signed `< max_dt_days`
    # test would also accept arbitrarily large negative time deltas.
    same_night = final_df_band['delta_exp_midpt_mjd'].abs() < max_dt_days
    short_diff = final_df_band.loc[
        same_night, ['median_psfMag', 'mag_delta', 'mean_psfFluxError', 'median_flux']
    ].dropna().copy()

    # Bin by object magnitude in 0.5 mag steps from 16 to 24.5.
    bins = np.arange(16, 24.51, 0.5)
    short_diff['mag_bin'] = pd.cut(short_diff['median_psfMag'], bins)

    bin_centers, low_vals, med_vals, high_vals, err_vals = [], [], [], [], []

    # observed=True iterates only over populated bins (empty ones were skipped anyway).
    for bin_interval, group in short_diff.groupby('mag_bin', observed=True):
        if len(group) < min_per_bin:
            continue
        q16, q50, q84 = np.percentile(group['mag_delta'], [16, 50, 84])

        # Convert the mean claimed flux error of the bin into magnitudes by
        # evaluating the magnitude at (mean flux +/- error) and halving the gap.
        mean_err_flux = group['mean_psfFluxError'].mean()
        mean_flux = group['median_flux'].mean()
        mag_plus_err = u.nJy.to(u.ABmag, mean_flux + mean_err_flux)
        mag_minus_err = u.nJy.to(u.ABmag, mean_flux - mean_err_flux)
        mean_err_mag = float(abs(mag_plus_err - mag_minus_err) / 2)

        bin_centers.append((bin_interval.left + bin_interval.right) / 2)
        low_vals.append(q16)
        med_vals.append(q50)
        high_vals.append(q84)
        err_vals.append(mean_err_mag)

    return short_diff, bin_centers, low_vals, med_vals, high_vals, err_vals, label

# All six bands are listed, but only the first four (u, g, r, i) are plotted.
bands = [
    (final_df_u, 'u-band'),
    (final_df_g, 'g-band'),
    (final_df_r, 'r-band'),
    (final_df_i, 'i-band'),
    (final_df_z, 'z-band'),
    (final_df_y, 'y-band')
][:4]

# 2x2 layout, flattened row by row: axes[0], axes[2] form the left column.
fig, axes = plt.subplots(2, 2, figsize=(21, 12), sharey=True)
axes = axes.flatten()

for ax, (df, label) in zip(axes, bands):
    short_diff, bin_centers, low_vals, med_vals, high_vals, err_vals, title = process_band_data(df, label)

    # Individual same-night pairs plus the binned 16/50/84 percentile curves.
    ax.scatter(short_diff['median_psfMag'], short_diff['mag_delta'], s=5, alpha=0.5, color='tab:blue', label='Data')
    ax.plot(bin_centers, high_vals, color='red', linestyle='--', label='84th percentile')
    ax.plot(bin_centers, med_vals, color='black', linestyle='-', label='50th percentile')
    ax.plot(bin_centers, low_vals, color='red', linestyle='--', label='16th percentile')

    # Band expected from the claimed errors alone, centred on the median curve.
    upper_err_line = [m + e for m, e in zip(med_vals, err_vals)]
    lower_err_line = [m - e for m, e in zip(med_vals, err_vals)]

    ax.plot(bin_centers, upper_err_line, color='orange', linestyle=':', lw=4, label='+median flux error')
    ax.plot(bin_centers, lower_err_line, color='orange', linestyle=':', lw=4, label='–median flux error')

    # Observed scatter: half the 16th-84th percentile width.
    actual_spread = (np.array(high_vals) - np.array(low_vals)) / 2
    # Error that must be added in quadrature to the claimed one to reach the
    # observed scatter. Clip at zero so bins where the claimed error already
    # exceeds the scatter give 0 instead of NaN (same as the summary figure).
    delta_84_vs_err = np.sqrt(np.clip(actual_spread**2 - np.array(err_vals)**2, 0, None))

    inset_ax = ax.inset_axes([0.05, 0.05, 0.4, 0.3])
    inset_ax.plot(bin_centers, delta_84_vs_err, color='purple', marker='o', markersize=3, linewidth=1)
    inset_ax.axhline(0, color='gray', linestyle='--', linewidth=0.5)
    # The inset shows a quadrature difference, not a spread/error ratio.
    inset_ax.set_title("additional err needed", fontsize=8)
    inset_ax.tick_params(labelsize=7)
    inset_ax.set_xlim(16, 24)
    inset_ax.set_ylim(0.0, 0.05)
    inset_ax.grid(True, linestyle='--', linewidth=0.3, alpha=0.6)

    ax.axhline(0, color='gray', linestyle='--', linewidth=1)
    ax.set_title(title, fontsize=13)
    # The x value is the magnitude of the object's *median* flux.
    ax.set_xlabel("Median PSF Magnitude", fontsize=12)
    ax.set_xlim(16, 24)
    ax.set_ylim(-0.2, 0.2)
    ax.grid(True, linestyle='--', linewidth=0.5, alpha=0.7)

# y-labels on the left column only (axes[0] top-left, axes[2] bottom-left).
axes[0].set_ylabel("Δ Magnitude (psfFlux)", fontsize=12)
axes[2].set_ylabel("Δ Magnitude (psfFlux)", fontsize=12)

# Legend in the bottom-right plot
axes[3].legend(loc='upper right', fontsize=9)

plt.suptitle("r_extendedness=0, isolated stars, forced observations, psfFlux Δmag vs. Median Mag (< 0.5 days)", fontsize=15)
plt.tight_layout(rect=[0, 0, 1, 0.95])
plt.show()

In [None]:
# TODO 

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# One figure, two side-by-side panels sharing the y axis.
fig, axes = plt.subplots(1, 2, figsize=(18, 6), sharey=True)

# (frame, legend label, line colour) per band; only the first four are drawn.
bands = [
    (final_df_u, 'u', 'blue'),
    (final_df_g, 'g', 'green'),
    (final_df_r, 'r', 'red'),
    (final_df_i, 'i', 'orange'),
    (final_df_z, 'z', 'brown'),
    (final_df_y, 'y', 'purple')
][:4]

# (axis, binning function, panel title, echo progress?) --
# difference-imaging fluxes on the left, direct fluxes on the right.
panels = (
    (axes[0], process_band_data_dia, "DIA: sqrt(obs_spread² - claimed_err²)", True),
    (axes[1], process_band_data, "Reference: sqrt(obs_spread² - claimed_err²)", False),
)

for panel_ax, summarize_band, panel_title, verbose in panels:
    for df, label, color in bands:
        if verbose:
            print(f"Processing {label} band data...")
        _, bin_centers, low_vals, med_vals, high_vals, err_vals, _ = summarize_band(df, label)

        # Half-width of the 16th-84th percentile band.
        upper, centre, lower = (np.array(vals) for vals in (high_vals, med_vals, low_vals))
        actual_spread = (upper - centre + centre - lower) / 2

        # Quadrature excess of the observed spread over the claimed error;
        # negative differences are clipped to zero before the square root.
        excess_variance = actual_spread**2 - np.array(err_vals)**2
        delta_84_vs_err = np.sqrt(np.clip(excess_variance, 0, None))
        if verbose:
            print(delta_84_vs_err)

        panel_ax.plot(bin_centers, delta_84_vs_err, marker='o', label=label, color=color, lw=2)

    panel_ax.set_title(panel_title, fontsize=13)
    panel_ax.set_xlabel("Median PSF Magnitude", fontsize=12)
    if panel_ax is axes[0]:
        # The y axis is shared, so only the left panel carries the label.
        panel_ax.set_ylabel("Mag", fontsize=12)
    panel_ax.set_xlim(16, 24)
    panel_ax.set_ylim(0, 0.05)
    panel_ax.grid(True, linestyle='--', linewidth=0.5, alpha=0.7)
    panel_ax.legend(title="Band", fontsize=10)

plt.suptitle("Additional error needed to explain observations", fontsize=15)
plt.tight_layout(rect=[0, 0, 1, 0.95])
plt.show()

In [None]:
# Scratch check: claimed error combined in quadrature with a 0.02 term
# (presumably a trial error floor in magnitudes -- TODO confirm).
# NOTE(review): `err_vals` is whatever the last executed loop left behind.
np.sqrt(np.array(err_vals) **2 + 0.02**2)