In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import pandas as pd
from tqdm import tqdm

from scipy.spatial import ConvexHull

import lsst.daf.butler as dafButler

import lsdb

from mpl_toolkits.axes_grid1 import make_axes_locatable
from astropy.visualization import ZScaleInterval, SqrtStretch, ImageNormalize, ManualInterval, AsinhStretch, MinMaxInterval, LogStretch


import warnings
# Silence every Python warning for the rest of the session.
# NOTE(review): both calls below install a blanket 'ignore' filter, so one of
# them looks redundant; this also hides deprecation and pandas warnings —
# confirm that suppressing everything is intended.
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

from typing import TYPE_CHECKING, cast

import astropy.units as u
from astropy.coordinates import SkyCoord

# Set matplotlib's log level to WARNING.
plt.set_loglevel('WARNING')

In [None]:
# Butler repository and instrument shared by the rest of the notebook.
repo = "embargo"
instrument = "LSSTCam"

# Parent collection used to open the initial Butler.
collection_all = "LSSTCam/runs/nightlyValidation"
butler = dafButler.Butler(repo, collections=collection_all, instrument=instrument)

# Every collection in the registry matching the 2025 nightly-validation glob.
all_collections = list(
    butler.registry.queryCollections("LSSTCam/runs/nightlyValidation/2025*")
)

# Keep only the collections whose name ends with the character '7'.
# NOTE(review): presumably this targets one specific ticket suffix; matching
# on a single trailing digit is brittle — confirm the intended selection.
filtered_collections = [
    coll_name for coll_name in all_collections if coll_name.endswith("7")
]

# Print the selected collection names in sorted order, one per line.
for coll in sorted(filtered_collections):
    print(coll)

In [None]:
# Butler spanning all filtered collections; used to fetch catalogs later on.
butler = dafButler.Butler(repo, collections=filtered_collections, instrument=instrument)

# Second Butler whose default collection is a single 20250419 run.
butler_single_day = dafButler.Butler(
    repo,
    collections="LSSTCam/runs/nightlyValidation/20250419/d_2025_04_19/DM-50157",
    instrument=instrument,
)

dataset_type = "single_visit_star"

# Collection passed explicitly to the dataset query below.
# NOTE(review): this (20250418/w_2025_16) differs from the default collection
# of `butler_single_day` (20250419/d_2025_04_19) — confirm which is intended.
query_collection = "LSSTCam/runs/nightlyValidation/20250418/w_2025_16/DM-50157"

# All datasets of the requested type found in that collection.
datasets = list(
    butler_single_day.registry.queryDatasets(
        dataset_type,
        instrument="LSSTCam",
        collections=query_collection,
    )
)

# Visit ID of every dataset found; the cell displays how many there are.
visit_ids = [ref.dataId["visit"] for ref in datasets]
len(visit_ids)

In [None]:
# Hard-coded list of 52 visit IDs to load.
# NOTE(review): IDs look like a YYYYMMDD date followed by a sequence number;
# the provenance of this selection is not recorded here — confirm the source.
science_visit_ids = np.array([2025041700758, 2025041700759, 2025041700760, 2025041700761,
       2025041800614, 2025041800615, 2025041800616, 2025041800617,
       2025041800618, 2025041800619, 2025041800620, 2025041800621,
       2025041800655, 2025041800656, 2025041800657, 2025041800658,
       2025041800659, 2025041800660, 2025041800661, 2025041800662,
       2025041800663, 2025041800664, 2025041900328, 2025041900329,
       2025041900330, 2025041900331, 2025041900332, 2025041900333,
       2025041900334, 2025041900335, 2025041900336, 2025041900337,
       2025041900338, 2025041900339, 2025041900340, 2025041900341,
       2025041900342, 2025041900343, 2025041900344, 2025041900345,
       2025041900346, 2025041900347, 2025041900348, 2025041900349,
       2025041900350, 2025041900351, 2025041900352, 2025041900353,
       2025041900354, 2025041900355, 2025041900356, 2025041900357])

In [None]:
# Hard-coded list of 52 time stamps, one per entry of `science_visit_ids`;
# the two arrays are paired by position, so their order must stay in sync.
# NOTE(review): presumably Modified Julian Dates of the visits; which point of
# the exposure they refer to is not stated here — confirm.
science_visit_mjd = np.array([60783.34339728, 60783.34390062, 60783.34439903, 60783.34490588,
       60784.24403167, 60784.24486688, 60784.25079856, 60784.25164119,
       60784.25248479, 60784.2533253 , 60784.25417138, 60784.25500849,
       60784.30088569, 60784.30172504, 60784.30256876, 60784.30340408,
       60784.3042451 , 60784.30508359, 60784.30594175, 60784.30679976,
       60784.30772254, 60784.30858717, 60785.19597767, 60785.1968219 ,
       60785.19817999, 60785.19902863, 60785.19987024, 60785.20076997,
       60785.20171889, 60785.20259361, 60785.20387198, 60785.20488903,
       60785.20573064, 60785.20657325, 60785.20747193, 60785.20831916,
       60785.20916988, 60785.21009364, 60785.21092441, 60785.21177818,
       60785.21263423, 60785.21714858, 60785.21799712, 60785.21884532,
       60785.21974823, 60785.22060109, 60785.22150293, 60785.22234958,
       60785.22318949, 60785.22403674, 60785.22488073, 60785.22574227])

In [None]:
# Load the 'single_visit_star' catalog of every science visit and collect the
# cleaned per-visit tables in `dfs` (one DataFrame per successfully loaded
# visit, tagged with its visit ID and time stamp).

# The visit IDs and time stamps are paired by position. Check this up front:
# otherwise a length mismatch would surface as an IndexError inside the loop
# and be reported as just another "skipped" visit by the broad handler below.
if len(science_visit_ids) != len(science_visit_mjd):
    raise ValueError(
        "science_visit_ids and science_visit_mjd must have the same length, got "
        f"{len(science_visit_ids)} and {len(science_visit_mjd)}."
    )

dfs = []

for i, visit in enumerate(tqdm(science_visit_ids)):
    try:
        # Get the catalog
        cat = butler.get('single_visit_star', visit=visit, instrument="LSSTCam")

        # Convert to Pandas DataFrame
        df = cat.to_pandas()

        # Keep rows that are primary detections, have a non-NaN coord_ra and
        # are not flagged as sky sources.
        keep = (
            (df['detect_isPrimary'] == True)
            & df['coord_ra'].notna()
            & (df['sky_source'] == False)
        )

        # Explicit copy: columns are added below, and assigning onto a
        # filtered slice is the pattern pandas flags with SettingWithCopyWarning.
        df = df.loc[keep].copy()

        # Skip if DataFrame is now empty
        if df.empty:
            print(f"Skipping visit {visit} — no valid rows left.")
            continue

        # Add visit_id and mjd columns
        df["visit_id"] = visit
        df["mjd"] = science_visit_mjd[i]

        # Store
        dfs.append(df)

    except Exception as e:
        # Deliberate best effort: a visit that cannot be loaded or filtered is
        # reported and skipped so the remaining visits are still processed.
        print(f"Skipping visit {visit} due to error: {e}")

print(f"Loaded {len(dfs)} dataframes.")

In [None]:
# Columns carried into the cross-match: position, PSF flux with its error,
# and the per-visit time stamp.
col_selection = ["coord_ra", "coord_dec", 'psfFlux', 'psfFluxErr', "mjd"]

# Reduced tables for the first two loaded visits.
df_0 = dfs[0].loc[:, col_selection]
df_1 = dfs[1].loc[:, col_selection]

In [None]:
# Cross-match the two per-visit tables on sky position; columns coming from
# each input are distinguished by the "_0" / "_1" suffixes.
# NOTE(review): match radius and neighbour count are left at lsdb's defaults —
# confirm they suit this comparison.
test = lsdb.crossmatch(
    df_0,
    df_1,
    ra_column="coord_ra",
    dec_column="coord_dec",
    suffixes=("_0", "_1"),
)

In [None]:
# Run the cross-match and keep the computed result.
# NOTE(review): presumably `test` is a lazy lsdb catalog and this returns an
# in-memory table — confirm.
test_computed = test.compute()

In [None]:
# Display the computed cross-match result as the cell output.
test_computed