In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import lsst.daf.butler as dafButler

from tqdm import tqdm

import warnings
# Silence Python warnings for the rest of the session and set matplotlib's
# log level to WARNING so cell output is not flooded with messages.
# NOTE(review): both warnings calls below install a blanket 'ignore' filter,
# so one of them looks redundant — confirm before removing either. Blanket
# suppression also hides deprecation warnings from the libraries used below.
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')
plt.set_loglevel('WARNING')

In [None]:
# Install lsdb and hats straight from GitHub into the kernel's environment.
# NOTE(review): both installs track moving branches (`rubin_index`, `main`),
# so re-running this cell later may pull different code; pin a commit or tag
# if the outputs need to be reproducible. A kernel restart may be needed
# before the imports in the next cell pick up the freshly installed versions.
%pip install git+https://github.com/astronomy-commons/lsdb.git@rubin_index
%pip install git+https://github.com/astronomy-commons/hats.git@main

In [None]:
# Import the freshly installed packages and report which builds the kernel
# actually picked up (useful because the installs above track git branches).
import hats
import lsdb

for _pkg in (hats, lsdb):
    print(_pkg.__version__)

# Import data

In [2]:
# Butler configuration shared by every collection opened below.
repo = "embargo_new"
instrument = "LSSTComCam"

# Current collection (weekly w_2024_50).
collection = "LSSTComCam/runs/DRP/20241101_20241211/w_2024_50/DM-48128"
butler = dafButler.Butler(repo, collections=collection, instrument=instrument)

# Older weeklies (w_2024_49 and w_2024_48), opened against the same repo and
# instrument so their dataset counts can be compared with the current one.
butler_49, butler_48 = (
    dafButler.Butler(repo, collections=older_collection, instrument=instrument)
    for older_collection in (
        "LSSTComCam/runs/DRP/20241101_20241204/w_2024_49/DM-47988",
        "LSSTComCam/runs/DRP/20241101_20241127/w_2024_48/DM-47841",
    )
)

In [5]:
# Count the goodSeeingDiff_differenceExp datasets available in each band of
# the current collection. Flip `verbose` to also list every dataset ref.
verbose = False
bands = list("ugrizy")

for band in bands:
    band_filter = f"band='{band}'"
    datasetRefs_dia = butler.query_datasets("goodSeeingDiff_differenceExp", where=band_filter)

    if verbose:
        print(f"\nDataset references for band '{band}':")
        for dr in datasetRefs_dia:
            print(dr)

    n_found = len(datasetRefs_dia)
    print(f"Found {n_found} differenceExps for band '{band}'")

In [None]:
# Dataset refs for the forced-source and object tables in the current
# collection; the tables themselves are fetched in later cells.
forcedsrc_refs = butler.query_datasets("forcedSourceTable")
obj_refs = butler.query_datasets("objectTable")

for _label, _refs in (
    ("forcedsrc_refs", forcedsrc_refs),
    ("obj_refs", obj_refs),
):
    print(f"Number of tracts in {_label} (current): {len(_refs)}")

In [7]:
# Per-tract DIA table refs: sources and objects from the current collection,
# plus the object refs from the two older weeklies for comparison.
diaSrc_refs = butler.query_datasets("diaSourceTable_tract")
diaObj_refs = butler.query_datasets("diaObjectTable_tract")
diaObj_refs_49 = butler_49.query_datasets("diaObjectTable_tract")
diaObj_refs_48 = butler_48.query_datasets("diaObjectTable_tract")

for _label, _refs in (
    ("diaSrc_refs (current)", diaSrc_refs),
    ("diaObj_refs (current)", diaObj_refs),
    ("diaObj_refs_49 (weekly 49)", diaObj_refs_49),
    ("diaObj_refs_48 (weekly 48)", diaObj_refs_48),
):
    print(f"Number of tracts in {_label}: {len(_refs)}")

In [8]:
# Load every per-tract diaObject and diaSource table from the current
# collection, concatenate them, and plot how many diaSources each diaObject has.

# Positional indices into diaObj_refs. Retained so that any other cell which
# indexes the ref lists by position keeps working; the loading below does not
# use it.
tract_indices = np.arange(0, len(diaObj_refs))

# Each ref list is iterated on its own. The two lists come from independent
# queries, so nothing guarantees they have the same length or the same tract
# at a given position; indexing diaSrc_refs with positions taken from
# diaObj_refs could raise IndexError or silently skip diaSource tracts.
dia_Obj_list = [
    butler.get('diaObjectTable_tract', dataId=ref.dataId)
    for ref in tqdm(diaObj_refs, desc="diaObjectTable_tract")
]
diaSource_list = [
    butler.get('diaSourceTable_tract', dataId=ref.dataId)
    for ref in tqdm(diaSrc_refs, desc="diaSourceTable_tract")
]

# Concatenate all diaObj and diaSource DataFrames, keeping each table's index.
combined_dia_Obj = pd.concat(dia_Obj_list, ignore_index=False)
combined_diaSource = pd.concat(diaSource_list, ignore_index=False)

# Working copies used by the cells that build the LSDB catalogs.
dia_Obj = combined_dia_Obj.copy()
diaSource = combined_diaSource.copy()

# Assuming the latest collection is weekly 50, keep explicitly labelled copies.
# NOTE(review): together with the copies above this holds three full copies of
# each table in memory; drop whichever are not needed if memory is tight.
dia_Obj_50 = combined_dia_Obj.copy()
diaSource_50 = combined_diaSource.copy()

# Histogram of nDiaSources in 40 unit-wide bins over [0, 40]; values outside
# that range are not counted.
counts, bin_edges = np.histogram(combined_dia_Obj['nDiaSources'], bins=40, range=(0, 40))

# Take the logarithm (base 10) of the counts; +1 avoids log(0) for empty bins.
log_counts = np.log10(counts + 1)

# Plot the histogram as bars aligned to the left edge of each bin.
plt.bar(bin_edges[:-1], log_counts, width=np.diff(bin_edges), align='edge', edgecolor='black', alpha=0.7)
plt.xlabel('Number of diaSources per diaObject')
plt.ylabel('Log10(Number of diaObjects)')
plt.title('Log Number of diaObjects per Number of Sources per Object')
plt.show()  # render the figure without echoing the Text repr of the title

In [9]:
# Wrap the concatenated DIA tables as LSDB catalogs: diaSource as a "source"
# catalog and diaObject as an "object" catalog. No ra/dec column names are
# passed, so from_dataframe's defaults are used for the position columns.
catalog_diaSource_50 = lsdb.from_dataframe(
    diaSource, catalog_type="source", catalog_name="diaSource_50"
)

catalog_diaObj_50 = lsdb.from_dataframe(
    dia_Obj, catalog_type="object", catalog_name="diaObject_50"
)

In [15]:
# Save it to disk in HATS format
# Both catalogs are written to paths relative to the kernel's working
# directory. NOTE(review): overwrite=True presumably replaces any existing
# output at these paths — confirm before re-running over data you still need.
catalog_diaSource_50.to_hats("diaSource_50", overwrite=True)
catalog_diaObj_50.to_hats("diaObject_50", overwrite=True)

### Forced Sources

In [None]:
# Fetch the forcedSourceTable behind every ref in forcedsrc_refs, one
# butler.get call per ref, with a progress bar over the refs.
forcedSource_list = [
    butler.get('forcedSourceTable', dataId=ref.dataId)
    for ref in tqdm(forcedsrc_refs)
]

# Stack the per-ref tables into one DataFrame (original indices are kept),
# then drop the list so the per-ref tables are not referenced twice.
forcedSource = pd.concat(forcedSource_list, ignore_index=False)
del forcedSource_list

In [7]:
# Build an LSDB "source" catalog from the concatenated forced-source table.
# Positions are read from the coord_ra / coord_dec columns, named explicitly
# here (the dia* catalogs built earlier pass no column names).
# NOTE(review): highest_order=10 is presumably the finest HEALPix order allowed
# when partitioning — confirm against the lsdb from_dataframe documentation.
catalog_forcedSource_50 = lsdb.from_dataframe(
    forcedSource,
    catalog_name="forcedSource_50",
    catalog_type="source",
    ra_column="coord_ra",
    dec_column="coord_dec",
    highest_order=10)

In [9]:
# Write the forced-source catalog to ./forcedSource_50 (path is relative to the
# kernel's working directory). NOTE(review): overwrite=True presumably replaces
# any existing output at that path — confirm before re-running.
catalog_forcedSource_50.to_hats("forcedSource_50", overwrite=True)

### Objects

In [None]:
# Fetch the objectTable behind every ref in obj_refs and stack the results.
object_list = []

# Drive the loop from obj_refs itself. The loop used to be bounded by
# len(forcedsrc_refs) while indexing obj_refs, which raises IndexError when
# there are more forced-source refs than object refs and silently drops
# object tables when there are fewer.
for ref in tqdm(obj_refs):
    obj = butler.get('objectTable', dataId=ref.dataId)  # Fetch object table
    object_list.append(obj)

# Concatenate the per-ref object tables (original indices are kept), then drop
# the list so the per-ref tables are not referenced twice.
Objects = pd.concat(object_list, ignore_index=False)
del object_list

In [6]:
# Build an LSDB "object" catalog from the concatenated object table.
# Positions are read from the coord_ra / coord_dec columns.
# NOTE(review): highest_order=10 is presumably the finest HEALPix order allowed
# when partitioning — confirm against the lsdb from_dataframe documentation.
catalog_Obj_50 = lsdb.from_dataframe(
    Objects,
    catalog_name="Obj_50",
    catalog_type="object",
    ra_column="coord_ra",
    dec_column="coord_dec",
    highest_order=10)

In [7]:
# Write the object catalog to ./Object_50 (relative to the kernel's working
# directory). NOTE(review): the catalog was created with catalog_name="Obj_50"
# but is saved under "Object_50"; the other catalogs in this notebook use the
# same string for both — confirm whether the mismatch is intentional.
catalog_Obj_50.to_hats("Object_50", overwrite=True)