## HATS Data Preview 1 on RSP

This notebook tests access to Data Preview 1 (DP1) data in the HATS format. 

**Goal:** Load a random sample of the data for use in scale testing within the RSP.

In [None]:
# Install lsdb with the %pip magic so it lands in the running kernel's
# environment; --quiet reduces pip's console output.
# NOTE(review): the version is unpinned, so a fresh run may pick up a different
# lsdb release — consider pinning once the target version is known.
%pip install lsdb --quiet

In [None]:
import lsdb
import numpy as np
from upath import UPath

In [None]:
# Root directory (on the shared filesystem) under which the HATS catalogs live.
base_path = UPath("/sdf/data/rubin/shared/lsdb_commissioning/hats/v29_0_0")
# Open the "object_collection_lite" catalog located under that root.
object_collection = lsdb.read_hats(base_path.joinpath("object_collection_lite"))

In [None]:
# Collect the statistics the catalog reports per pixel, then keep only the
# "objectId: row_count" column, cast to 64-bit integers.
pixel_statistics = object_collection.per_pixel_statistics()
pixel_counts = pixel_statistics.loc[:, "objectId: row_count"].astype(np.int64)

In [None]:
# Choose three representative partitions: those whose row counts lie closest to
# the 10th, 50th and 90th percentiles of the per-pixel row counts.
partition_indices = []
for percentile in [10, 50, 90]:
    # Row count at this percentile. np.percentile may interpolate, so the value
    # need not coincide with any actual partition's count.
    q = np.percentile(pixel_counts, percentile)
    # Label the printed number as the percentile's value (it is not a quartile
    # for the 10th and 90th percentiles).
    print(f"Percentile: {percentile}, Value: {q}")
    # Positional index of the entry whose row count is nearest to q.
    index = int(np.argmin(np.abs(pixel_counts - q)))
    closest_value = pixel_counts.iloc[index]
    print(f"Closest value: {closest_value}, partition index: {index}")
    partition_indices.append(index)

In [None]:
# For every index stored in partition_indices, report that entry's row count and
# time a fixed-parameter sample request against the catalog.
for index in partition_indices:
    print(f"Sampling partition {index} of size {pixel_counts.iloc[index]}")
    # %timeit executes the statement repeatedly and reports timing; n=100 and
    # seed=10 are held constant so every partition is timed with the same request.
    # NOTE(review): assumes positions in pixel_counts correspond to the partition
    # ids that sample() expects — confirm against the lsdb documentation.
    %timeit object_collection.sample(index, n=100, seed=10)