In [1]:
from pathlib import Path

import lsdb
import pyarrow.parquet as pq

# Directory that receives the reduced copy of the catalog.
output_dir = Path("tmp")

# Open the catalog with only the top-level coordinates and the nested
# light-curve columns listed below.
# Path is for Rubin Science Platform, see this documentation page:
# https://docs.lsdb.io/en/latest/tutorials/pre_executed/rubin_dp1.html
cat = lsdb.open_catalog(
    "dia_object_collection",
    columns=[
        "ra",
        "dec",
        "diaSource.band",
        "diaSource.midpointMjdTai",
        "diaSource.psfFlux",
        "diaSource.psfFlux_flag",
        "diaObjectForcedSource.band",
        "diaObjectForcedSource.midpointMjdTai",
        "diaObjectForcedSource.psfFlux",
        "diaObjectForcedSource.psfFlux_flag",
    ],
)

# Reduce every partition to its head and write the result to `output_dir`,
# overwriting whatever a previous run left there.
(
    cat
    .map_partitions(lambda partition: partition.head())
    .write_catalog(output_dir, overwrite=True)
)

# Head of the full catalog, kept around for inspection.
df = cat.head()

In [2]:
# VOTable document describing the columns of the DIA Object table. It is stored
# under the "IVOA.VOTable-Parquet.content" key of the parquet file metadata.
#
# Every nested FIELD carries an ID equal to its name: a FIELDref's `ref` attribute
# is resolved against FIELD IDs (not names), so without the IDs the GROUP
# memberships below would point at nothing.
content = """
<VOTABLE version="1.4" xmlns="http://www.ivoa.net/xml/VOTable/v1.3">
  <RESOURCE>
    <TABLE name="DIA Object">
      <DESCRIPTION>Rubin DP1 DIA Object table with nested Source and Force Source tables</DESCRIPTION>
      <PARAM name="author" datatype="char" arraysize="*" value="Vera C. Rubin Observatory"/>
      <FIELD datatype="double" name="ra" ucd="pos.eq.ra" unit="deg">
        <DESCRIPTION>DIA Object ICRS Right Ascension</DESCRIPTION>
      </FIELD>
      <FIELD datatype="double" name="dec" ucd="pos.eq.dec" unit="deg">
        <DESCRIPTION>ICRS Declination</DESCRIPTION>
      </FIELD>
      <GROUP name="diaSource">
        <DESCRIPTION>Properties of transient-object detections on the single-epoch difference images</DESCRIPTION>
          <FIELDref ref="diaSource.band"/>
          <FIELDref ref="diaSource.midpointMjdTai"/> 
          <FIELDref ref="diaSource.psfFlux"/>
          <FIELDref ref="diaSource.psfFlux_flag"/>
      </GROUP>
      <!-- It is actually "double-nested", because single band is an array of unicode chars,
        and here we have an array of bands-->
      <FIELD ID="diaSource.band" datatype="unicodeChar" name="diaSource.band">
        <DESCRIPTION>Band used to take this observation</DESCRIPTION>
      </FIELD>
      <FIELD ID="diaSource.midpointMjdTai" datatype="double" name="diaSource.midpointMjdTai">
        <DESCRIPTION>Midpoint time for exposure at the fiducial center of the focal plane array. TAI, accurate to 10ms</DESCRIPTION>
      </FIELD>
      <FIELD ID="diaSource.psfFlux" datatype="float" name="diaSource.psfFlux" unit="nJy">
        <DESCRIPTION>Flux derived from linear least-squares fit of PSF model</DESCRIPTION>
      </FIELD>
      <FIELD ID="diaSource.psfFlux_flag" datatype="boolean" name="diaSource.psfFlux_flag">
        <DESCRIPTION>Failure to derive linear least-squares fit of psf model. Another psfFlux flag field should also be set to provide more information</DESCRIPTION>
      </FIELD>
      <GROUP name="diaObjectForcedSource">
        <DESCRIPTION>Point-source forced-photometry measurements on individual single-epoch visit images and difference images, based on and linked to the entries in the DiaObject table</DESCRIPTION>
          <FIELDref ref="diaObjectForcedSource.band"/>
          <FIELDref ref="diaObjectForcedSource.midpointMjdTai"/> 
          <FIELDref ref="diaObjectForcedSource.psfFlux"/>
          <FIELDref ref="diaObjectForcedSource.psfFlux_flag"/>
      </GROUP>
      <!-- It is actually "double-nested", because single band is an array of unicode chars,
        and here we have an array of bands-->
      <FIELD ID="diaObjectForcedSource.band" datatype="unicodeChar" name="diaObjectForcedSource.band">
        <DESCRIPTION>Band used to take this observation</DESCRIPTION>
      </FIELD>
      <FIELD ID="diaObjectForcedSource.midpointMjdTai" datatype="double" name="diaObjectForcedSource.midpointMjdTai">
        <DESCRIPTION>Midpoint time for exposure at the fiducial center of the focal plane array. TAI, accurate to 10ms</DESCRIPTION>
      </FIELD>
      <FIELD ID="diaObjectForcedSource.psfFlux" datatype="float" name="diaObjectForcedSource.psfFlux" unit="nJy">
        <DESCRIPTION>Flux derived from linear least-squares fit of PSF model</DESCRIPTION>
      </FIELD>
      <FIELD ID="diaObjectForcedSource.psfFlux_flag" datatype="boolean" name="diaObjectForcedSource.psfFlux_flag">
        <DESCRIPTION>Failure to derive linear least-squares fit of psf model. Another psfFlux flag field should also be set to provide more information</DESCRIPTION>
      </FIELD>
    </TABLE>
  </RESOURCE>
</VOTABLE>
"""

# Value stored under the "IVOA.VOTable-Parquet.version" metadata key.
version = "1.0"



In [3]:
# Key/value pairs to embed in every parquet file. They do not depend on the file,
# so they are built once, outside the loop.
votable_metadata = {
    b"IVOA.VOTable-Parquet.content": content.encode(),
    b"IVOA.VOTable-Parquet.version": version.encode(),
}

# Collect the file list up front: the files are rewritten in place, and the
# directory tree should not be modified while it is still being traversed.
for path in sorted(output_dir.glob("**/*.parquet")):
    table = pq.read_table(path)
    # replace_schema_metadata() discards every key that is not passed to it, so the
    # VOTable keys are merged into the metadata already present in the file instead
    # of replacing it. The VOTable keys come last so they win on a key collision.
    metadata = {**(table.schema.metadata or {}), **votable_metadata}
    pq.write_table(table.replace_schema_metadata(metadata), path)