# Convert a JSON reference file into parquet files
**Input**:
* a single combined reference JSON file for a dataset

**Output**:
* one parquet file for the coordinates and global metadata
* one parquet file of references per data variable

import fsspec
import xarray as xr
import ujson
import os
from kerchunk import hdf, combine, df
from fsspec.implementations.reference import DFReferenceFileSystem

In [2]:
# Anonymous S3 filesystem pointed at a custom (non-AWS) endpoint; it is used
# below to read the combined reference JSON.
fs = fsspec.filesystem(
    "s3",
    anon=True,
    client_kwargs={"endpoint_url": "https://ncsa.osn.xsede.org"},
)

In [3]:
# Location of the single combined reference JSON file (this notebook's input).
url = 's3://esip/noaa/nwm/grid1km/LDAS_combined.json'

In [4]:
%%time
refs = ujson.load(fs.open(url))

CPU times: user 2min 21s, sys: 26.8 s, total: 2min 48s
Wall time: 7min 40s


In [5]:
# Sanity check: show the Python type of the loaded reference object.
type(refs)

dict

In [6]:
# Local-file filesystem; used below to create the parquet output directory.
fs2 = fsspec.filesystem('file')

In [7]:
# Output directory (relative to the working directory) for the parquet files.
parquet_dir = 'combined.parq'

In [8]:
# Create the output directory; exist_ok=True keeps this cell safe to re-run.
fs2.mkdirs(parquet_dir, exist_ok=True)

In [9]:
%%time
# Write the in-memory references out as parquet under parquet_dir.
# Per the introduction of this notebook, the expected result is one parquet
# file for the coordinates and global metadata plus one per data variable;
# presumably partition=True is what requests that split -- confirm against the
# installed kerchunk version.
df.refs_to_dataframe(refs, parquet_dir, partition=True)

CPU times: user 6min 48s, sys: 49.5 s, total: 7min 38s
Wall time: 7min 40s


In [10]:
# Reference filesystem backed by the parquet directory written above; the
# referenced data chunks are fetched over the "s3" protocol.
# NOTE(review): this rebinds `fs`, replacing the S3 filesystem created earlier,
# so re-running the JSON-loading cell after this one would use the wrong
# filesystem. Also note that the remote options here request requester-pays
# access, whereas the JSON itself was read anonymously -- confirm this is
# intended.
fs = DFReferenceFileSystem(
    parquet_dir,
    lazy=True,
    remote_protocol="s3",
    remote_options={"requester_pays": True},
)

In [11]:
# Open the parquet-backed references as an xarray dataset through the zarr
# engine, with consolidated metadata explicitly turned off.
ds = xr.open_dataset(
    fs.get_mapper(),
    engine="zarr",
    backend_kwargs=dict(consolidated=False),
)

In [12]:
# Show the dataset summary as the cell output.
ds