# Create a virtual Zarr dataset from the individual AORC-OWP JSON files

In [None]:
import fsspec
import xarray as xr
import ujson   # fast json
from pathlib import Path
from kerchunk.combine import MultiZarrToZarr, auto_dask, JustLoad

In [None]:
# Local-filesystem handle; later cells use it to glob the input JSON files and
# to write the combined JSON.
fs_write = fsspec.filesystem('file')

##### Start a local Dask cluster

In [None]:
from dask.distributed import LocalCluster, Client

# Start a local Dask cluster with one thread per worker and connect a client to it.
cluster = LocalCluster(threads_per_worker=1)
client = Client(cluster)
# Last expression in the cell: shows the client summary in the notebook output.
client

In [None]:
# Directory that is searched for the individual `*.json` files in the next cell.
json_dir = '/caldera/hytest_scratch/scratch/rsignell/json_files'

In [None]:
# Collect the paths of all `.json` files directly under `json_dir`.
json_list = fs_write.glob(f'{json_dir}/*.json')

In [None]:
# Display the first matched path as a quick sanity check of the glob result.
json_list[0]

In [None]:
# Options dict: binary read mode, fill cache disabled, 'first' cache type.
# NOTE(review): `so` is not referenced by any other cell shown here — presumably
# left over from an earlier version of this workflow; confirm before removing.
so = dict(mode='rb', default_fill_cache=False, default_cache_type='first')

## merge individual jsons into single combined JSON (serial process)

#### Merge the individual JSONs into a single combined JSON in parallel using Dask

In [None]:
# Number of batches handed to auto_dask (as `n_batches`) in the next cell.
# NOTE(review): hard-coded; the LocalCluster above is created without an explicit
# worker count, so this may not match the actual number of workers — confirm.
worker_max=80

In [None]:
%%time
d = auto_dask(
    json_list,
    single_driver=JustLoad,
    single_kwargs={"storage_options": {}},
    mzz_kwargs={"concat_dims": ["time"]},
    n_batches=worker_max,   # give one batch to each worker
#    remote_protocol="s3",
#    remote_options={"requester_pays": True}
)

#### Explore the combined JSON

In [None]:
%%time
fs = fsspec.filesystem("reference", fo=d, ref_storage_args={'skip_instance_cache':True})
m = fs.get_mapper("")

ds = xr.open_dataset(m, engine="zarr", backend_kwargs={'consolidated':False}, chunks={})

In [None]:
# Display the dataset summary.
ds

In [None]:
# Display the summary of the APCP_surface variable.
ds.APCP_surface

## Visualize

In [None]:
# Imported for its side effect: it provides the `.hvplot` accessor used on the
# xarray objects in the plotting cells below.
import hvplot.xarray

In [None]:
# Select APCP_surface at a single time stamp and load that slice into memory.
da = ds.APCP_surface.sel(time='2000-02-15 06:00').load()

In [None]:
# Plot the selected slice as a rasterized, georeferenced image (longitude on x,
# latitude on y) over OSM tiles with the 'turbo' colormap.
da.hvplot.image(x='longitude', y='latitude', geo=True, rasterize=True, cmap='turbo', tiles='OSM')

#### Extract a time series

In [None]:
%%time
# Select the grid cell nearest to the given longitude/latitude and load its full
# time series into memory. Note: this rebinds `da`, replacing the map slice above.
da = ds.APCP_surface.sel(longitude=-115.18, latitude=46.65, method='nearest').load()

In [None]:
# Plot the extracted series against time, with grid lines enabled.
da.hvplot(x='time', grid=True)

#### Write the combined JSON to storage

In [None]:
# Output path for the combined reference JSON written in the next cell.
combined_json = '/caldera/hytest_scratch/scratch/rsignell/sudhir.json'

In [None]:
%%time
with fs_write.open(combined_json, 'wb') as f:
    f.write(ujson.dumps(d).encode());