# Combine 3 kerchunked datasets into one virtual dataset

In [None]:
import warnings
# Ignore all warnings from here on; this runs before the library imports
# in the next cell.
warnings.filterwarnings("ignore")

In [None]:
import json
import fsspec
import xarray as xr
import hvplot.xarray

I like to use `fsspec` for all file access (even for local file systems). Here the kerchunk JSONs live on S3, so we create an S3 filesystem:

In [None]:
# S3 filesystem object using the 'esip-qhub' profile; used below to read
# the individual reference JSONs.
fs = fsspec.filesystem("s3", profile="esip-qhub")

In [None]:
# Load the three single-variable kerchunk reference JSONs from S3:
#   a -> precipitation (ppt)
#   b -> maximum temperature (tmax)
#   c -> minimum temperature (tmin)
# The order matters: later cells copy the 'daily_maximum_temperature' refs
# from `b` and the 'daily_minimum_temperature' refs from `c`.
# (To work from local copies instead, open the bare file names, in this same
# order, with a local fsspec filesystem.)
#
# Each file is opened in a `with` block so its handle is closed as soon as
# the JSON has been parsed, instead of being left open.
with fs.open('s3://esip-qhub/usgs/nhgf/gridmet_100mb/jsons/gridmet_ppt_1979-2020_nc4.json') as f:
    a = json.load(f)
with fs.open('s3://esip-qhub/usgs/nhgf/gridmet_100mb/jsons/gridmet_tmax_1979-2020_nc4.json') as f:
    b = json.load(f)
with fs.open('s3://esip-qhub/usgs/nhgf/gridmet_100mb/jsons/gridmet_tmin_1979-2020_nc4.json') as f:
    c = json.load(f)

Start with the references from the first file; we will add the other two variables to it below:

In [None]:
# NOTE: this binds a second name to the same dict (no copy is made), so
# everything added to `combined` below also modifies `a`.
combined = a

Each individual JSON has a single URL template called "u".

In the combined JSON we will call the 3 templates "u", "v" and "w", so assign them here:

In [None]:
# Register the URL templates of the second and third datasets under new
# names: each source JSON calls its template "u", so b's is stored as "v"
# and c's as "w" (a's stays "u").
combined['templates'].update(
    v=b['templates']['u'],
    w=c['templates']['u'],
)

In [None]:
# Point every reference in b and c at its renamed template: serialize each
# dict to text, swap the '{{u}}' placeholder for the new one, and parse it
# back into a dict.
b, c = (
    json.loads(json.dumps(refs).replace('{{u}}', placeholder))
    for refs, placeholder in ((b, '{{v}}'), (c, '{{w}}'))
)

In [None]:
# Copy into the combined reference set every entry of b whose key contains
# the maximum-temperature variable name.
combined['refs'].update({
    key: ref
    for key, ref in b['refs'].items()
    if 'daily_maximum_temperature' in key
})

In [None]:
# Copy into the combined reference set every entry of c whose key contains
# the minimum-temperature variable name.
combined['refs'].update({
    key: ref
    for key, ref in c['refs'].items()
    if 'daily_minimum_temperature' in key
})

In [None]:
# Serialize the merged references to a local file named 'combined.json'.
with open('combined.json', 'w') as outfile:
    outfile.write(json.dumps(combined))

##### Push the virtual kerchunked dataset JSON to S3

In [None]:
# S3 filesystem used for the upload; created with the same arguments as
# the `fs` object at the top of the notebook.
fs_s3 = fsspec.filesystem("s3", profile="esip-qhub")

In [None]:
# Copy the locally written JSON to the bucket. The result is bound to `_`,
# presumably so the notebook does not echo it as cell output.
_ = fs_s3.upload(
    'combined.json',
    's3://esip-qhub/usgs/nhgf/gridmet_100mb/combined.json',
)

#### Examine the virtual Kerchunked dataset

In [None]:
# Location of the combined reference JSON on S3.
rpath = 's3://esip-qhub/usgs/nhgf/gridmet_100mb/combined.json'
# Options passed as `ref_storage_args` (used when loading the reference file).
s_opts = dict(requester_pays=True, skip_instance_cache=True)
# Options passed as `remote_options` (these go with `remote_protocol`).
r_opts = dict(requester_pays=True)
fs2 = fsspec.filesystem(
    "reference",
    fo=rpath,
    ref_storage_args=s_opts,
    remote_protocol='s3',
    remote_options=r_opts,
)

fsspec makes the remote files look like a zarr dataset on a file system:

In [None]:
# List the entries at the root of the reference filesystem.
fs2.ls('')

So we can read it into Xarray using the `zarr` engine:

In [None]:
# Get a mapper rooted at the top of the reference filesystem and open it
# with xarray's zarr engine, using explicit chunk sizes per dimension.
m = fs2.get_mapper("")
ds = xr.open_dataset(
    m,
    engine="zarr",
    chunks=dict(day=1220, lat=98, lon=231),
    backend_kwargs=dict(consolidated=False),
)

In [None]:
# Show the data variables of the combined dataset.
ds.data_vars

In [None]:
# Show the encoding dictionary xarray holds for the precipitation variable.
ds['precipitation_amount'].encoding

In [None]:
%%time
# Variable and day to plot.
var = 'precipitation_amount'
date = '2017-08-26'
# Select that day and load the values into memory before plotting.
ppt = ds[var].sel(day=date).load()
# Plot the selected field as an image on lon/lat axes over OSM tiles.
ppt.hvplot.image(x='lon', y='lat', geo=True, colormap='turbo', rasterize=True, 
                 tiles='OSM', title=f'{var}:{date}')

In [None]:
%%time
# Names of the two temperature variables to plot together.
var1 = 'daily_minimum_temperature'
var2 = 'daily_maximum_temperature'
# Select the grid cell nearest the given lon/lat and plot both variables.
ds[[var1, var2]].sel(lon=-105.1352977, lat=39.7633285, method='nearest').hvplot(grid=True)