# Subset operator with kerchunk

* https://projectpythia.org/kerchunk-cookbook/notebooks/using_references/Datatree.html
* https://guide.cloudnativegeo.org/kerchunk/kerchunk-in-practice.html

## Import clisops tools

In [None]:
import clisops.utils.dataset_utils as clidu
from clisops.ops.subset import subset

## Example with testdata from CEDA

In [None]:
import clisops.utils.testing as clite

# Fetch the kerchunk test datasets; the result is indexed by dataset key
# in the cells below.
mini_esgf_data = clite.get_kerchunk_datasets()
# Bare expression so the notebook displays the available entries.
mini_esgf_data

### Open remote dataset with clisops

In [None]:
# Open the kerchunk reference that is stored as a .json file.

ds = clidu.open_xr_dataset(mini_esgf_data['CMIP6_KERCHUNK_HTTPS_OPEN_JSON'])
ds

In [None]:
# Open the kerchunk reference that is stored as a compressed .zst file.
# NOTE: this rebinds `ds`, so the subset cell that follows operates on the
# dataset opened here rather than the one opened from the .json reference.

ds = clidu.open_xr_dataset(mini_esgf_data['CMIP6_KERCHUNK_HTTPS_OPEN_ZST'])
ds

### Subset remote dataset with clisops

In [None]:
# Subset the remote dataset in time and space, keeping the result in memory.
outputs = subset(
    ds=ds,
    # Time range written as "start/end": the whole of the year 1900.
    time="1900-01-01/1900-12-31",
    # Bounding box; presumably (lon_min, lat_min, lon_max, lat_max) --
    # confirm against the clisops subset documentation.
    area=(0.0, 10.0, 175.0, 90.0),
    # Ask for xarray objects back; the result is indexed like a list below.
    output_type="xarray",
)

print(f"There are {len(outputs)} outputs.")
outputs[0]

## Example from Project Pythia

Open the Project Pythia dataset with xarray.

* https://projectpythia.org/kerchunk-cookbook/notebooks/using_references/Datatree.html

### Test dataset

In [None]:
# S3 location of the parquet reference used by the remaining cells; the path
# names the NASA NEX "ACCESS-CM2_historical" references.
url = 's3://carbonplan-share/nasa-nex-reference/references_prod/ACCESS-CM2_historical/reference.parquet'


### Open remote s3 dataset with xarray open_dataset

In [None]:
import xarray as xr
from fsspec.implementations.reference import ReferenceFileSystem

# Build a reference filesystem on top of the parquet reference at `url`.
# Per the fsspec documentation, the target_* settings apply to loading the
# reference itself and the remote_* settings to the data it points at; both
# use anonymous S3 access here.
fs = ReferenceFileSystem(
    url,
    remote_protocol="s3",
    target_protocol="s3",
    remote_options={"anon": True},
    target_options={"anon": True},
    lazy=True,
)

# Read the reference store through the zarr engine, as an unconsolidated
# zarr v2 store, chunked along "time" in blocks of 3.
ds = xr.open_dataset(
    fs.get_mapper(),
    engine="zarr",
    backend_kwargs={"consolidated": False, "zarr_format": 2},
    chunks={"time": 3},
)

ds

### Open dataset with xarray open_zarr

In [None]:
import fsspec

# Alternative route: let fsspec build the mapper from the "reference://"
# protocol, with the parquet reference passed as `fo`.
mapper = fsspec.get_mapper(
    "reference://",
    fo=url,
    target_options={"anon": True},
    remote_options={"anon": True},
    remote_protocol="s3",
    target_protocol="s3",
)

# Open the mapper as an unconsolidated zarr v2 store.
ds = xr.open_zarr(mapper, consolidated=False, zarr_format=2)
ds

### Open Pythia s3 dataset with clisops

In [None]:
# Anonymous-S3 settings for both the reference ("target") and the data it
# points at ("remote"); these match the values given to fsspec in the
# earlier cells.
options = {
    "remote_protocol": "s3",
    "target_protocol": "s3",
    "remote_options": {"anon": True},
    "target_options": {"anon": True},
}

# Hand the reference URL to clisops, passing the settings as keyword arguments.
ds = clidu.open_xr_dataset(url, **options)
ds

### Subset Pythia S3 dataset with clisops

In [None]:
# Subset the S3-backed dataset in time and space, keeping the result in memory.
outputs = subset(
    ds=ds,
    # Time range written as "start/end": January 2000.
    time="2000-01-01/2000-01-31",
    # Bounding box; presumably (lon_min, lat_min, lon_max, lat_max) --
    # confirm against the clisops subset documentation.
    area=(0.0, 10.0, 175.0, 90.0),
    # Ask for xarray objects back; the result is indexed like a list below.
    output_type="xarray",
)

print(f"There are {len(outputs)} outputs.")
outputs[0]