In [None]:
# default_exp core

# Core reading utils
> Utilities for lazily reading Diviner raster images into (stacked) `xarray.DataArray` objects.

In [None]:
# hide
from nbdev.showdoc import *

In [None]:
# export
import fnmatch
import socket
from pathlib import Path

import xarray as xr

# ON_LUNA is True when this machine's hostname matches the pattern
# "luna?.diviner.ucla.edu" ("?" stands for exactly one character).
hostname = socket.gethostname()
ON_LUNA = fnmatch.fnmatch(hostname, "luna?.diviner.ucla.edu")

In [None]:
# export
def raster_to_xarray(fpath, chunk_scale=2):
    """Open a raster image lazily as a chunked xarray.DataArray.

    Passing `chunks` to `open_rasterio` makes it return an
    out-of-memory virtual array rather than loading the whole image
    into an in-memory xarray.DataArray.

    fpath: pathlib.Path, str
        Location of the raster file.
    chunk_scale: int
        Factor applied to the base chunk sizes of 2048 (x axis) and
        1024 (y axis).

    Returns whatever `xr.open_rasterio` yields for that chunking.
    """
    # NOTE(review): `xr.open_rasterio` is reportedly deprecated in newer
    # xarray releases in favour of rioxarray -- confirm against the
    # xarray version this project pins.
    base_chunks = {"x": 2048, "y": 1024}
    chunks = {axis: chunk_scale * size for axis, size in base_chunks.items()}
    return xr.open_rasterio(Path(fpath), chunks=chunks)

Passing the `chunks` parameter to `open_rasterio` returns a lazy, dask-backed `xarray.DataArray` instead of an in-memory one.
To actually see the values of the array, call its `compute()` method:

In [None]:
# Collect the GeoTIFF files in the L3 data directory and preview the first three.
# NOTE(review): hardcoded absolute path -- presumably only resolvable on the
# Diviner "luna" hosts; confirm before running elsewhere.
l3_path = Path("/luna4/maye/l3_data/")
fpaths = list(l3_path.glob("*.tif"))
fpaths[:3]

[Path('/luna4/maye/l3_data/dgdr_st_clc_cyl_20120405n_128_jp2.tif'),
 Path('/luna4/maye/l3_data/dgdr_st_clc_cyl_20100523n_128_jp2.tif'),
 Path('/luna4/maye/l3_data/dgdr_st_clc_cyl_20090705n_128_jp2.tif')]

In [None]:
# Open the first file with the default chunk_scale; displaying `da`
# shows the chunked array summary.
da = raster_to_xarray(fpaths[0])
da

Unnamed: 0,Array,Chunk
Bytes,1.76 GiB,16.00 MiB
Shape,"(1, 20480, 46080)","(1, 2048, 4096)"
Count,121 Tasks,120 Chunks
Type,int16,numpy.ndarray
"Array Chunk Bytes 1.76 GiB 16.00 MiB Shape (1, 20480, 46080) (1, 2048, 4096) Count 121 Tasks 120 Chunks Type int16 numpy.ndarray",46080  20480  1,

Unnamed: 0,Array,Chunk
Bytes,1.76 GiB,16.00 MiB
Shape,"(1, 20480, 46080)","(1, 2048, 4096)"
Count,121 Tasks,120 Chunks
Type,int16,numpy.ndarray


In [None]:
# Select the y range from 79 to 78 and compute its maximum; `compute()`
# triggers the actual evaluation.
# NOTE(review): the slice runs high-to-low, presumably because the y
# coordinate is stored in descending order -- confirm.
da.sel(y=slice(79, 78)).max().compute()

In [None]:
# export
def read_images_into_stacked_array(image_paths, name, chunk_scale=2):
    """Read several raster images and concatenate them along "time".

    image_paths: iterable of str or pathlib.Path
        Raster files; each one is opened with `raster_to_xarray`.
    name: str
        Assigned to the `name` attribute of the concatenated array.
    chunk_scale: int
        Passed on to `raster_to_xarray` for every image.

    Returns the result of `xr.concat` over all images.
    """
    layers = []
    for path in image_paths:
        layers.append(raster_to_xarray(path, chunk_scale))
    stacked = xr.concat(layers, "time")
    stacked.name = name
    return stacked

In [None]:
# Stack all files along a "time" dimension and name the result "soil_temp".
read_images_into_stacked_array(fpaths, "soil_temp")

Unnamed: 0,Array,Chunk
Bytes,181.05 GiB,16.00 MiB
Shape,"(103, 1, 20480, 46080)","(1, 1, 2048, 4096)"
Count,37183 Tasks,12360 Chunks
Type,int16,numpy.ndarray
"Array Chunk Bytes 181.05 GiB 16.00 MiB Shape (103, 1, 20480, 46080) (1, 1, 2048, 4096) Count 37183 Tasks 12360 Chunks Type int16 numpy.ndarray",103  1  46080  20480  1,

Unnamed: 0,Array,Chunk
Bytes,181.05 GiB,16.00 MiB
Shape,"(103, 1, 20480, 46080)","(1, 1, 2048, 4096)"
Count,37183 Tasks,12360 Chunks
Type,int16,numpy.ndarray


In [None]:
# export
def mapcycle_to_xarray(fpath, chunk_scale=2):
    """Read a Diviner map cycle raster image into a chunked xarray array.

    The cycle identifier is parsed from the filename and stored as the
    coordinate value of the band dimension, which is then renamed to
    "mapcycle" (y and x become "lat" and "lon").

    fpath: str, pathlib.Path
        Raster file. Its name must carry the cycle identifier as the
        fifth underscore-separated field, followed by one trailing
        character, e.g. ``dgdr_st_clc_cyl_20120405n_128_jp2.tif``
        yields cycle 20120405.
    chunk_scale: int
        Chunk-size multiplier, forwarded to `raster_to_xarray`.

    Returns the array with dimensions mapcycle/lat/lon and with
    `long_name` and `units` attributes set on lat and lon.

    Raises IndexError or ValueError when the filename does not follow
    the expected naming pattern.
    """
    fpath = Path(fpath)
    # chunk_scale has to be forwarded explicitly; otherwise the reader
    # falls back to its own default and the caller's value is ignored.
    da = raster_to_xarray(fpath, chunk_scale)
    # Fifth "_"-separated field minus its last character is the cycle id.
    cycle = int(fpath.name.split("_")[4][:-1])
    da = da.assign_coords(band=[cycle])
    da = da.rename({"band": "mapcycle", "y": "lat", "x": "lon"})
    for coord, long_name in (("lat", "Latitude"), ("lon", "Longitude")):
        da[coord].attrs["long_name"] = long_name
        da[coord].attrs["units"] = "deg"
    return da

In [None]:
# Read the first file as a single map cycle (band/y/x renamed to mapcycle/lat/lon).
mapcycle_to_xarray(fpaths[0])

Unnamed: 0,Array,Chunk
Bytes,1.76 GiB,16.00 MiB
Shape,"(1, 20480, 46080)","(1, 2048, 4096)"
Count,121 Tasks,120 Chunks
Type,int16,numpy.ndarray
"Array Chunk Bytes 1.76 GiB 16.00 MiB Shape (1, 20480, 46080) (1, 2048, 4096) Count 121 Tasks 120 Chunks Type int16 numpy.ndarray",46080  20480  1,

Unnamed: 0,Array,Chunk
Bytes,1.76 GiB,16.00 MiB
Shape,"(1, 20480, 46080)","(1, 2048, 4096)"
Count,121 Tasks,120 Chunks
Type,int16,numpy.ndarray


In [None]:
# export
def read_mapcycles_into_stacked_array(image_paths, name, chunk_scale=2):
    """Read several map cycle images and concatenate them along "mapcycle".

    image_paths: iterable of str or pathlib.Path
        Raster files; each one is read with `mapcycle_to_xarray`.
    name: str
        Assigned to the `name` attribute of the concatenated array.
    chunk_scale: int
        Passed on to `mapcycle_to_xarray` for every image.

    Reading and concatenation run with xarray's `keep_attrs` option
    enabled. Returns the result of `xr.concat` over all images.
    """
    with xr.set_options(keep_attrs=True):
        cycles = []
        for path in image_paths:
            cycles.append(mapcycle_to_xarray(path, chunk_scale))
        stacked = xr.concat(cycles, "mapcycle")
        stacked.name = name
        return stacked

In [None]:
# Stack all files along the "mapcycle" dimension and name the result "soil_temp".
read_mapcycles_into_stacked_array(fpaths, "soil_temp")

Unnamed: 0,Array,Chunk
Bytes,181.05 GiB,16.00 MiB
Shape,"(103, 20480, 46080)","(1, 2048, 4096)"
Count,24823 Tasks,12360 Chunks
Type,int16,numpy.ndarray
"Array Chunk Bytes 181.05 GiB 16.00 MiB Shape (103, 20480, 46080) (1, 2048, 4096) Count 24823 Tasks 12360 Chunks Type int16 numpy.ndarray",46080  20480  103,

Unnamed: 0,Array,Chunk
Bytes,181.05 GiB,16.00 MiB
Shape,"(103, 20480, 46080)","(1, 2048, 4096)"
Count,24823 Tasks,12360 Chunks
Type,int16,numpy.ndarray
