In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import xarray as xr
import hvplot.xarray
import geoviews as gv
import geoviews.feature as gf
import cartopy.crs as ccrs
import matplotlib.pyplot as plt

# If you want a notebook representation of the xarray datasets
# with nested attributes, you can use the xopr accessor like this:
# > ds.xopr
import xopr.xarray_repr_fix.xopr_accessor

import xopr.opr_access as xopr

hvplot.extension('bokeh')

import fsspec
import h5py

In [None]:
# OLD version -- use the accessor instead
# from xopr.xarray_monkeypatch import formatting_html
# xr.Dataset._repr_html_ = formatting_html.dataset_repr

In [3]:
# Establish an OPR session. The resulting `opr` connection object is reused by
# the query / load cells further down.
# You'll probably want to set a cache directory if you're running this locally to speed
# up subsequent requests. You can do other things like customize the STAC API endpoint,
# but you shouldn't need to do that for most use cases.
opr = xopr.OPRConnection(cache_dir="radar_cache")

# Or you can open a connection without a cache directory (for example, if you're parallelizing
# this on a cloud cluster without persistent storage).
#opr = xopr.OPRConnection()

In [4]:
# Pick the season / flight to inspect. The commented-out line is an alternative
# selection without a specific flight.
# NOTE(review): with flight_id = None the query below receives flight_ids=[None];
# confirm that query_frames treats that as "no flight filter".
season, flight_id = '2022_Antarctica_BaslerMKB', '20230109_01'
#season, flight_id = '2016_Antarctica_DC8', None
print(f"Selected flight: {flight_id} from season {season}")

# Ask for at most one matching frame (max_items=1), then load the result.
# `frames` is indexed like a sequence by the cells below (frames[0]).
stac_items = opr.query_frames(seasons=[season], flight_ids=[flight_id], max_items=1)
frames = opr.load_frames(stac_items)

Selected flight: 20230109_01 from season 2022_Antarctica_BaslerMKB
Found 1 frames matching the query criteria
Failed to decode variable filter at /param_array/array_proc/fcs: maximum recursion depth exceeded in comparison
Failed to decode variable hidden_depend_funs at /param_array/cluster: setting an array element with a sequence. The requested array has an inhomogeneous shape after 4 dimensions. The detected shape was (31, 1, 2, 1) + inhomogeneous part.
Failed to decode variable hidden_depend_funs at /param_records/cluster: setting an array element with a sequence. The requested array has an inhomogeneous shape after 4 dimensions. The detected shape was (31, 1, 2, 1) + inhomogeneous part.
Failed to decode variable hidden_depend_funs at /param_sar/cluster: setting an array element with a sequence. The requested array has an inhomogeneous shape after 4 dimensions. The detected shape was (31, 1, 2, 1) + inhomogeneous part.
Failed to decode variable cmd at /param_sar/collate_coh_noise/pa

In [6]:
# Show the first loaded frame through the `xopr` accessor registered in the
# import cell (notebook representation that includes nested attributes).
frames[0].xopr

# NOTE: everything below this point is exploratory scratch work and can be ignored.

In [None]:
# Take the original file location recorded on the first frame and ask fsspec
# for a local path to it, using the URL prefix and cache settings held by the
# OPR connection.
# NOTE(review): the `filecache=` keyword presumably targets a `filecache::`
# protocol contained in `opr.fsspec_url_prefix` -- confirm.
source_url = frames[0].attrs['source_url']
file = fsspec.open_local(f"{opr.fsspec_url_prefix}{source_url}", filecache=opr.fsspec_cache_kwargs)

In [None]:
# Open the whole file (all groups) as a DataTree via the h5netcdf engine and
# display it. `phony_dims='sort'` is forwarded to h5netcdf.
xr.open_datatree(file, engine='h5netcdf', phony_dims='sort')

In [None]:
from collections.abc import Iterable

def dereference_h5value(value, h5file):
    """
    Recursively resolve a value read from an HDF5 (MATLAB v7.3) file into plain data.

    Parameters
    ----------
    value : object
        Anything met while walking the file: an ``h5py.Reference``, an
        ``h5py.Group``, an ``h5py.Dataset``, a NumPy scalar or array, a
        (possibly nested) list/tuple of those, or an already plain value.
    h5file : h5py.File
        Open file used to look up object references (``h5file[reference]``).

    Returns
    -------
    object
        * ``str`` / ``bytes``: returned unchanged.
        * ``list``, ``tuple``, ``np.ndarray`` or another iterable: a list
          whose elements have each been resolved recursively.
        * NumPy numeric scalar: the equivalent Python scalar.
        * ``h5py.Reference``: the resolved target of the reference.
        * ``h5py.Group``: the result of ``decode_matlab_variable``.
        * ``h5py.Dataset``: its contents with singleton axes squeezed out.
        * anything else: returned unchanged.

    Notes
    -----
    The specific types are tested *before* the generic ``Iterable`` fallback.
    ``h5py.Group`` and ``h5py.Dataset`` are themselves iterable, so testing
    ``Iterable`` first would make their branches unreachable, and iterating a
    string yields one-character strings that are iterable again, which
    recursed until ``RecursionError``.
    """
    # Text is atomic: never descend into its characters.
    if isinstance(value, (str, bytes)):
        return value

    # A 0-d array cannot be iterated; unwrap its single element first.
    if isinstance(value, np.ndarray) and value.ndim == 0:
        return dereference_h5value(value[()], h5file=h5file)

    # Plain containers (e.g. the object array obtained by reading a cell dataset).
    if isinstance(value, (list, tuple, np.ndarray)):
        return [dereference_h5value(v, h5file=h5file) for v in value]

    if isinstance(value, np.number):
        return value.item()

    # HDF5 objects: follow references, decode groups, read datasets.
    if isinstance(value, h5py.Reference):
        return dereference_h5value(h5file[value], h5file=h5file)
    if isinstance(value, h5py.Group):
        return decode_matlab_variable(value, h5file=h5file)
    if isinstance(value, h5py.Dataset):
        return np.squeeze(value[:])

    # Any other iterable is resolved element by element, as before.
    if isinstance(value, Iterable):
        return [dereference_h5value(v, h5file=h5file) for v in value]

    return value

def decode_matlab_variable(h5var, skip_variables=False, debug_path="", skip_errors=True, h5file=None):
    """
    Convert an object from a MATLAB v7.3 (HDF5) file into plain Python / NumPy data.

    The ``MATLAB_class`` attribute of ``h5var`` selects the decoding:

    * ``cell``: every object reference in the dataset is resolved with
      ``dereference_h5value`` and the result is squeezed.
    * ``char``: the stored character codes are turned into a ``str``.
    * a group / file (any other class): a ``dict`` mapping each member name to
      its decoded value; names starting with ``#`` are skipped.
    * anything else: the dataset contents with singleton axes squeezed out.

    Parameters
    ----------
    h5var : h5py.Dataset, h5py.Group or h5py.File
        Object to decode.
    skip_variables : bool, default False
        If True, datasets that are *direct* members of ``h5var`` are left out of
        the returned dict. The flag is deliberately not forwarded to nested
        groups, whose datasets are always decoded.
    debug_path : str, default ""
        Path prefix of ``h5var``, used only in the failure message.
    skip_errors : bool, default True
        If True, a dataset that cannot be decoded is reported with ``print`` and
        omitted; if False, the exception is re-raised after being reported.
    h5file : h5py.File, optional
        File used to resolve object references; defaults to ``h5var.file``.

    Returns
    -------
    dict, str or numpy.ndarray
        See the list above.
    """
    if h5file is None:
        h5file = h5var.file

    # The attribute may arrive as bytes / np.bytes_ or as str; compare on str.
    matlab_class = h5var.attrs.get('MATLAB_class', None)
    if isinstance(matlab_class, bytes):
        matlab_class = matlab_class.decode('ascii', errors='replace')

    if matlab_class == 'cell':
        return np.squeeze(dereference_h5value(h5var[:], h5file=h5file))

    if matlab_class == 'char':
        # An empty MATLAB array is flagged with MATLAB_empty and the dataset
        # then holds the array dimensions rather than character codes.
        if h5var.attrs.get('MATLAB_empty', 0):
            return ''
        codes = np.asarray(h5var[:])
        if codes.dtype.itemsize == 1:
            # Single-byte storage: keep interpreting the bytes as UTF-8.
            return codes.astype(np.uint8).tobytes().decode('utf-8')
        # MATLAB stores char data as 16-bit code units. Casting them to uint8
        # (as was done before) corrupts every non-ASCII character.
        return codes.astype('<u2').tobytes().decode('utf-16-le')

    if isinstance(h5var, (h5py.Group, h5py.File)):
        attrs = {}
        for k in h5var:
            if k.startswith('#'):
                continue
            child = h5var[k]
            child_path = debug_path + "/" + k
            if isinstance(child, h5py.Dataset):
                if skip_variables:
                    continue
                try:
                    attrs[k] = decode_matlab_variable(child, debug_path=child_path, skip_errors=skip_errors, h5file=h5file)
                except Exception as e:
                    print(f"Failed to decode variable {k} at {debug_path}: {e}")
                    if not skip_errors:
                        # Bare raise keeps the original traceback.
                        raise
            else:
                attrs[k] = decode_matlab_variable(child, debug_path=child_path, skip_errors=skip_errors, h5file=h5file)
        return attrs

    return np.squeeze(h5var[:])


# Open the root group of the file as an xarray Dataset.
ds = xr.open_dataset(file, engine='h5netcdf', phony_dims='sort')
# A bare expression in the middle of a cell is not displayed; only the last
# expression of the cell is.
ds

# Open the same file a second time with h5py so the MATLAB structures can be
# walked directly. NOTE: this handle is never closed in the notebook.
h5pyfile = h5py.File(file, 'r')
#h5group_to_ds_attrs(h5pyfile, ds.attrs, skip_variables=True)

# Decode the file into a nested dict and store it as a dataset attribute.
# skip_variables=True leaves out the datasets that sit directly at the root;
# datasets inside nested groups are still decoded.
ds.attrs['properties'] = decode_matlab_variable(h5pyfile, skip_variables=True, skip_errors=True)

# NOTE(review): `radar` is not defined anywhere in the visible cells -- presumably
# an accessor or variable provided by xopr / the file; confirm.
ds.radar

In [None]:
# h5py is already imported in the first cell; the import is repeated here so the
# cell can be run on its own.
import h5py
# Rebinds `h5pyfile` to a fresh read-only handle (any earlier handle is not
# closed explicitly) and lists the names at the root of the file.
h5pyfile = h5py.File(file, 'r')
h5pyfile.keys()

In [None]:
# Inspect /param_array/array_proc/fcs/filter, the variable reported as
# "Failed to decode variable filter" in the load output above.
# Take the first element of the dataset (expected to be an object reference).
ref = h5pyfile['param_array']['array_proc']['fcs']['filter'][:].flat[0]
#h5pyfile[ref]['function_handle']['file']
# Resolve and print every member of the referenced object's `function_handle` group.
for k in h5pyfile[ref]['function_handle'].keys():
    print(dereference_h5value(h5pyfile[ref]['function_handle'][k], h5file=h5pyfile))
# Display the group object itself as the cell output.
h5pyfile[ref]['function_handle']

In [None]:
# Decode the whole `fcs` group with the default options (failures are printed
# and skipped) and display the resulting dict.
decode_matlab_variable(h5pyfile['param_array']['array_proc']['fcs'])

In [None]:
# Element-wise wrapper around dereference_h5value bound to the open h5py file.
# NOTE(review): np.vectorize expects a scalar result per element by default, so
# this presumably breaks when an element resolves to an array or list --
# consider otypes=[object]; confirm against the data.
vectorized_dereference = np.vectorize(lambda x: dereference_h5value(x, h5pyfile))

# Apply it to every element read from /param_records/array/imgs.
vectorized_dereference(h5pyfile['param_records']['array']['imgs'][:])

In [None]:
# Keep the element at position [0][0] of /param_records/array/imgs for manual
# inspection (overwrites the `ref` set in an earlier cell).
ref = h5pyfile['param_records']['array']['imgs'][0][0]
#h5pyfile[ref][:]

In [None]:
# Open the same file through h5netcdf directly (the engine xarray used above)
# and display the file object. The handle is not closed in the notebook.
import h5netcdf

h5file = h5netcdf.File(file, 'r', phony_dims='sort')
h5file #.groups['param_array']

In [None]:
# Dump every variable stored directly in the `param_records` group:
# its name, its attributes and its full contents. Members that are not
# h5netcdf variables (e.g. sub-groups) are not printed.
param_records = h5file.groups['param_records']
for name in param_records:
    member = param_records[name]
    if not isinstance(member, h5netcdf.Variable):
        continue
    print(name)
    print(member.attrs)
    print(member[...])