# Generic NB for Testing Values on Slicing

### Generate metadata for inspection

In [1]:
# gen ref for metadata inspection
import kerchunk, fsspec, ujson
from kerchunk import hdf

def process(url, 
            outputfile, 
            storage_options_in={}, 
            storage_options_out={}):
    """ generate h5 ref file using kerchunk known lib
        https://github.com/fsspec/kerchunk/blob/37d75267cbf1c8d6ee1a9a69764cb661aa3f5e29/docs/source/advanced.rst#L30
        class SingleHdf5ToZarr
    """
    transformer = hdf.SingleHdf5ToZarr(url, **storage_options_in)
    refs = transformer.translate()
    with fsspec.open(outputfile, mode="wt", **storage_options_out) as f:
        ujson.dump(refs, f)

URL = "/Users/katrinasharonin/Downloads/OR_ABI-L2-FDCC-M3_G17_s20182390052191_e20182390054564_c20182390055159.nc"
# URL = "/Users/katrinasharonin/Downloads/h5ex_d_gzip.h5"
OUTPUTFILE = "/Users/katrinasharonin/Downloads/kerchunkC/jsons/sliderule_md_inspect.json"

print("Start JSON extraction from H5 file...")
process(URL, OUTPUTFILE)
print("JSON extraction complete")

Start JSON extraction from H5 file...
JSON extraction complete


### Slice and dice

In [2]:
import xarray as xr 

group_path = "ancillary_data/calibrations/first_photon_bias/gt1l"
ds = xr.open_dataset(URL) #, group=group_path)

varible = ds["Power"]
print(varible.attrs)
fill = varible.attrs["standard_name"]
print(fill)

{'long_name': 'ABI L2+ Fire-Hot Spot Characterization: Fire Radiative Power', 'standard_name': 'fire_radiative_power', 'valid_range': array([     0., 200000.], dtype=float32), 'units': 'MW', 'resolution': 'y: 0.000056 rad x: 0.000056 rad', 'grid_mapping': 'goes_imager_projection', 'cell_measures': 'area: Area', 'cell_methods': 'sunglint_angle: point (no pixel produced) local_zenith_angle: point (good quality pixel produced) solar_zenith_angle: point (good quality pixel produced) t: point', 'ancillary_variables': 'DQF'}
fire_radiative_power


In [3]:
print("Variables:")
for variable_name in ds.variables:
    print(variable_name)
    # for attr_name in ds[variable_name].attrs:
       #  print(attr_name)
    #     if attr_name == "_FillValue":
    #         print("found")
    #         print(variable_name)
    #         print(attr_name)


print("_FillValue" in ds.variables) # not a global one

Variables:
Area
Temp
Mask
Power
DQF
t
y
x
time_bounds
goes_imager_projection
y_image
y_image_bounds
x_image
x_image_bounds
nominal_satellite_subpoint_lat
nominal_satellite_subpoint_lon
nominal_satellite_height
geospatial_lat_lon_extent
sunglint_angle
sunglint_angle_bounds
local_zenith_angle
local_zenith_angle_bounds
solar_zenith_angle
solar_zenith_angle_bounds
total_number_of_pixels_with_fires_detected
total_number_of_pixels_with_fire_temperature
total_number_of_pixels_with_fire_area
total_number_of_pixels_with_fire_radiative_power
fire_temperature_outlier_pixel_count
fire_area_outlier_pixel_count
fire_radiative_power_outlier_pixel_count
minimum_fire_temperature
maximum_fire_temperature
mean_fire_temperature
standard_deviation_fire_temperature
minimum_fire_area
maximum_fire_area
mean_fire_area
standard_deviation_fire_area
minimum_fire_radiative_power
maximum_fire_radiative_power
mean_fire_radiative_power
standard_deviation_fire_radiative_power
algorithm_dynamic_input_data_container
pro

In [5]:
print("\n Attributes")
for attr_name in ds["DQF"].attrs:
    print(attr_name)
    # if "_" in attr_name:
        # print(attr_name)
print(type(ds.attrs))
print(type(attr_name))

print("_FillValue" in ds.attrs.keys())


 Attributes
long_name
standard_name
valid_range
units
resolution
grid_mapping
cell_measures
cell_methods
ancillary_variables
<class 'dict'>
<class 'str'>
False


KeyError: 'scale_factor'

In [11]:
# attr version
print(ds.attrs.keys())
print('version' in ds.attrs.keys())
version_attribute = ds.attrs.get('version', 'Version attribute not found')
print(version_attribute)

dict_keys(['naming_authority', 'Conventions', 'Metadata_Conventions', 'standard_name_vocabulary', 'institution', 'project', 'production_site', 'production_environment', 'spatial_resolution', 'orbital_slot', 'platform_ID', 'instrument_type', 'scene_id', 'instrument_ID', 'dataset_name', 'iso_series_metadata_id', 'title', 'summary', 'keywords', 'keywords_vocabulary', 'license', 'processing_level', 'date_created', 'cdm_data_type', 'time_coverage_start', 'time_coverage_end', 'timeline_id', 'production_data_source', 'id'])
False
Version attribute not found


In [12]:
# netcdf visit
import netCDF4
nc_file = netCDF4.Dataset(URL, 'r')
format_version = nc_file.file_format
print(format_version)

NETCDF4


In [13]:
# try raw/IO request with offset 

def read_netcdf_version(file_path):
    try:
        with open(file_path, 'rb') as file:
            
            magic_bytes = file.read(4)
            version = file.read(1)

            
            if magic_bytes == b'CDF\x01' and version == b'\x02':
                print(magic_bytes)
                return 'NetCDF-4'
            elif magic_bytes == b'CDF\x01' and version == b'\x01':
                print(magic_bytes)
                return 'NetCDF Classic'
            else:
                print(magic_bytes)
                return 'Unknown NetCDF Version'
    except Exception as e:
        return f"Error: {str(e)}"

file_path = URL
version = read_netcdf_version(file_path)
print(f"The NetCDF version of the file is: {version}")


b'\x89HDF'
The NetCDF version of the file is: Unknown NetCDF Version


In [17]:
file_path = URL
alt_path = "/Users/katrinasharonin/Downloads/h5ex_d_gzip.h5"

try:
    with open(file_path, 'rb') as file:
        print(file.read(20))

except Exception as e:
    print(f"Error: {str(e)}")

try:
    with open(alt_path, 'rb') as file:
        print(file.read(20))

except Exception as e:
    print(f"Error: {str(e)}")

b'\x89HDF\r\n\x1a\n\x02\x08\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00'
b'\x89HDF\r\n\x1a\n\x00\x00\x00\x00\x00\x08\x08\x00\x04\x00\x10\x00'
