# Sample Reference GOES NC File read with Python Xarray Engine

In [1]:
import zlib
import json
import boto3
import boto
import fsspec
import fsspec.utils
import numpy as np
import xarray as xr
import zarr
import os
import ujson
import s3fs
import numcodecs
import h5py

import kerchunk.combine
from kerchunk.zarr import single_zarr
from kerchunk.combine import MultiZarrToZarr
from kerchunk.hdf import SingleHdf5ToZarr
from pathlib import Path

In [2]:
# load meta data into python dict
json_path = "/Users/katrinasharonin/Downloads/kerchunkC/jsons/2023-006.json"
f = open(json_path)
meta = json.load(f)

print('metadata display')
print(meta.keys())

from itertools import islice

def take(n, iterable):
    """Return the first n items of the iterable as a list."""
    return list(islice(iterable, n))

n_items = take(10, meta['refs'].items())
print('sample dict values')
print(n_items[0])
print(n_items[1])
print(n_items[2])
print(n_items[3])
print(n_items[4])
print(n_items[5])

metadata display
dict_keys(['version', 'refs'])
sample dict values
('.zgroup', '{"zarr_format":2}')
('bandn/.zarray', '{\n    "chunks": [\n        1\n    ],\n    "compressor": null,\n    "dtype": "<i4",\n    "fill_value": null,\n    "filters": null,\n    "order": "C",\n    "shape": [\n        1\n    ],\n    "zarr_format": 2\n}')
('bandn/0', '\x01\x00\x00\x00')
('bandn/.zattrs', '{\n    "_ARRAY_DIMENSIONS": [\n        "bandn"\n    ]\n}')
('.zattrs', '{"Conventions":"CF-1.7","LUT_Filenames":"SpaceLookParams(FM2A_CDRL79RevP_PR_09_08_04)-724222106.0.h5 QTableBand01(FM2A_CDRL79RevM_DO_08_00_00)-601637632.0.h5 CalTargetTimeIntervals(FM2A_CDRL79RevP_DO_09_01_00)-611906700.0.h5 BandSaturationLimits(FM2A_CDRL79RevC_DO_07_00_00)-524728502.0.h5 SolarSpaceLookParams(FM2A_CDRL79RevN_DO_09_00_00)-600765435.0.h5 DeadRowListParams(FM2A_CDRL79RevC_DO_07_00_00)-524728502.0.h5 Mirror_Record(FM2A_CDRL79RevC_DO_08_00_00)-604635792.0.h5 KalmanAstroConsts(FM2A_CDRL79RevC_DO_07_00_00)-582860861.0.xml KalmanFi

### xarray engine demo; proper expected output

In [3]:
ds = xr.open_dataset("reference://",mask_and_scale=True, engine="zarr", backend_kwargs={
                    "consolidated": False,
                    "storage_options": {"fo": json_path}
                    })


# aim for 17th index for vals
# subset = ds.isel(band=slice(0, 1), time=slice(0, 1), x=slice(0, 226), y=slice(3842, 4068))["Rad"]
subset = ds.isel(band=slice(0, 1), time=slice(0, 1), x=slice(3842, 4068), y=slice(0, 226))["Rad"]
print(subset)

# Retrieve the values as a NumPy array
values = subset.values

# print(values)
# [226][3842:4068]
print(values[0][0][225])

# Filter and print non-NaN values
# non_nan_values = values[~np.isnan(values)]
# print(non_nan_values)

<xarray.DataArray 'Rad' (bandn: 1, time: 1, y: 226, x: 226)>
[51076 values with dtype=float32]
Coordinates:
  * bandn    (bandn) int32 1
    t        (bandn, time) datetime64[ns] ...
  * time     (time) datetime64[us] 2023-01-06T05:00:00
  * x        (x) float64 -0.04428 -0.04425 -0.04423 ... -0.03801 -0.03798
    x_image  (bandn, time) float32 ...
  * y        (y) float64 0.1519 0.1518 0.1518 0.1518 ... 0.1456 0.1456 0.1456
    y_image  (bandn, time) float32 ...
Attributes:
    ancillary_variables:    DQF
    cell_methods:           t: point area: point
    grid_mapping:           goes_imager_projection
    long_name:              ABI L1b Radiances
    resolution:             y: 0.000028 rad x: 0.000028 rad
    sensor_band_bit_depth:  10
    standard_name:          toa_outgoing_radiance_per_unit_wavelength
    units:                  W m-2 sr-1 um-1
    valid_range:            [0, 1022]
[        nan         nan         nan         nan         nan         nan
         nan         nan  

In [7]:
print(ds)

<xarray.Dataset>
Dimensions:                                           (bandn: 1, time: 6,
                                                       y: 10848, x: 10848,
                                                       band: 1,
                                                       num_star_looks: 24,
                                                       number_of_time_bounds: 2,
                                                       number_of_image_bounds: 2)
Coordinates:
    band_id                                           (bandn, time, band) int8 ...
    band_wavelength                                   (bandn, time, band) float32 ...
    band_wavelength_star_look                         (bandn, time, num_star_looks) float32 ...
  * bandn                                             (bandn) int32 1
    t                                                 (bandn, time) datetime64[ns] ...
    t_star_look                                       (bandn, time, num_star_looks) datetime64[ns

In [8]:
print(ds.variables)

Frozen({'DQF': <xarray.Variable (bandn: 1, time: 6, y: 10848, x: 10848)>
[706074624 values with dtype=float32]
Attributes: (12/14)
    cell_methods:                                           t: point area: point
    flag_meanings:                                          good_pixel_qf con...
    flag_values:                                            [0, 1, 2, 3, 4]
    grid_mapping:                                           goes_imager_proje...
    long_name:                                              ABI L1b Radiances...
    number_of_qf_values:                                    5
    ...                                                     ...
    percent_good_pixel_qf:                                  0.9999815225601196
    percent_no_value_pixel_qf:                              1.290000000153668...
    percent_out_of_range_pixel_qf:                          4.000000046744389...
    standard_name:                                          status_flag
    units:                    

### Break down steps given inconsistencies with translation

In [4]:
def bytes_to_binary(data):
    binary_string = ' '.join(format(byte, '08b') for byte in data)
    return binary_string

In [5]:
# taken directly from Rad metadata
# "{\"chunks\":[1,1,226,226],\"compressor\":null,
# \"dtype\":\"<i2\",\"fill_value\":1023,\"filters\":
# [{\"id\":\"zlib\",\"level\":1}],\"order\":\"C\",\
#   "shape\":[1,6,10848,10848],\"zarr_format\":2}"

# for index of 0.0.0.17:

start_byte = 25830
num_bytes = 806
dtype_extract = '<i2'
file_path =  "/Users/katrinasharonin/Downloads/GOES_17_recreation/GOES_17_Sample_Data/2023/006/05/OR_ABI-L1b-RadF-M6C01_G17_s20230060550309_e20230060559376_c20230060559423.nc"
add_offset = -25.9366474
scale_factor = 0.812106371

# local file read extract 
with open(file_path, 'rb') as file:
    file.seek(start_byte)
    content = file.read(num_bytes)

print('\n')
print('fetched sample bytes from nc')
print(content[:50])
binary_representation = bytes_to_binary(content[:40])
print(binary_representation)

buf = zlib.decompress(content)

print('\n')
print('after decompressing:')
for value in buf[:10]:
    print(bin(value)[2:].zfill(8) + " ", end = '')

# EXCLUDE SHUFFLE
# buf = numcodecs.shuffle.Shuffle(4).decode(buf)

# print('\n')
# print('after shuffle:')
# for value in buf[:20]:
#     print(bin(value)[2:].zfill(8) + " ", end = '')

# numpy dtype application
chunk = np.frombuffer(buf, dtype=dtype_extract)

# apply offset and scaling on data
chunk_scale_off = [x * scale_factor + add_offset for x in chunk]
# chunk_scale_off = [ (x-add_offset)*scale_factor for x in chunk]

print('\n')
print('chunk 1 using 0.0.0.17 indexing')
print(chunk[51050:])

print('\n')
print('applied offset and scale')
print(len(chunk_scale_off))
print(chunk_scale_off[51050:])



fetched sample bytes from nc
b'x\x01\xed\xdaYn\xc20\x10\x00P\xa4\x1e\xa4\xf4\xfe\x87\xa4E\x15\x15u\x13\xb7Yl\xcf\xb8O|\x80\x93x\x967D\x80\xc4\xed\xe5\xe6A\x80\x00\x01\x02\x04\x08\x10 '
01111000 00000001 11101101 11011010 01011001 01101110 11000010 00110000 00010000 00000000 01010000 10100100 00011110 10100100 11110100 11111110 10000111 10100100 01000101 00010101 00010101 01110101 00010011 10110111 01011001 01101100 11001111 10111000 01001111 01111100 10000000 10010011 01111000 10010110 00110111 01000100 10000000 11000100 11101101 11100101


after decompressing:
11111111 00000011 11111111 00000011 11111111 00000011 11111111 00000011 11111111 00000011 

chunk 1 using 0.0.0.17 indexing
[32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 31 32 32 32 32 32 32
 32 32]


applied offset and scale
51076
[0.05075647200000333, 0.05075647200000333, 0.05075647200000333, 0.05075647200000333, 0.05075647200000333, 0.05075647200000333, 0.05075647200000333, 0.05075647200000333, 0.05075647200000333, 0.

### idea: possible offset/scaling of vars throws off values

check to see if values even exist in the known arr

In [6]:
# flatten and check existing vars 
array1 = np.array(chunk_scale_off)
common_elements = np.intersect1d(array1, array2)

print(common_elements)

NameError: name 'array2' is not defined

### try low level h5py read 

In [14]:
with h5py.File(file_path, 'r') as file:
    print(file)
    print(file["Rad"])
    dataset = file["Rad"]
    # attributes
    print(dataset.attrs)
    # sample data dump
    # print(dataset[226][3842:4068])
    print(dataset[226][3842:4500])
    print(len(dataset[226][3842:]))

    # mimic s.isel(band=slice(0, 1), time=slice(0, 1), x=slice(0, 226), y=slice(3842, 4068))["Rad"]


<HDF5 file "OR_ABI-L1b-RadF-M6C01_G17_s20230060550309_e20230060559376_c20230060559423.nc" (mode r)>
<HDF5 dataset "Rad": shape (10848, 10848), type "<i2">
<Attributes of HDF5 object at 140271255064064>
[1023 1023 1023 1023 1023 1023 1023 1023 1023 1023 1023 1023 1023 1023
 1023 1023 1023 1023 1023 1023 1023 1023 1023 1023 1023 1023 1023 1023
 1023 1023 1023 1023 1023 1023 1023 1023 1023 1023 1023 1023 1023 1023
 1023 1023 1023 1023 1023 1023 1023 1023 1023 1023 1023 1023 1023 1023
 1023 1023 1023 1023 1023 1023 1023 1023 1023 1023 1023 1023 1023 1023
 1023 1023 1023 1023 1023 1023 1023 1023 1023 1023 1023 1023 1023 1023
 1023 1023 1023   32   32   32   32   32   32   32   32   32   32   32
   32   32   32   32   32   32   31   32   32   32   32   32   32   32
   32   32   32   32   32   32   32   32   32   32   32   32   32   32
   31   32   32   32   32   32   32   32   32   32   32   32   32   32
   32   32   32   32   32   32   32   32   32   32   32   32   32   32
   32   32   32  

Unfinished; work on local read configuration