In [1]:
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr

import subprocess
from functools import reduce

In [2]:
from ufs2arco import sources

In [3]:
# Sample the archive at one initial time per year (freq "1YE") from
# 2015-12-31 through 2024-12-31, each at forecast hours 0 and 6.
hrrr = sources.AWSHRRRArchive(
    t0=dict(start="2015-12-31T00", end="2024-12-31T00", freq="1YE"),
    fhr=dict(start=0, end=6, step=6),
)

### First, figure out which stepTypes are available for the `atmosphere` typeOfLevel

In [4]:
# GRIB typeOfLevel examined throughout this notebook: used below to filter the
# grib_ls listing and as a filter_by_keys value when opening files.
typeOfLevel = "atmosphere"

In [5]:
# Build dsdict[t0][fhr] -> sorted list of the distinct stepType values that
# grib_ls reports for messages whose typeOfLevel equals `typeOfLevel`.
#
# NOTE(review): this scan reads the "sfc" files, whereas every later cell opens
# and records the "prs" files -- confirm that the suffix here is intentional.
dsdict = {}
for t0 in hrrr.t0:
    dsdict[t0] = {}

    for fhr in hrrr.fhr:
        print(f"Reading (t0, fhr) = ({str(t0)}, {int(fhr)})")
        grib_path = hrrr._open_local(
            dims={"t0": t0, "fhr": fhr},
            file_suffix="sfc",
            cache_dir="./gribcache",
        )
        # Ask grib_ls to print only the two keys we need, one row per message.
        listing = subprocess.check_output(
            ["grib_ls", "-p", "typeOfLevel,stepType", grib_path],
            stderr=subprocess.DEVNULL,
        ).decode()

        found = set()
        for row in listing.splitlines():
            fields = row.split()
            if len(fields) < 2:
                continue
            # The last two whitespace-separated fields are read as
            # (typeOfLevel, stepType); rows that are not message rows are
            # discarded by the typeOfLevel comparison.
            if fields[-2] == typeOfLevel:
                found.add(fields[-1])
        dsdict[t0][fhr] = sorted(found)

Reading (t0, fhr) = (2015-12-31 00:00:00, 0)
Reading (t0, fhr) = (2015-12-31 00:00:00, 6)
Reading (t0, fhr) = (2016-12-31 00:00:00, 0)
Reading (t0, fhr) = (2016-12-31 00:00:00, 6)
Reading (t0, fhr) = (2017-12-31 00:00:00, 0)
Reading (t0, fhr) = (2017-12-31 00:00:00, 6)
Reading (t0, fhr) = (2018-12-31 00:00:00, 0)
Reading (t0, fhr) = (2018-12-31 00:00:00, 6)
Reading (t0, fhr) = (2019-12-31 00:00:00, 0)
Reading (t0, fhr) = (2019-12-31 00:00:00, 6)
Reading (t0, fhr) = (2020-12-31 00:00:00, 0)
Reading (t0, fhr) = (2020-12-31 00:00:00, 6)
Reading (t0, fhr) = (2021-12-31 00:00:00, 0)
Reading (t0, fhr) = (2021-12-31 00:00:00, 6)
Reading (t0, fhr) = (2022-12-31 00:00:00, 0)
Reading (t0, fhr) = (2022-12-31 00:00:00, 6)
Reading (t0, fhr) = (2023-12-31 00:00:00, 0)
Reading (t0, fhr) = (2023-12-31 00:00:00, 6)
Reading (t0, fhr) = (2024-12-31 00:00:00, 0)
Reading (t0, fhr) = (2024-12-31 00:00:00, 6)


In [6]:
# Summarize the stepTypes found for each initial time: one tab-separated column
# per forecast hour, in the order the hours were scanned. Iterating the stored
# values (rather than indexing hours 0 and 6 by hand) keeps this cell working
# if the forecast-hour configuration changes.
for t0, fdict in dsdict.items():
    print(f"t0 = {t0}")
    print("\t" + " \t ".join(str(found) for found in fdict.values()))

t0 = 2015-12-31 00:00:00
	['instant'] 	 ['instant']
t0 = 2016-12-31 00:00:00
	['instant'] 	 ['instant']
t0 = 2017-12-31 00:00:00
	['instant'] 	 ['instant']
t0 = 2018-12-31 00:00:00
	['instant', 'max'] 	 ['instant', 'max']
t0 = 2019-12-31 00:00:00
	['instant', 'max'] 	 ['instant', 'max']
t0 = 2020-12-31 00:00:00
	['instant', 'max'] 	 ['instant', 'max']
t0 = 2021-12-31 00:00:00
	['instant', 'max'] 	 ['instant', 'max']
t0 = 2022-12-31 00:00:00
	['instant', 'max'] 	 ['instant', 'max']
t0 = 2023-12-31 00:00:00
	['instant', 'max'] 	 ['instant', 'max']
t0 = 2024-12-31 00:00:00
	['instant', 'max'] 	 ['instant', 'max']


OK, for now we'll only handle the `instant` stepType, since `max` only shows up from 2018 onward.

### Now, get the variables

In [7]:
# Reminder of which typeOfLevel the following cells filter on.
print(typeOfLevel)

atmosphere


In [8]:
# Build vdict[stepType][t0][fhr] -> set of data variable names found in the
# "prs" file when filtered to `typeOfLevel` and the given stepType.
# Only the "instant" stepType is examined.
vdict = {}
for stepType in ("instant",):
    vdict[stepType] = {}
    for t0 in hrrr.t0:
        vdict[stepType][t0] = {}
        for fhr in hrrr.fhr:
            xds = hrrr.open_grib(
                dims={"t0": t0, "fhr": fhr},
                file_suffix="prs",
                cache_dir="./gribcache",
                filter_by_keys={
                    "typeOfLevel": typeOfLevel,
                    "stepType": stepType,
                },
            )
            # Keep only the names; the dataset itself is not needed here.
            vdict[stepType][t0][fhr] = set(xds.data_vars)

Ignoring index file './gribcache/d815141b7bfc22ca98a97856ed62fa1ea8efcc976cae7d76820d2bba9349bc87.5b7b6.idx' older than GRIB file
Ignoring index file './gribcache/0fa0a73761223a93595cf6b74398fd90d86a835df3a796a2826c4e64d93d6365.5b7b6.idx' older than GRIB file
Ignoring index file './gribcache/85efab00e589ff6f9977f0e461f55d00cc4210c96cc8c1206ca9c495f991ca37.5b7b6.idx' older than GRIB file
Ignoring index file './gribcache/c15c8471240d8f68df4b8e55af2f931dfff1a9cf64d2d77dd690f0a114f515e8.5b7b6.idx' older than GRIB file
Ignoring index file './gribcache/017bef1204f0aa5db22aea27f9eccd43f901b470d4697492abd414bea7e17c71.5b7b6.idx' older than GRIB file
Ignoring index file './gribcache/037b21da7a5ae78ef8140227dbfd0571e8957b117f271baaf954c07a56b92038.5b7b6.idx' older than GRIB file
Ignoring index file './gribcache/a1121d5d9d9f6dbf4fe59daf7aaeca5c8b341147d8e9ed5cd49c535fb08e27e7.5b7b6.idx' older than GRIB file
Ignoring index file './gribcache/9269fbe54d5443f04d4d68af4671d69cf85622b3da93e25597628fc49

In [11]:
# Inspect the collected names, nested as stepType -> t0 -> fhr -> set of variables.
vdict

{'instant': {Timestamp('2015-12-31 00:00:00'): {np.int64(0): {'refc',
    'tcc',
    'veril'},
   np.int64(6): {'refc', 'tcc', 'veril'}},
  Timestamp('2016-12-31 00:00:00'): {np.int64(0): {'ltng',
    'refc',
    'tcc',
    'unknown',
    'veril'},
   np.int64(6): {'ltng', 'refc', 'tcc', 'unknown', 'veril'}},
  Timestamp('2017-12-31 00:00:00'): {np.int64(0): {'ltng',
    'refc',
    'tcc',
    'unknown',
    'veril'},
   np.int64(6): {'ltng', 'refc', 'tcc', 'unknown', 'veril'}},
  Timestamp('2018-12-31 00:00:00'): {np.int64(0): {'ltng',
    'refc',
    'tcc',
    'unknown',
    'veril'},
   np.int64(6): {'ltng', 'refc', 'tcc', 'unknown', 'veril'}},
  Timestamp('2019-12-31 00:00:00'): {np.int64(0): {'ltng',
    'refc',
    'tcc',
    'unknown',
    'veril'},
   np.int64(6): {'ltng', 'refc', 'tcc', 'unknown', 'veril'}},
  Timestamp('2020-12-31 00:00:00'): {np.int64(0): {'ltng',
    'refc',
    'tcc',
    'unknown',
    'veril'},
   np.int64(6): {'ltng', 'refc', 'tcc', 'unknown', 'veril'}

In [12]:
# For every (stepType, t0), report any forecast hour whose variable set holds
# names that are not shared by all forecast hours of that t0. No output means
# the analysis (fhr == 0) and forecast files expose the same variables.
for stepType, d2 in vdict.items():
    for t0, d3 in d2.items():
        # Variables common to every forecast hour at this t0.
        common = set.intersection(*(set(names) for names in d3.values()))
        for fhr, names in d3.items():
            if names - common:
                kind = "analysis" if fhr == 0 else "forecast"
                print(f"More in {kind} t0 = {t0}, stepType = {stepType}")

OK, so everything is the same in analysis and forecast

### Get the common variables in each

In [13]:
# For each stepType, the sorted variable names present at every t0.
# Only the fhr=0 sets are intersected.
intersect = {
    key: sorted(set.intersection(*(set(by_fhr[0]) for by_fhr in per_t0.values())))
    for key, per_t0 in vdict.items()
}

In [14]:
# Variables available at every sampled t0, keyed by stepType.
intersect

{'instant': ['refc', 'tcc', 'veril']}

### Get the unique per t0 variables

In [15]:
# List, per t0, the fhr=0 variables that are NOT part of the common set,
# i.e. the ones that are only available for some initial times.
for stepType, per_t0 in vdict.items():
    print(f"stepType = {stepType}")
    common = set(intersect[stepType])
    for t0, by_fhr in per_t0.items():
        extras = by_fhr[0] - common
        if extras:
            print(f"\t{t0}")
            print(f"\t\t{extras}")

stepType = instant
	2016-12-31 00:00:00
		{'unknown', 'ltng'}
	2017-12-31 00:00:00
		{'unknown', 'ltng'}
	2018-12-31 00:00:00
		{'unknown', 'ltng'}
	2019-12-31 00:00:00
		{'unknown', 'ltng'}
	2020-12-31 00:00:00
		{'unknown', 'ltng'}
	2021-12-31 00:00:00
		{'unknown', 'ltng'}
	2022-12-31 00:00:00
		{'unknown', 'ltng'}
	2023-12-31 00:00:00
		{'unknown', 'ltng'}
	2024-12-31 00:00:00
		{'unknown', 'ltng'}


In [16]:
# Re-display the common variables for comparison with the per-t0 extras above.
intersect

{'instant': ['refc', 'tcc', 'veril']}

OK, so we'll just drop lightning (`ltng`) and the `unknown` field for now, since neither is present in the 2015 files. They could be added later if needed, and then we'd need to specify valid time bounds.

### Now, let's open a dataset, get these variables, and write out an updated dict

In [17]:
# Rebind dsdict: it now maps stepType -> dataset holding only the common
# variables, read from the first (t0, fhr) sample.
dsdict = {}
for stepType, common_vars in intersect.items():
    opened = hrrr.open_grib(
        dims={"t0": hrrr.t0[0], "fhr": hrrr.fhr[0]},
        file_suffix="prs",
        cache_dir="./gribcache",
        filter_by_keys={
            "typeOfLevel": typeOfLevel,
            "stepType": stepType,
        },
    )
    subset = opened[sorted(common_vars)]
    # Never carry the "unknown" variable forward, even if it is common to all t0.
    dsdict[stepType] = subset.drop_vars("unknown") if "unknown" in subset else subset

Ignoring index file './gribcache/d815141b7bfc22ca98a97856ed62fa1ea8efcc976cae7d76820d2bba9349bc87.5b7b6.idx' older than GRIB file


In [20]:
# Inspect the dataset of common "instant" variables.
dsdict["instant"]

### This is not necessary for atmosphere typeOfLevel, but keeping it for notebook flow

In [21]:
# Build one reference entry per variable: the GRIB keys needed to select it,
# its long name, and the file suffix it was read from.
newdict = {}
for xds in dsdict.values():
    for varname in sorted(xds.data_vars):
        xda = xds[varname]
        level_type = xda.GRIB_typeOfLevel
        filters = {
            "typeOfLevel": level_type,
            "paramId": xda.GRIB_paramId,
        }
        entry = {
            "filter_by_keys": filters,
            "long_name": xda.long_name,
            "file_suffixes": ["prs"],
        }
        newdict[varname] = entry

        # Some level types get one extra selection key.
        if level_type == "heightAboveGround":
            filters["level"] = xda.attrs["GRIB_level"]
        elif level_type == "surface":
            filters["stepType"] = xda.attrs["GRIB_stepType"]

        if "original_name" in xda.attrs:
            entry["original_name"] = xda.original_name

In [22]:
# Re-create newdict with its entries in alphabetical key order.
newdict = dict(sorted(newdict.items()))

In [23]:
# Inspect the entries that will be merged into the reference file.
newdict

{'refc': {'filter_by_keys': {'typeOfLevel': 'atmosphere', 'paramId': 260390},
  'long_name': 'Maximum/Composite radar reflectivity',
  'file_suffixes': ['prs']},
 'tcc': {'filter_by_keys': {'typeOfLevel': 'atmosphere', 'paramId': 228164},
  'long_name': 'Total Cloud Cover',
  'file_suffixes': ['prs']},
 'veril': {'filter_by_keys': {'typeOfLevel': 'atmosphere', 'paramId': 260136},
  'long_name': 'Vertically-integrated liquid',
  'file_suffixes': ['prs']}}

In [24]:
import yaml

In [25]:
# Directory of the imported `sources` package; the existing reference.hrrr.yaml
# is read from here in the next cell.
sources.__path__[0]

'/Users/tsmith/work/ufs2arco/ufs2arco/sources'

In [26]:
# Load the reference table that ships inside the `sources` package.
reference_path = f"{sources.__path__[0]}/reference.hrrr.yaml"
with open(reference_path) as stream:
    reference = yaml.safe_load(stream)

In [27]:
# Shallow copy: top-level keys can be added or replaced in `updated` without
# touching `reference`, but the nested entry dicts are shared between the two.
updated = reference.copy()

In [28]:
# Merge the new entries; any key already present in the reference is replaced
# by the newdict version.
updated.update(newdict)


In [29]:
# Spot-check an entry that newdict does not contain, for comparison with the
# same key in `reference`.
updated["lsm"]

{'file_suffixes': ['prs'],
 'filter_by_keys': {'paramId': 172,
  'stepType': 'instant',
  'typeOfLevel': 'surface'},
 'long_name': 'Land-sea mask'}

In [30]:
# The same key in the original reference, for comparison with `updated`.
reference["lsm"]

{'file_suffixes': ['prs'],
 'filter_by_keys': {'paramId': 172,
  'stepType': 'instant',
  'typeOfLevel': 'surface'},
 'long_name': 'Land-sea mask'}

In [31]:
# Re-create `updated` with its entries in alphabetical key order.
updated = dict(sorted(updated.items()))

In [32]:
# Write the merged table to reference.hrrr.yaml in the current working
# directory (not back into the package directory it was loaded from).
with open("reference.hrrr.yaml", "w") as out:
    yaml.dump(updated, stream=out)