In [1]:
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr

import subprocess
from functools import reduce

In [2]:
from ufs2arco import sources

In [3]:
import pandas as pd

In [74]:
# The commented-out call below is the original yearly scan. It was replaced by
# the hourly window underneath to find the point where prmsl started being
# reported as mslma.
#hrrr = sources.AWSHRRRArchive(
#    t0={"start": "2014-12-31T00", "end": "2024-12-31T00", "freq": "1YE"},
#    fhr={"start": 0, "end": 6, "step": 6},
#)
# Hourly initial times 15:00-18:00 on 2016-08-22, forecast hours 0 and 6.
hrrr = sources.AWSHRRRArchive(
    t0={"start": "2016-08-22T15", "end": "2016-08-22T18", "freq": "1h"},
    fhr={"start": 0, "end": 6, "step": 6},
)

### First, figure out stepTypes available

In [75]:
# GRIB typeOfLevel examined throughout this notebook (used as a filter key below).
typeOfLevel = "meanSea"

In [76]:
# For every (t0, fhr) pair, record the distinct stepType values that grib_ls
# lists for messages whose typeOfLevel matches the one selected above.
dsdict = {}
for t0 in hrrr.t0:
    dsdict[t0] = {}

    for fhr in hrrr.fhr:
        print(f"Reading (t0, fhr) = ({str(t0)}, {int(fhr)})")
        # NOTE(review): _open_local is presumably returning a local file path,
        # since its result is handed to grib_ls as the file argument — confirm.
        grib_path = hrrr._open_local(
            dims={"t0": t0, "fhr": fhr},
            file_suffix="prs",
            cache_dir="./gribcache",
        )
        listing = subprocess.check_output(
            ["grib_ls", "-p", "typeOfLevel,stepType", grib_path],
            stderr=subprocess.DEVNULL,
        ).decode()

        # The last two whitespace-separated tokens of a row are read as
        # (typeOfLevel, stepType); rows with fewer than two tokens are skipped.
        rows = (text.strip().split() for text in listing.splitlines())
        matching_steps = {
            tokens[-1]
            for tokens in rows
            if len(tokens) >= 2 and tokens[-2] == typeOfLevel
        }
        dsdict[t0][fhr] = sorted(matching_steps)

Reading (t0, fhr) = (2016-08-22 15:00:00, 0)
Reading (t0, fhr) = (2016-08-22 15:00:00, 6)
Reading (t0, fhr) = (2016-08-22 16:00:00, 0)
Reading (t0, fhr) = (2016-08-22 16:00:00, 6)
Reading (t0, fhr) = (2016-08-22 17:00:00, 0)
Reading (t0, fhr) = (2016-08-22 17:00:00, 6)
Reading (t0, fhr) = (2016-08-22 18:00:00, 0)
Reading (t0, fhr) = (2016-08-22 18:00:00, 6)


In [77]:
# Print the stepTypes found at forecast hours 0 and 6 side by side for each t0.
for t0, fdict in dsdict.items():
    print(f"t0 = {t0}")
    at_fhr0, at_fhr6 = fdict[0], fdict[6]
    print(f"\t{at_fhr0} \t {at_fhr6}")

t0 = 2016-08-22 15:00:00
	['instant'] 	 ['instant']
t0 = 2016-08-22 16:00:00
	['instant'] 	 ['instant']
t0 = 2016-08-22 17:00:00
	['instant'] 	 ['instant']
t0 = 2016-08-22 18:00:00
	['instant'] 	 ['instant']


OK, the only stepType present on this level is `instant`, for every t0 and forecast hour checked.

### Now, get the variables

In [78]:
# Reminder of which typeOfLevel the following cells operate on.
print(typeOfLevel)

meanSea


In [79]:
# Collect, for every (stepType, t0, fhr) combination, the set of variable names
# that open_grib returns when filtered to the selected typeOfLevel and stepType.
# Resulting layout: vdict[stepType][t0][fhr] -> set of data variable names.
vdict = {
    "instant": {},
}
for stepType in vdict.keys():
    vdict[stepType] = {}
    for t0 in hrrr.t0:
        vdict[stepType][t0] = {}
        for fhr in hrrr.fhr:
            xds = hrrr.open_grib(
                dims={"t0": t0, "fhr": fhr},
                file_suffix="prs",
                cache_dir="./gribcache",
                filter_by_keys={
                    "typeOfLevel": typeOfLevel,
                    "stepType": stepType,
                },
            )
            # Only the names are kept; the dataset itself is not retained.
            vdict[stepType][t0][fhr] = set(xds.data_vars)

In [80]:
# Inspect the variable names found per stepType / t0 / fhr.
vdict

{'instant': {Timestamp('2016-08-22 15:00:00'): {np.int64(0): {'prmsl'},
   np.int64(6): {'prmsl'}},
  Timestamp('2016-08-22 16:00:00'): {np.int64(0): {'prmsl'},
   np.int64(6): {'prmsl'}},
  Timestamp('2016-08-22 17:00:00'): {np.int64(0): {'prmsl'},
   np.int64(6): {'prmsl'}},
  Timestamp('2016-08-22 18:00:00'): {np.int64(0): {'mslma'},
   np.int64(6): {'mslma'}}}}

In [81]:
# Report any t0 where forecast hour 0 (analysis) or forecast hour 6 (forecast)
# carries variables that are not shared by all forecast hours of that t0.
for stepType, by_t0 in vdict.items():
    for t0, by_fhr in by_t0.items():
        shared = set.intersection(*(set(names) for names in by_fhr.values()))
        if by_fhr[0] - shared:
            print(f"More in analysis t0 = {t0}, stepType = {stepType}")
        if by_fhr[6] - shared:
            print(f"More in forecast t0 = {t0}, stepType = {stepType}")

OK, so everything is the same in analysis and forecast

### Get the common variables in each

In [82]:
# For each stepType, the sorted variable names present at forecast hour 0 of
# every t0.
intersect = {}
for key, by_t0 in vdict.items():
    analysis_names = [set(by_fhr[0]) for by_fhr in by_t0.values()]
    intersect[key] = sorted(reduce(set.intersection, analysis_names))

In [83]:
# Variables shared by every t0 (at forecast hour 0), per stepType.
intersect


{'instant': []}

:( 

So... it looks like there are no common variables here.
However, if we look at [this table](https://rapidrefresh.noaa.gov/hrrr/GRIB2Table_hrrrncep_2d.txt), posted in 2014, it looks like they both use the MAPS reduction technique.

So we should be able to treat the two names as the same field: from here on we force-rename `prmsl` to `mslma`.

In [84]:
# Manual override: treat mslma as the single common "instant" variable.
intersect = dict(instant=["mslma"])

### Get the unique per t0 variables

In [85]:
# List, per t0, the forecast-hour-0 variables that are not in the common set.
for stepType, by_t0 in vdict.items():
    print(f"stepType = {stepType}")
    for t0, by_fhr in by_t0.items():
        extras = by_fhr[0].difference(intersect[stepType])
        if extras:
            print(f"\t{t0}")
            print(f"\t\t{extras}")

stepType = instant
	2016-08-22 15:00:00
		{'prmsl'}
	2016-08-22 16:00:00
		{'prmsl'}
	2016-08-22 17:00:00
		{'prmsl'}


### Now, let's open a dataset, get these variables, and write out an updated dict

In [103]:
# Attributes written onto prmsl before it is renamed, so that the variable
# carries the mslma identity (paramId 260323) in the generated reference entry.
mslma_attrs = {
    "GRIB_paramId": 260323,
    "GRIB_shortName": "mslma",
    "GRIB_cfVarName": "mslma",
    "GRIB_name": "MSLP (MAPS System Reduction)",
    "long_name": "MSLP (MAPS System Reduction)",
}

# One dataset per stepType, opened from the first t0 / first forecast hour.
dsdict = {}
for stepType in intersect.keys():
    grib_filter = {
        "typeOfLevel": typeOfLevel,
        "stepType": stepType,
    }
    xds = hrrr.open_grib(
        dims={"t0": hrrr.t0[0], "fhr": hrrr.fhr[0]},
        file_suffix="prs",
        cache_dir="./gribcache",
        filter_by_keys=grib_filter,
    )

    # Files that still use prmsl get relabelled as mslma.
    if "prmsl" in xds.data_vars:
        xds["prmsl"].attrs.update(mslma_attrs)
        xds = xds.rename({"prmsl": "mslma"})

    # Drop the "unknown" entry when the dataset has one.
    if "unknown" in xds:
        xds = xds.drop_vars("unknown")

    dsdict[stepType] = xds

In [104]:
# Inspect the last dataset opened above (prmsl renamed to mslma when present).
xds

### This is not necessary for atmosphere typeOfLevel, but keeping it for notebook flow

In [105]:
# Build one reference entry per variable found in the opened datasets, then
# add a hand-written entry for prmsl covering the period before the switch.
newdict = {}
for xds in dsdict.values():
    for varname in sorted(xds.data_vars):
        xda = xds[varname]
        level_type = xda.GRIB_typeOfLevel

        # Keys used to select this variable from a GRIB file.
        grib_filter = {
            "typeOfLevel": level_type,
            "paramId": xda.GRIB_paramId,
        }
        if level_type == "heightAboveGround":
            grib_filter["level"] = xda.attrs["GRIB_level"]
        elif level_type == "surface":
            grib_filter["stepType"] = xda.attrs["GRIB_stepType"]

        entry = {
            "filter_by_keys": grib_filter,
            "long_name": xda.long_name,
            "file_suffixes": ["prs"],
            # Every variable from the loop is tagged as the mslma side of the
            # prmsl -> mslma switch, valid from 2016-08-22T18 onward.
            "alternative_name": "prmsl",
            "time_bounds": ["2016-08-22T18", None],
        }
        if "original_name" in xda.attrs:
            entry["original_name"] = xda.original_name
        newdict[varname] = entry

# prmsl entry, valid up to and including 2016-08-22T17.
prmsl_filter = {
    "typeOfLevel": typeOfLevel,
    "paramId": 260074,
}
newdict["prmsl"] = {
    "filter_by_keys": prmsl_filter,
    "long_name": "Pressure reduced to MSL",
    "file_suffixes": ["prs"],
    "time_bounds": [None, "2016-08-22T17"],
}

In [106]:
# Rebuild newdict with its keys in sorted order.
newdict = {name: newdict[name] for name in sorted(newdict)}

In [107]:
# Inspect the new reference entries, now in sorted key order.
newdict

{'mslma': {'filter_by_keys': {'typeOfLevel': 'meanSea', 'paramId': 260323},
  'long_name': 'MSLP (MAPS System Reduction)',
  'file_suffixes': ['prs'],
  'alternative_name': 'prmsl',
  'time_bounds': ['2016-08-22T18', None]},
 'prmsl': {'filter_by_keys': {'typeOfLevel': 'meanSea', 'paramId': 260074},
  'long_name': 'Pressure reduced to MSL',
  'file_suffixes': ['prs'],
  'time_bounds': [None, '2016-08-22T17']}}

In [108]:
import yaml

In [109]:
# Directory of the imported ufs2arco.sources package; the existing reference
# YAML is read from this location below.
sources.__path__[0]

'/Users/tsmith/work/ufs2arco/ufs2arco/sources'

In [110]:
# Load the reference table that ships inside the ufs2arco.sources package.
reference_path = f"{sources.__path__[0]}/reference.hrrr.yaml"
with open(reference_path, "r") as stream:
    reference = yaml.safe_load(stream)

In [111]:
# Work on a copy so that `reference` keeps the top-level entries as loaded.
# NOTE(review): .copy() is presumably a shallow dict copy here — nested entries
# would still be shared with `reference`.
updated = reference.copy()

In [112]:
# Add the new entries; any existing top-level key with the same name is replaced.
updated.update(newdict)


In [113]:
# Rebuild the merged table with its top-level keys in sorted order.
updated = {name: updated[name] for name in sorted(updated)}

In [114]:
# Write the merged table to a relative path (not back into the package directory
# it was read from).
output_path = "reference.hrrr.yaml"
with open(output_path, "w") as stream:
    yaml.dump(updated, stream)