In [2]:
%load_ext jupyter_black

import pandas as pd
import xarray as xr
import requests
import re

# Base URLs of the NOAA / NCEP / Iowa State data services referenced in this notebook.
# Relative paths are appended to these values by plain string concatenation, so
# whether a value ends in "/" matters to the caller.
URLS = {
    "DATA.NOAA": "https://data.noaa.gov/onestop/",
    "NCEI.NOAA": "https://www.ncei.noaa.gov/products/",
    "NOMADS.NCEP": "https://nomads.ncep.noaa.gov/pub/",
    # realtime
    # NOTE(review): unlike the other entries this value has no trailing "/" — confirm
    # before concatenating a relative path onto it.
    "MRMS.NCEP": "https://mrms.ncep.noaa.gov/data",
    # archive
    # NOTE(review): the key is spelled "ARGON" while the host name is "agron" —
    # confirm which spelling is intended before relying on this key.
    "MRMS.ARGON": "https://mrms.agron.iastate.edu/",
}

In [38]:
from typing import Mapping

# Lookup table indexed by model name. Each row holds two dicts that map the
# names used in the opened GRIB dataset to the short names used downstream:
#   "variables"   - data variables (e.g. "TMP_P0_L100_GLL0" -> "TMP")
#   "coordinates" - coordinate names (e.g. "lv_ISBL0" -> "hPa")
BASE_INDEX = pd.DataFrame(
    [
        {
            "variables": {
                f"{field}_P0_L100_GLL0": field for field in ("TMP", "RH", "UGRD", "VGRD", "HGT")
            },
            "coordinates": {
                "lv_ISBL0": "hPa",
                "lat_0": "lat",
                "lon_0": "lon",
            },
            "model": model_name,
        }
        # Both models currently use identical mappings; every row still gets its own
        # freshly built dicts, so editing one row never affects the other.
        # NOTE(review): the HRRR row is a copy of the GALWEM one — confirm these
        # names against a real HRRR file.
        for model_name in ("GALWEM", "HRRR")
    ],
).set_index("model")


class GribIndex:
    """Name mappings for one model, taken from the matching row of ``BASE_INDEX``.

    The row provides two dicts, ``"variables"`` and ``"coordinates"``, whose keys
    are the names found in the opened dataset and whose values are the short
    names they should be renamed to.
    """

    def __init__(self, model: str) -> None:
        """Select the ``BASE_INDEX`` row labelled ``model``."""
        row = BASE_INDEX.loc[model]
        self._base: pd.Series = row

    @property
    def variables(self) -> list[str]:
        """Source-side names of the data variables, in mapping order."""
        return [source_name for source_name in self._base["variables"]]

    @property
    def coordinates(self) -> list[str]:
        """Source-side names of the coordinates, in mapping order."""
        return [source_name for source_name in self._base["coordinates"]]

    def rename(self) -> Mapping[str, str]:
        """Return one combined source-name -> short-name mapping.

        Coordinate entries come first; a variable entry with the same key
        overrides the coordinate entry.
        """
        return {**self._base["coordinates"], **self._base["variables"]}


# Build the name index for the GALWEM model and display its source variable names.
index = GribIndex("GALWEM")
index.variables

['TMP_P0_L100_GLL0',
 'RH_P0_L100_GLL0',
 'UGRD_P0_L100_GLL0',
 'VGRD_P0_L100_GLL0',
 'HGT_P0_L100_GLL0']

In [39]:
import re


def load_dataset(filepath: str, grib_index: "GribIndex | None" = None):
    """Open a GRIB file with the pynio engine and tag it with its valid time.

    The valid time is derived from the file name, which must end in
    ``<model run>.<forecast hour>``: the model run as ``YYYYMMDDHH`` digits and
    the forecast hour as exactly four digits (e.g. ``2022053000.0006``).

    Parameters
    ----------
    filepath : str
        Path of the GRIB file to open.
    grib_index : GribIndex, optional
        When given, only ``grib_index.variables`` are kept and the dataset is
        renamed with ``grib_index.rename()``.

    Returns
    -------
    xr.Dataset
        The dataset with a new leading ``validTime`` dimension of length one,
        holding the valid time as integer nanoseconds since the Unix epoch.

    Raises
    ------
    ValueError
        If ``filepath`` does not end in ``<digits>.<4 digits>``.
    """
    # Raw string: "\d" inside a normal string literal is an invalid escape sequence.
    match = re.search(r"(\d+)\.(\d{4})$", filepath)
    if match is None:
        raise ValueError(
            f"cannot parse model run and forecast hour from {filepath!r}; "
            "expected the name to end in '<YYYYMMDDHH>.<FFFF>'"
        )
    model_run, forecast_hour = match.groups()

    # Lower-case "h" is accepted by old and new pandas; upper-case "H" is deprecated.
    validtime = pd.to_datetime(model_run, format="%Y%m%d%H") + pd.to_timedelta(
        int(forecast_hour), unit="h"
    )

    ds: xr.Dataset = xr.open_dataset(filepath, engine="pynio")
    if grib_index is not None:
        ds = ds[grib_index.variables].rename(grib_index.rename())
    return ds.expand_dims({"validTime": [validtime.value]})


# Load one GRIB2 file from a path relative to the notebook, selecting and renaming
# fields via `index`, then display the resulting dataset.
ds = load_dataset("data/557ww/GLOBAL.grib2.2022053000.0000", grib_index=index)
ds

In [40]:
# Flatten the dataset to a table, promote HGT from a column to an index level,
# and put the index levels in a fixed order.
(
    ds.to_dataframe()
    .set_index("HGT", append=True)
    .reorder_levels(["validTime", "HGT", "hPa", "lat", "lon"])
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,TMP,RH,UGRD,VGRD
validTime,HGT,hPa,lat,lon,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1653868800000000000,11537.602539,20000.0,90.0,0.0,231.578232,1.200000,15.380625,7.391882
1653868800000000000,11537.602539,20000.0,90.0,0.5,231.578232,1.200000,15.380625,6.591882
1653868800000000000,11537.602539,20000.0,90.0,1.0,231.578232,1.200000,15.380625,6.591882
1653868800000000000,11537.602539,20000.0,90.0,1.5,231.578232,1.200000,15.380625,6.591882
1653868800000000000,11537.602539,20000.0,90.0,2.0,231.578232,1.200000,15.380625,6.591882
1653868800000000000,...,...,...,...,...,...,...,...
1653868800000000000,282.332092,100000.0,-90.0,357.5,240.699142,97.962502,-3.034274,-2.483182
1653868800000000000,282.332092,100000.0,-90.0,358.0,240.699142,97.962502,-3.034274,-2.483182
1653868800000000000,282.332092,100000.0,-90.0,358.5,240.699142,97.962502,-3.034274,-2.483182
1653868800000000000,282.332092,100000.0,-90.0,359.0,240.699142,97.962502,-3.034274,-2.483182


In [6]:
import pandas as pd

# str.format template for a GLOBAL.grib2 file URL under the "NOMADS.NCEP" base URL.
# Expected integer fields: year (zero-padded to 4 digits), month, day and hour
# (2 digits each) and forecast_hour (4 digits).
URL_TEMPLATE = (
    URLS["NOMADS.NCEP"]
    + "data/nccf/com/557ww/prod/557ww.{year:04}{month:02d}{day:02d}/GLOBAL.grib2.{year:04}{month:02d}{day:02d}{hour:02d}.{forecast_hour:04d}"
)


def build_url(model_run: str, forecast_hour: int = 0):
    """Fill ``URL_TEMPLATE`` for one model run and forecast hour.

    Parameters
    ----------
    model_run : str
        Model run time in any form ``pd.to_datetime`` can parse; its year,
        month, day and hour are substituted into the template.
    forecast_hour : int, default 0
        Forecast hour substituted into the template.

    Returns
    -------
    str
        The formatted URL.
    """
    run_time = pd.to_datetime(model_run)
    return URL_TEMPLATE.format(
        year=run_time.year,
        month=run_time.month,
        day=run_time.day,
        hour=run_time.hour,
        forecast_hour=forecast_hour,
    )


# Example: URL for forecast hour 6 of the 2022-05-30 00:00 model run.
url = build_url("2022-05-30T00:00", forecast_hour=6)
url

'https://nomads.ncep.noaa.gov/pub/data/nccf/com/557ww/prod/557ww.20220530/GLOBAL.grib2.2022053000.0006'

In [19]:
from wxpy import download_file
# Fetch the file at `url` with the project helper and display whatever it returns.
f = download_file(url)
f

KeyboardInterrupt: 

In [15]:
from wxpy import download_file
# Fetch an HRRR file. A dedicated name is used instead of reassigning `url`:
# other cells pass `url` to dataset_from_url, whose file-name pattern
# ("<digits>.<4 digits>" at the end) cannot match this ".grib2" name.
hrrr_url = "https://nomads.ncep.noaa.gov/pub/data/nccf/com/hrrr/v4.1/hrrr.20220530/conus/hrrr.t00z.wrfnatf01.grib2"
download_file(hrrr_url)


KeyboardInterrupt: 

In [13]:
import sys

# Emit five empty lines; the loop counter itself is not used.
for _ in range(5):
    print()

["b'",
 '!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">\\n',
 'html>\\n ',
 'head>\\n  ',
 'title>Index of /pub/data',
 '/title>\\n ',
 'script>NS_CSM_td=1413533186;NS_CSM_pd=275116669;NS_CSM_u="/clm10";NS_CSM_col="Logstream";',
 '/script>',
 'script type="text/javascript">function sendTimingInfoInit(){setTimeout(sendTimingInfo,0)}function sendTimingInfo(){var wp=window.performance;if(wp){var c1,c2,t;c1=wp.timing;if(c1){var cm={};cm.ns=c1.navigationStart;if((t=c1.unloadEventStart)>0)cm.us=t;if((t=c1.unloadEventEnd)>0)cm.ue=t;if((t=c1.redirectStart)>0)cm.rs=t;if((t=c1.redirectEnd)>0)cm.re=t;cm.fs=c1.fetchStart;cm.dls=c1.domainLookupStart;cm.dle=c1.domainLookupEnd;cm.cs=c1.connectStart;cm.ce=c1.connectEnd;if((t=c1.secureConnectionStart)>0)cm.scs=t;cm.rqs=c1.requestStart;cm.rss=c1.responseStart;cm.rse=c1.responseEnd;cm.dl=c1.domLoading;cm.di=c1.domInteractive;cm.dcls=c1.domContentLoadedEventStart;cm.dcle=c1.domContentLoadedEventEnd;cm.dc=c1.domComplete;if((t=c1.loadEventStart)>0)c

In [29]:
def dataset_from_url(url) -> "xr.Dataset | None":
    """Download a GRIB file and load it into memory as an ``xarray.Dataset``.

    The response body is written to ``tmp/<suffix>``, where ``<suffix>`` is the
    trailing ``<digits>.<4 digits>`` part of ``url``, and that file is then
    loaded with the pynio engine.

    Parameters
    ----------
    url : str
        Address of the file; it must end in ``<digits>.<4 digits>``.

    Returns
    -------
    xr.Dataset or None
        The loaded dataset, or ``None`` when the server does not answer with
        HTTP status 200.

    Raises
    ------
    ValueError
        If ``url`` does not end in the expected ``<digits>.<4 digits>`` suffix.
    """
    import os  # local import: only needed to create the download directory

    match = re.search(r"\d*\.\d{4}$", url)
    if match is None:
        raise ValueError(f"cannot derive a local file name from {url!r}")
    file_path = "tmp/" + match.group()

    # Without a timeout a stalled connection blocks the kernel indefinitely.
    response = requests.get(url, timeout=60)
    if response.status_code != 200:
        print("bad status code")
        return None

    print("writing file")
    os.makedirs(os.path.dirname(file_path), exist_ok=True)
    # "wb", not "rb": the file is being created and written, not read.
    with open(file_path, "wb") as f:
        f.write(response.content)
    return xr.load_dataset(file_path, engine="pynio")


# Download and load the dataset for the current value of `url`, then display it.
# Note that this rebinds `ds`.
ds = dataset_from_url(url)
ds

KeyboardInterrupt: 

In [None]:
# Ad-hoc select-and-rename of the loaded dataset, flattened to a DataFrame.
var_template = "{0}_P0_L100_GLL0"
coordinates = ["lv_ISBL0", "lat_0", "lon_0"]  # not referenced below
variables = {
    var_template.format(short_name): short_name
    for short_name in ("TMP", "RH", "UGRD", "VGRD", "HGT")
}  # e.g. HGT_P0_L100_GLL0 -> HGT
(
    ds[variables.keys()]
    .rename(variables | {"lv_ISBL0": "hPa", "lat_0": "lat", "lon_0": "lon"})
    .to_dataframe()
)