In [182]:
import io
import os
import subprocess
from datetime import datetime, timezone

import dateparser
import numpy as np
import numpy.typing as npt
import pandas as pd
import pygrib
import requests
import xarray as xr

"CMC_hrdps_domain_Variable_LevelType_level_ps2.5km_YYYYMMDDHH_Phhh-mm.grib2"  
"CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022121900_P000-00.grib2 "  
"https://dd.weather.gc.ca/model_hrdps/continental/grib2/00/000/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022121900_P000-00.grib2"  

In [185]:
def create_urls() -> list[list[str]]:
    base_url = "https://dd.weather.gc.ca/model_hrdps/continental/grib2/"
    model_runs = [f"{(i):0>2}" for i in range(0, 24, 6)]
    forecast_hours = [f"{(i):0>3}" for i in range(49)]
    prefix = "CMC"
    model = "hrdps"
    domain = "continental"
    variable = "SNOD"
    level_type = "SFC"
    level = "0"
    resolution = "ps2.5km"
    date = datetime.now(timezone.utc).strftime("%Y%m%d")
    minutes = "00"
    extension = "grib2"

    model_run_urls = []
    for model_run in model_runs:
        prediction_urls = []
        for forecast_hour in forecast_hours:
            filename = f"{prefix}_{model}_{domain}_{variable}_{level_type}_{level}_{resolution}_{date}{model_run}_P{forecast_hour}-{minutes}.{extension}"
            download_url = f"{base_url}{model_run}/{forecast_hour}/{filename}"
            prediction_urls.append(download_url)
        model_run_urls.append(prediction_urls)
    
    return model_run_urls

In [186]:
model_run_urls = create_urls()

In [187]:
def find_latest_run(model_run_urls: list[list[str]]) -> int:
    """Find the latest model run by issuing a HEAD http request to the first url of each model run and comparing the "Last-Modified" field."""
    latest_time = datetime.fromisoformat("0001-01-01 00:00:00.000+00:00")
    latest_idx = 0
    for idx, prediction_urls in enumerate(model_run_urls):
        res = requests.head(prediction_urls[0])
        if not res.ok:
            continue

        modified_date = dateparser.parse(res.headers["Last-Modified"])
        if not modified_date:
            continue

        if modified_date > latest_time:
            latest_time = modified_date
            latest_idx = idx

    return latest_idx


latest_id = find_latest_run(model_run_urls)

In [195]:
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

def download_predictions(model_run_urls: list[list[str]], model_idx:int = 0, savepath:str = "./") -> list[str]:
    filepaths = []

    retry_strategy = Retry(
        total=3,
        backoff_factor=0.3,
        status_forcelist=[404, 429, 500, 502, 503, 504],
        method_whitelist=["HEAD", "GET", "OPTIONS"]
    )
    adapter = HTTPAdapter(max_retries=retry_strategy)
    http = requests.Session()
    http.mount("https://", adapter)
    http.mount("http://", adapter)

    for prediction_url in model_run_urls[model_idx]:
        filename = prediction_url.split("/")[-1]
        print(f"Processing {prediction_url}")
        res = http.get(prediction_url)
        filepath = os.path.join(savepath, filename)
        with open(filepath, "wb") as f:
            f.write(res.content)
        filepaths.append(filepath)
    return filepaths

paths = download_predictions(model_run_urls, latest_id, "../data/")

  retry_strategy = Retry(


Processing https://dd.weather.gc.ca/model_hrdps/continental/grib2/00/000/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P000-00.grib2
Processing https://dd.weather.gc.ca/model_hrdps/continental/grib2/00/001/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P001-00.grib2
Processing https://dd.weather.gc.ca/model_hrdps/continental/grib2/00/002/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P002-00.grib2
Processing https://dd.weather.gc.ca/model_hrdps/continental/grib2/00/003/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P003-00.grib2
Processing https://dd.weather.gc.ca/model_hrdps/continental/grib2/00/004/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P004-00.grib2
Processing https://dd.weather.gc.ca/model_hrdps/continental/grib2/00/005/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P005-00.grib2
Processing https://dd.weather.gc.ca/model_hrdps/continental/grib2/00/006/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P006-00.grib2
Processing https://dd.weath

FileNotFoundError: [Errno 2] No such file or directory: '../data/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P025-00.grib2'

In [189]:
def download_predictions(model_run_urls: list[list[str]], model_idx:int = 0, savepath:str = "./") -> list[str]:
    filepaths = []
    for prediction_url in model_run_urls[model_idx]:
        filename = prediction_url.split("/")[-1]
        print(f"Processing {prediction_url}")
        res = requests.get(prediction_url)
        filepath = os.path.join(savepath, filename)
        with open(filepath, "wb") as f:
            f.write(res.content)
        filepaths.append(filepath)
    return filepaths

paths = download_predictions(model_run_urls, latest_id, "../data/")

Processing https://dd.weather.gc.ca/model_hrdps/continental/grib2/00/000/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P000-00.grib2
Processing https://dd.weather.gc.ca/model_hrdps/continental/grib2/00/001/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P001-00.grib2
Processing https://dd.weather.gc.ca/model_hrdps/continental/grib2/00/002/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P002-00.grib2
Processing https://dd.weather.gc.ca/model_hrdps/continental/grib2/00/003/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P003-00.grib2
Processing https://dd.weather.gc.ca/model_hrdps/continental/grib2/00/004/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P004-00.grib2
Processing https://dd.weather.gc.ca/model_hrdps/continental/grib2/00/005/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P005-00.grib2
Processing https://dd.weather.gc.ca/model_hrdps/continental/grib2/00/006/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P006-00.grib2
Processing https://dd.weath

In [191]:
paths

['../data/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P000-00.grib2',
 '../data/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P001-00.grib2',
 '../data/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P002-00.grib2',
 '../data/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P003-00.grib2',
 '../data/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P004-00.grib2',
 '../data/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P005-00.grib2',
 '../data/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P006-00.grib2',
 '../data/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P007-00.grib2',
 '../data/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P008-00.grib2',
 '../data/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P009-00.grib2',
 '../data/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P010-00.grib2',
 '../data/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P011-00.grib2',
 '../data/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P0

In [192]:
# %%timeit
def read_grib_predictions(filepaths:list[str]) -> list[npt.NDArray]:
    predictions:list[npt.NDArray] = []
    for path in filepaths:
        print(f"Reading {path}")
        gribs = pygrib.open(path)
        data = np.ma.filled(gribs[1].values, 0)
        predictions.append(data.reshape(-1))
    return predictions

data = read_grib_predictions(paths)

Reading ../data/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P000-00.grib2
Reading ../data/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P001-00.grib2
Reading ../data/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P002-00.grib2
Reading ../data/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P003-00.grib2
Reading ../data/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P004-00.grib2
Reading ../data/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P005-00.grib2
Reading ../data/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P006-00.grib2
Reading ../data/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P007-00.grib2
Reading ../data/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P008-00.grib2
Reading ../data/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P009-00.grib2
Reading ../data/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P010-00.grib2
Reading ../data/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022122000_P011-00.grib2
Reading ../data/

OSError: not that many messages in file

In [154]:
%%timeit
ds = xr.open_dataset(
    "../data/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022121918_P001-00.grib2",
    engine="cfgrib",
    indexpath="",
).fillna(0).to_dataframe()
ds

2.67 s ± 85 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [167]:
%%timeit
cmd = ["grib_dump", "../data/CMC_hrdps_continental_SNOD_SFC_0_ps2.5km_2022121918_P002-00.grib2"]
proc = subprocess.run(cmd, capture_output=True, text=True)

1.3 s ± 31.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
