In [1]:
%load_ext jupyter_black

In [18]:
import json
import shutil
import warnings
from datetime import datetime
from pathlib import Path
from typing import Union

import pandas as pd
from requests import HTTPError, RequestException, Session


# strftime-style template for one HRRR Alaska GRIB2 file in the public bucket.
# download_archive_data() formats it with each timestamp of a DatetimeIndex:
# %Y%m%d selects the dated folder and %H fills the `wrfnatf%H` slot, while the
# `t00z` part is fixed.
# NOTE(review): presumably %H is used as the forecast hour of the 00z run -- confirm.
HRRR_ALASKA_URL_TEMPLATE = (
    "https://storage.googleapis.com/high-resolution-rapid-refresh/"
    "hrrr.%Y%m%d/alaska/"
    "hrrr.t00z.wrfnatf%H.ak.grib2"
)

# values accepted for the start/end bounds handed to pd.date_range
TimeLike = Union[datetime, str, pd.Timestamp]

In [19]:
def download_archive_data(
    start: TimeLike,
    end: TimeLike,
    basedir: Path,
    freq: str = "h",
    timeout: Union[float, None] = 60.0,
) -> None:
    """Download one HRRR Alaska GRIB2 file per timestamp between start and end.

    A DatetimeIndex is built from ``start``, ``end`` and ``freq`` and every
    timestamp is formatted through ``HRRR_ALASKA_URL_TEMPLATE`` to get a url.
    The user is asked to confirm on stdin before anything is downloaded.
    Each file is stored in ``basedir`` under the last three url segments
    joined with ``.`` and with the ``hrrr.`` prefixes removed, e.g.
    ``20220720.alaska.t00z.wrfnatf01.ak.grib2``.

    A url that cannot be downloaded (non-2xx status, connection error,
    timeout, interrupted transfer) produces a warning and the loop moves on
    to the next url; no truncated file is left at the final location.

    Args:
        start: first timestamp of the range (inclusive).
        end: last timestamp of the range (inclusive).
        basedir: destination directory; created, with parents, when missing.
        freq: pandas frequency alias used to step from start to end.
        timeout: seconds to wait for the server to connect / send data before
            giving up on a url; ``None`` waits forever.

    Returns:
        None. Also returns early, without downloading, when the user does not
        answer ``y`` or ``yes``.
    """
    # accept plain strings as well as Path objects
    basedir = Path(basedir)
    # parents/exist_ok: nested destinations work and there is no exists()/mkdir() race
    basedir.mkdir(parents=True, exist_ok=True)
    # create a DatetimeIndex from the function arguments and format the urls using the url template
    urls = pd.date_range(start=start, end=end, freq=freq).strftime(HRRR_ALASKA_URL_TEMPLATE)
    msg = f"you are about to atempt to download {len(urls)} grib files, would you like to continue?(y/yes)"
    # ask the user to verify the (potentially large) download
    user_response = input(msg).strip().lower()
    if user_response not in ("yes", "y"):
        return None

    # bytes written per iteration while streaming a file to disk
    chunk_size = 1024 * 1024

    with Session() as session:
        for url in urls:
            save_to = basedir / ".".join(url.replace("hrrr.", "").split("/")[-3:])
            # stream into a side file first so a failed transfer never looks like a finished one
            partial = save_to.with_name(save_to.name + ".part")
            try:
                # the context manager releases the streamed connection back to the pool
                with session.get(url, stream=True, timeout=timeout) as r:
                    # on a non 2xx status code raise HTTPError
                    r.raise_for_status()
                    with partial.open("wb") as fileout:
                        # iter_content decodes any content-encoding and surfaces
                        # transfer problems as requests exceptions
                        for chunk in r.iter_content(chunk_size=chunk_size):
                            fileout.write(chunk)
                partial.replace(save_to)
                print("grib2 file saved at ", save_to)
            except RequestException:
                # covers HTTPError as well as connection errors and timeouts,
                # so one bad url does not abort the remaining downloads
                warnings.warn(f"Warning: failed to download {url}")
            finally:
                # only still present when the transfer did not complete
                if partial.exists():
                    partial.unlink()


# directory the demo GRIB2 file is written to
outdir = Path("/workspaces/griblib/notebooks/alaska/demo")

# start and end are the same instant, so with the default hourly frequency
# exactly one file is requested
download_archive_data("2022-07-20T01:00:00Z", "2022-07-20T01:00:00Z", basedir=outdir)

grib2 file saved at  /workspaces/griblib/notebooks/alaska/demo/20220720.alaska.t00z.wrfnatf01.ak.grib2
