In [2]:
# Load the jupyter_black IPython extension for this kernel session
# (presumably it auto-formats code cells with black — confirm against the extension docs).
%load_ext jupyter_black

# downloading archive data

In [15]:
import json
import shutil
from datetime import datetime
from pathlib import Path
from typing import Union

import pandas as pd
from requests import Session, HTTPError
from requests.exceptions import RequestException

# URL pattern for a single ProbSevere JSON file on mtarchive.geol.iastate.edu.
# The %Y / %m / %d / %H / %M fields are strftime directives: download_archive_data
# passes this string to DatetimeIndex.strftime to get one URL per timestamp.
# The seconds field of the file name is fixed at "00".
PROBSEVERE_URL_TEMPLATE = (
    "https://mtarchive.geol.iastate.edu/%Y/%m/%d/mrms/ncep/ProbSevere/MRMS_PROBSEVERE_%Y%m%d_%H%M00.json"
)
# Accepted representations of a point in time for the `start` / `end` arguments.
TimeLike = Union[datetime, str, pd.Timestamp]

In [17]:
def download_archive_data(
    start: TimeLike,
    end: TimeLike,
    basedir: Path,
    freq: str = "2min",
    delete_when_done: bool = False,
    timeout: float = 30.0,
) -> None:
    """Download ProbSevere JSON files from the iastate archive over a date range.

    One URL is generated per timestamp of
    ``pd.date_range(start=start, end=end, freq=freq)`` by formatting
    ``PROBSEVERE_URL_TEMPLATE``. Each successfully fetched document is
    re-serialised as indented JSON into ``basedir`` under the remote file name.

    Downloading is best-effort: when a timestamp cannot be fetched or its body
    is not valid JSON, a message is printed and the loop moves on to the next
    URL. No file is created for a failed timestamp.

    Parameters
    ----------
    start, end
        Bounds of the time range, forwarded to ``pd.date_range``.
    basedir
        Directory that receives the files. It is created, including any
        missing parents, when it does not exist.
    freq
        Spacing between requested timestamps, forwarded to ``pd.date_range``.
    delete_when_done
        Currently unused by this function; kept so existing callers that pass
        it keep working.
    timeout
        Seconds to wait for the server before a request is abandoned and
        reported as a failed download.

    Raises
    ------
    OSError
        If ``basedir`` cannot be created or an output file cannot be written.
        Local filesystem problems are deliberately not swallowed.
    """
    # Accept plain strings as well as Path objects, and tolerate both an
    # existing directory and missing parent directories.
    basedir = Path(basedir)
    basedir.mkdir(parents=True, exist_ok=True)

    timestamps = pd.date_range(start=start, end=end, freq=freq)
    urls = timestamps.strftime(PROBSEVERE_URL_TEMPLATE)

    # A single Session reuses the underlying connection across requests.
    with Session() as session:
        for url in urls:
            try:
                # No stream=True: the whole body is parsed right away, and a
                # non-streamed response releases its connection immediately.
                r = session.get(url, timeout=timeout)
                # Turn 4xx/5xx status codes into HTTPError.
                r.raise_for_status()
                # Parse BEFORE opening the output file so a malformed body
                # cannot leave an empty or truncated file behind.
                payload = r.json()
                # Final path segment of the (possibly redirected) URL.
                filename = r.url.split("/")[-1]
            except (RequestException, ValueError) as err:
                # RequestException is the base class of requests' own
                # ConnectionError, Timeout and HTTPError (the builtin
                # ConnectionError is unrelated and would not match them).
                # ValueError covers JSON decoding failures.
                print("error downloading", url, f"({err})")
                continue

            with (basedir / filename).open("w") as f:
                json.dump(payload, f, indent=4)


# Directory that receives the downloaded ProbSevere JSON files.
outdir = Path("/workspaces/griblib/notebooks/probsevere/demo")

# Fetch a ten-minute demo window (start, end, destination) at the function's
# default request frequency.
download_archive_data("2022-07-20T00:00:00Z", "2022-07-20T00:10:00Z", outdir)