In [1]:
%load_ext jupyter_black

In [2]:
import json
from pathlib import Path
from typing import Iterator
from urllib.error import HTTPError

import pandas as pd
import geopandas as gpd
from requests import Session, Response

In [24]:
# Root URL of the archive server; get_data() appends a per-day
# "/%Y/%m/%d/mrms/ncep/ProbSevere/" path to it to reach each daily listing.
baseurl = "https://mtarchive.geol.iastate.edu"



def iterurls(res: Response) -> Iterator[str]:
    """Yield, one at a time, the URL of every entry on a directory-index page.

    Parameters
    ----------
    res
        Response for the index page. Its body is parsed as HTML and its
        ``url`` is used as the prefix for every entry name.

    Yields
    ------
    str
        ``res.url`` concatenated with each non-null entry name found in the
        first table of the page.
    """
    # Only the first table on the page is used. The column labels referenced
    # below ("Parent Directory", "Unnamed: 2") are whatever pandas derives
    # from the page once its first two rows are skipped, so they depend on
    # the server's listing layout.
    listing = pd.read_html(
        res.content,
        skiprows=[0, 1],
        parse_dates=["Unnamed: 2"],
    )[0]
    entry_names = listing.set_index("Unnamed: 2")["Parent Directory"].dropna()
    # str + Series concatenates element-wise, giving one full URL per entry.
    entry_urls = res.url + entry_names
    yield from entry_urls


def feature_generator(features: list, valid_time: pd.Timestamp) -> Iterator[dict]:
    """Flatten feature mappings into one flat record per feature.

    Parameters
    ----------
    features
        Sequence of mappings, each with a ``"properties"`` mapping and a
        ``"geometry"`` entry.
    valid_time
        Value stored under the ``"validTime"`` key of every record.

    Yields
    ------
    dict
        A new dict holding the feature's properties plus ``"validTime"`` and
        ``"geometry"``. If the properties already contain either key, its
        value is overwritten.

    Raises
    ------
    KeyError
        If a feature lacks ``"properties"`` or ``"geometry"``.
    """
    for feat in features:
        # Build a fresh dict instead of writing into feat["properties"], so the
        # caller's feature objects are not modified as a side effect.
        yield {
            **feat["properties"],
            "validTime": valid_time,
            "geometry": feat["geometry"],
        }


def get_data(
    start: str, end: str, save_to: Path = Path("/home/leaver2000/sppp/data")
) -> None:
    """Download every file listed for each day in a date range and save it as CSV.

    For each day between ``start`` and ``end`` (as expanded by
    ``pd.date_range``) the day's directory listing under ``baseurl`` is
    fetched, every listed file is downloaded and parsed as JSON, and its
    ``"features"`` are written to ``save_to`` as one CSV per source file,
    indexed by ``("validTime", "ID")``.

    Parameters
    ----------
    start, end
        Range bounds, passed to ``pd.date_range``.
    save_to
        Output directory. It is created if it does not exist.

    Notes
    -----
    A day or file whose request fails with an HTTP error status is reported
    and skipped rather than aborting the run. Files with an empty feature
    list are skipped because there is nothing to index.
    """
    # Response.raise_for_status() raises requests' own HTTPError, which is
    # unrelated to urllib.error.HTTPError, so that is the class to catch.
    from requests.exceptions import HTTPError as RequestsHTTPError

    save_to = Path(save_to)
    save_to.mkdir(parents=True, exist_ok=True)

    date_range = pd.date_range(start, end)
    date_nodes = baseurl + date_range.strftime("/%Y/%m/%d/mrms/ncep/ProbSevere/")
    with Session() as session:
        # NOTE(review): valid_time is the day itself, so every file of a day
        # gets the same "validTime" -- confirm that the per-file timestamp in
        # the file name is not what was intended.
        for valid_time, date_node in zip(date_range, date_nodes):
            try:
                listing = session.get(date_node)
                listing.raise_for_status()
            except RequestsHTTPError as err:
                print(f"skipping day {date_node}: {err}")
                continue

            for fileurl in iterurls(listing):
                try:
                    r = session.get(fileurl)
                    r.raise_for_status()
                except RequestsHTTPError as err:
                    # One bad file should not cost the rest of the day.
                    print(f"skipping file {fileurl}: {err}")
                    continue

                features = r.json()["features"]
                if not features:
                    # An empty frame has no "validTime"/"ID" columns, so
                    # set_index would raise KeyError.
                    continue

                df = pd.DataFrame(
                    feature_generator(features, valid_time)
                ).set_index(["validTime", "ID"])

                # Path.stem drops the whole extension; slicing off a fixed
                # four characters left the dot of ".json" behind ("name..csv").
                stem = Path(r.url.split("/")[-1]).stem
                local_file = save_to / f"{stem}.csv"
                df.to_csv(local_file)

if __name__ == "__main__":
    # Fetch the two-day sample window into get_data's default output directory.
    get_data(start="2022-06-01", end="2022-06-02")

Index(['https://mtarchive.geol.iastate.edu/2022/06/01/mrms/ncep/ProbSevere/MRMS_PROBSEVERE_20220601_000000.json',
       'https://mtarchive.geol.iastate.edu/2022/06/01/mrms/ncep/ProbSevere/MRMS_PROBSEVERE_20220601_000200.json',
       'https://mtarchive.geol.iastate.edu/2022/06/01/mrms/ncep/ProbSevere/MRMS_PROBSEVERE_20220601_000400.json',
       'https://mtarchive.geol.iastate.edu/2022/06/01/mrms/ncep/ProbSevere/MRMS_PROBSEVERE_20220601_000600.json',
       'https://mtarchive.geol.iastate.edu/2022/06/01/mrms/ncep/ProbSevere/MRMS_PROBSEVERE_20220601_000800.json',
       'https://mtarchive.geol.iastate.edu/2022/06/01/mrms/ncep/ProbSevere/MRMS_PROBSEVERE_20220601_001000.json',
       'https://mtarchive.geol.iastate.edu/2022/06/01/mrms/ncep/ProbSevere/MRMS_PROBSEVERE_20220601_001200.json',
       'https://mtarchive.geol.iastate.edu/2022/06/01/mrms/ncep/ProbSevere/MRMS_PROBSEVERE_20220601_001400.json',
       'https://mtarchive.geol.iastate.edu/2022/06/01/mrms/ncep/ProbSevere/MRMS_PROBSEVE