In [34]:
%load_ext jupyter_black


import pandas as pd
def process_extract(name: str, limit: int):
    data = {
        "Remote Access Migration": [
            ("Remote Access Migration", 100),
            ("Migrate Application Access", 86),
            ("Migration of Z75", 86),
            ("Network Access", 86),
            ("Remote Control Access Hardening", 86),
        ],
        "Some Other Junk": [
            ("Blah1", 10),
            ("Blah2", 10),
            ("Blah3", 10),
            ("Blah4", 10),
            ("Blah5", 10),
            ("Blah6", 10),
            ("Blah7", 10),
            ("Blah8", 10),
        ],
    }
    return data[name][:limit]


ours_list = ["Remote Access Migration", "Some Other Junk"]


pd.DataFrame([dict(process_extract(name, limit=5)) for name in ours_list], index=ours_list).T


The jupyter_black extension is already loaded. To reload it, use:
  %reload_ext jupyter_black


Unnamed: 0,Remote Access Migration,Some Other Junk
Remote Access Migration,100.0,
Migrate Application Access,86.0,
Migration of Z75,86.0,
Network Access,86.0,
Remote Control Access Hardening,86.0,
Blah1,,10.0
Blah2,,10.0
Blah3,,10.0
Blah4,,10.0
Blah5,,10.0


In [20]:
import gzip
import shutil
from pathlib import Path
from typing import Iterable, Literal
from datetime import datetime, timedelta
from urllib3.response import HTTPResponse

import pandas as pd
import xarray as xr
from requests import Session


def make_urls(
    start: datetime,
    stop: int,
    item: Literal[
        "FLASH",
        "MESH",
        "MESH_Max_1440min",
        "MultiSensor_QPE_01H_Pass2",
        "MultiSensor_QPE_24H_Pass2",
        "MultiSensor_QPE_72H_Pass2",
        "PrecipFlag",
        "PrecipRate",
        # "ProbSevere",
        "RadarOnly_QPE_01H",
        "RadarOnly_QPE_24H",
        "RadarOnly_QPE_72H",
        "RadarQualityIndex",
        "RotationTrack1440min",
        "SeamlessHSR",
    ] = "RadarOnly_QPE_01H",
) -> Iterable[str]:

    url = "https://mtarchive.geol.iastate.edu/%Y/%m/%d/mrms/ncep/"
    url += f"{item}/{item}_00.00_%Y%m%d-00%H%M.grib2.gz"
    end = start + timedelta(hours=stop)

    yield from (pd.date_range(start, end, freq="2h").strftime(url))


def gzip_request(file_path: Path, raw: HTTPResponse) -> None:
    with gzip.open(raw, mode="rb") as f_in:
        with file_path.open(mode="wb") as f_out:
            shutil.copyfileobj(f_in, f_out)


def mrms_dataset_from_urls(urls: Iterable[str], path: Path = Path("/tmp/dataset"), remove_tmp:bool=True) -> xr.Dataset:
    if not path.exists():
        path.mkdir()

    def generate():
        with Session() as session:
            for i, url in enumerate(urls):
                file_name = path / f"outfile-{i}.grib2"
                res = session.get(url, stream=True)
                res.raise_for_status()
                gzip_request(file_name, res.raw)
                yield file_name

    data = xr.open_mfdataset(
        generate(),
        engine="cfgrib",
        combine="nested",
        concat_dim="valid_time",
    )
    if remove_tmp:
        shutil.rmtree(path)

    return data


if __name__ == "__main__":
    urls = make_urls(datetime(2022, 6, 15, 0), 3)
    ds = mrms_dataset_from_urls(urls)
ds

Unnamed: 0,Array,Chunk
Bytes,186.92 MiB,93.46 MiB
Shape,"(2, 3500, 7000)","(1, 3500, 7000)"
Count,8 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 186.92 MiB 93.46 MiB Shape (2, 3500, 7000) (1, 3500, 7000) Count 8 Tasks 2 Chunks Type float32 numpy.ndarray",7000  3500  2,

Unnamed: 0,Array,Chunk
Bytes,186.92 MiB,93.46 MiB
Shape,"(2, 3500, 7000)","(1, 3500, 7000)"
Count,8 Tasks,2 Chunks
Type,float32,numpy.ndarray
