In [None]:
import s3fs
import xarray as xr
import pandas as pd
import os

# Directory (relative to the notebook's working directory) that holds the
# per-day HRRR NetCDF files. File names are appended to this string directly,
# so the trailing slash is required.
HRRR_DATA_PATH = "../data/raw/hrrr/"

In [2]:
# Create the output directory if it is missing: on a fresh checkout the listing
# below would otherwise raise FileNotFoundError, and the NetCDF writes later in
# the notebook would fail for the same reason.
os.makedirs(HRRR_DATA_PATH, exist_ok=True)

# Snapshot of the file names already on disk; the download loop consults it to
# skip forecast runs saved by a previous session.
files = os.listdir(HRRR_DATA_PATH)

In [None]:
# S3 file system handle opened in anonymous mode (anon=True); the cells below
# reuse this single instance for every store they open.
fs = s3fs.S3FileSystem(anon=True)


def lookup(path):
    """Return a key-value mapping over an S3 location.

    Parameters
    ----------
    path : str
        S3 path to expose as a mapping (for example the root of a Zarr group).

    Returns
    -------
    s3fs.S3Map
        Mapping for ``path`` bound to the module-level file system ``fs``.
    """
    # Bind to the notebook's shared ``fs`` handle. No variable named ``s3``
    # exists in this notebook, so referring to it raised NameError on every call.
    return s3fs.S3Map(path, s3=fs)

In [None]:
# For every day in the range: fetch the 00z HRRR forecast precipitation from the
# public bucket, reduce it to 6-hourly totals and save one NetCDF file per day.
dates = pd.date_range(start="2022-01-01", end="2024-08-30")
for date in dates:
    # Zero-padded YYYYMMDD stamp, used in both the bucket layout and the file name.
    stamp = date.strftime("%Y%m%d")
    out_name = f"hrrr_{stamp}_00z.nc"
    if out_name in files:
        # Already on disk from an earlier session.
        continue

    # Two nested Zarr groups are opened together and merged by xarray.
    # NOTE(review): presumably the inner group holds the data array and the
    # outer one its coordinates -- confirm against the hrrrzarr bucket layout.
    group_url = f"s3://hrrrzarr/sfc/{stamp}/{stamp}_00z_fcst.zarr/surface/APCP_1hr_acc_fcst"
    stores = [
        s3fs.S3Map(f"{group_url}/surface", s3=fs),
        s3fs.S3Map(group_url, s3=fs),
    ]
    try:
        ds = xr.open_mfdataset(stores, engine="zarr")
    except FileNotFoundError:
        print(f"no data for {stamp}")
        continue

    out_path = f"{HRRR_DATA_PATH}{out_name}"
    # Write under a temporary name first so that an interrupted run never leaves
    # a truncated file carrying the final name (which the skip check above
    # would then treat as complete).
    tmp_path = f"{out_path}.tmp"

    # The context manager closes the dataset once the file is written (or on
    # error), so handles do not pile up over the many iterations.
    with ds:
        # Generate 6-hourly accumulated precipitation
        precip_6h = ds.APCP_1hr_acc_fcst.resample(
            time="6h", label="right", closed="right"
        ).sum()
        precip_6h.astype("float32").to_netcdf(tmp_path)

    # Atomic rename: the final name only appears once the write has finished.
    os.replace(tmp_path, out_path)
    print(f"saved data for {stamp}")