In [None]:
import xarray as xr
import os
import numpy as np
import pandas as pd
import dask
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
import cartopy.feature as cfeature

# Rolling-window sizes (in grid points) along projection_x_coordinate and
# projection_y_coordinate; the neighbourhood ensemble therefore has
# X_NEIGHBORHOOD * Y_NEIGHBORHOOD members.
X_NEIGHBORHOOD = 21
Y_NEIGHBORHOOD = 27
# Directory scanned for forecast NetCDF (.nc) files.
HRRR_LT_DATA_PATH = "../data/processed/hrrr/"
# Directory scanned for observation NetCDF (.nc) files.
OBS_DATA_PATH = "../data/processed/obs/"
# Output directory; one result file is written per forecast file, using the
# same file name. The neighbourhood size is encoded in the directory name.
RESULT_PATH = f"../data/neighbourhood/hrrr_{X_NEIGHBORHOOD}_{Y_NEIGHBORHOOD}/"

In [None]:
# Make sure the output directory exists: on a first run for a new
# neighbourhood size there are no results yet, and listing a missing
# directory would raise FileNotFoundError.
os.makedirs(RESULT_PATH, exist_ok=True)

# Sorted NetCDF file names (names only, not full paths) found in each directory.
obs_file_list = sorted([f for f in os.listdir(OBS_DATA_PATH) if f.endswith(".nc")])
fcst_file_list = sorted([f for f in os.listdir(HRRR_LT_DATA_PATH) if f.endswith(".nc")])
# Results already on disk; forecasts with a matching name are skipped later.
results_file_list = sorted([f for f in os.listdir(RESULT_PATH) if f.endswith(".nc")])

In [None]:
def create_neighourhood_ensemble(
    fcst, x_neighborhood, y_neighborhood, remove_incomplete_ens=True
):
    """Turn a gridded forecast into a neighbourhood ensemble.

    For every grid point, the values inside a centred rolling window over
    ``projection_x_coordinate`` and ``projection_y_coordinate`` are gathered
    into a new ``ens_mem`` dimension.

    Parameters
    ----------
    fcst
        xarray object with ``projection_x_coordinate`` and
        ``projection_y_coordinate`` dimensions.
    x_neighborhood, y_neighborhood
        Window sizes along the x and y dimensions. The resulting ensemble has
        ``x_neighborhood * y_neighborhood`` members.
    remove_incomplete_ens
        If True, every grid point whose window contains at least one NaN
        member has all of its members set to NaN.

    Returns
    -------
    The windowed data with an extra ``ens_mem`` dimension labelled
    ``0 .. x_neighborhood * y_neighborhood - 1``.
    """
    window_sizes = {
        "projection_x_coordinate": x_neighborhood,
        "projection_y_coordinate": y_neighborhood,
    }
    windows = fcst.rolling(dim=window_sizes, center=True)

    # Expose the window contents as dimensions "i" and "j", then flatten the
    # two window dimensions into a single member dimension.
    members = windows.construct(
        projection_x_coordinate="i", projection_y_coordinate="j"
    ).stack(ens_mem=("i", "j"))

    if remove_incomplete_ens:
        has_missing_member = np.isnan(members).any(dim="ens_mem")
        members = members.where(~has_missing_member, np.nan)

    # Replace the stacked (i, j) index with a plain integer member index so the
    # coordinates are clean for the CRPS calculation.
    member_ids = np.arange(x_neighborhood * y_neighborhood)
    return members.drop_vars(["ens_mem", "i", "j"]).assign_coords(
        ens_mem=member_ids
    )

In [None]:
# CONUS domain bounds in degrees; stations outside this box are skipped.
latN = 50.4
latS = 24.25
lonW = -126
lonE = -66

# Lambert conformal projection on a spherical globe, used to convert station
# lon/lat into the forecast's projection_x/projection_y coordinates.
# NOTE(review): parameters presumably match the HRRR grid definition -- confirm.
projection = ccrs.LambertConformal(
    central_longitude=262.5,
    central_latitude=38.5,
    standard_parallels=(38.5, 38.5),
    globe=ccrs.Globe(semimajor_axis=6371229, semiminor_axis=6371229),
)

# Forecast files that do not yet have a result file of the same name.
completed_results = set(results_file_list)
pending_fcst_files = [
    name for name in fcst_file_list if name not in completed_results
]

# Station metadata does not depend on the forecast file, so read and project
# it once instead of reopening every observation file for every forecast.
station_points = []
if pending_fcst_files:
    for obs_file_name in obs_file_list:
        # Only the attributes are needed; close the file straight away.
        with xr.open_dataarray(f"{OBS_DATA_PATH}{obs_file_name}") as obs:
            station = obs.attrs["station"]
            latitude = obs.attrs["lat"]
            longitude = obs.attrs["lon"]
        if longitude < lonW or longitude > lonE or latitude > latN or latitude < latS:
            continue
        x, y = projection.transform_point(longitude, latitude, ccrs.PlateCarree())
        station_points.append((station, x, y))
    if not station_points:
        # xr.concat would raise a less helpful ValueError on an empty list.
        raise ValueError(
            f"no observation stations inside the CONUS domain found in {OBS_DATA_PATH}"
        )

for fcst_name in pending_fcst_files:
    # Keep the forecast open until the result is written, then release it.
    with xr.open_dataset(f"{HRRR_LT_DATA_PATH}{fcst_name}") as fcst:
        ens = create_neighourhood_ensemble(fcst, X_NEIGHBORHOOD, Y_NEIGHBORHOOD)

        point_list = []
        for station, x, y in station_points:
            # Ensemble at the grid point nearest to the station.
            ens_point = ens.sel(
                projection_x_coordinate=x, projection_y_coordinate=y, method="nearest"
            )
            ens_point = ens_point.expand_dims("station")
            ens_point = ens_point.assign_coords({"station": [station]})
            ens_point = ens_point.drop_vars(
                ["projection_x_coordinate", "projection_y_coordinate"]
            )
            point_list.append(ens_point)
        point_ens_fcst = xr.concat(point_list, dim="station")
        # NOTE(review): this only relabels the variable from 1hr to 6hr; confirm
        # the input really holds 6-hour accumulations.
        point_ens_fcst = point_ens_fcst.rename({"APCP_1hr_acc_fcst": "APCP_6hr_acc_fcst"})
        point_ens_fcst.to_netcdf(f"{RESULT_PATH}{fcst_name}")
    print(f"saved data for {fcst_name}")