In [2]:
import geopandas as gpd
import xarray as xr
# requires cfgrib and rioxarray to be installed!

In [1]:
import warnings
from shapely.errors import ShapelyDeprecationWarning
warnings.filterwarnings("ignore", category=ShapelyDeprecationWarning)

### Calculate max hourly rainfall using Radar grib files

In [15]:
radar_filepaths = ["/Users/slamont/temp/radar_202207042010/20220704_201000.004",
                   "/Users/slamont/temp/radar_202207042020/20220704_202000.004",
                   "/Users/slamont/temp/radar_202207042030/20220704_203000.004",
                   "/Users/slamont/temp/radar_202207042040/20220704_204000.004",
                   "/Users/slamont/temp/radar_202207042050/20220704_205000.004",
                   "/Users/slamont/temp/radar_202207042100/20220704_210000.004"]

In [3]:
# Get the KC city boundary polygon
gdf_city_polygons = gpd.read_file("/Users/slamont/japan_gis/geo_boundaries_shp/all_geos_boundaries_107.geojson")
gdf_kc = gdf_city_polygons[gdf_city_polygons.ID == 9]

In [28]:
%%time
# open and sum all 6 layers to get hourly accumulation
ds_sum = xr.open_mfdataset(radar_filepaths, engine="cfgrib" , backend_kwargs={'indexpath': ' '}, concat_dim='t', combine='nested', parallel=True).sum(dim="t")

# assign a lat/lon coordinate system
ds_sum = ds_sum.rio.write_crs("EPSG:4326", inplace=True)

# clip to KC boundary
ds_clip = ds_sum.rio.clip(gdf_kc.geometry, "EPSG:4326", all_touched=True).persist()

# get max value of clipped rainfall
ds_clip.unknown.max().values  # "unknown" is name of rainfall variable

# save clipped layer to disk for viz, if desired
# ds_clip.rio.to_raster("/Users/slamont/temp/2022070420_clipped_sum_kc.tif")

Ignoring index file ' ' older than GRIB file
Ignoring index file ' ' incompatible with GRIB file
Ignoring index file ' ' incompatible with GRIB file
Ignoring index file ' ' incompatible with GRIB file
Ignoring index file ' ' incompatible with GRIB file


CPU times: user 11.2 s, sys: 638 ms, total: 11.9 s
Wall time: 3.21 s


array(65.5, dtype=float32)

In [22]:
ds_sum

Unnamed: 0,Array,Chunk
Bytes,32.81 MiB,32.81 MiB
Shape,"(3360, 2560)","(3360, 2560)"
Count,45 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 32.81 MiB 32.81 MiB Shape (3360, 2560) (3360, 2560) Count 45 Tasks 1 Chunks Type float32 numpy.ndarray",2560  3360,

Unnamed: 0,Array,Chunk
Bytes,32.81 MiB,32.81 MiB
Shape,"(3360, 2560)","(3360, 2560)"
Count,45 Tasks,1 Chunks
Type,float32,numpy.ndarray


### Extracting rainfall from COMPASS grib files by variable name

In [7]:
compass_filepath = "/Users/slamont/temp/tmp_compass/20220613_000000"

In [22]:
ds_cmp = xr.open_dataset(compass_filepath, engine="cfgrib", backend_kwargs={'indexpath': ' ', 'filter_by_keys': {'parameterName': 'Total precipitation'}}, chunks={"step":1, "latitude":100, "longitude":100})
ds_cmp

Ignoring index file ' ' incompatible with GRIB file


Unnamed: 0,Array,Chunk
Bytes,296 B,8 B
Shape,"(37,)","(1,)"
Count,38 Tasks,37 Chunks
Type,datetime64[ns],numpy.ndarray
"Array Chunk Bytes 296 B 8 B Shape (37,) (1,) Count 38 Tasks 37 Chunks Type datetime64[ns] numpy.ndarray",37  1,

Unnamed: 0,Array,Chunk
Bytes,296 B,8 B
Shape,"(37,)","(1,)"
Count,38 Tasks,37 Chunks
Type,datetime64[ns],numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,34.28 MiB,39.06 kiB
Shape,"(37, 505, 481)","(1, 100, 100)"
Count,1111 Tasks,1110 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 34.28 MiB 39.06 kiB Shape (37, 505, 481) (1, 100, 100) Count 1111 Tasks 1110 Chunks Type float32 numpy.ndarray",481  505  37,

Unnamed: 0,Array,Chunk
Bytes,34.28 MiB,39.06 kiB
Shape,"(37, 505, 481)","(1, 100, 100)"
Count,1111 Tasks,1110 Chunks
Type,float32,numpy.ndarray


In [9]:
# Limit to 7 GEOs only 
CITIES_OF_INTEREST = ["Kōriyama", "Nagano", "Kawasaki", "Chiba", "Okazaki", "Okayama", "Kumamoto"]
mask = gdf_city_polygons["E-Name"].isin(CITIES_OF_INTEREST)
gdf_city_polygons = gdf_city_polygons[mask]

In [23]:
%%time
lst_forecast_hr = list(range(36))  # 36 is length of forecast we care about
lst_all_cities_hourly = []
lst_all_cities_24hr = []

ds_cmp = ds_cmp.rio.write_crs("EPSG:4326", inplace=True)

# for j, tpl in enumerate(gdf_city_polygons.itertuples()):
#     # Get the polygon geometry for this geo
#     geom = tpl.geometry
#     for step_val in lst_forecast_hr:
#         ds_tmp = ds_cmp.rio.clip(geom, "EPSG:4326", all_touched=True).persist()
#         # ds_tmp = ds_tmp.where(ds_tmp != ds_cmp._FillValue)  # sets values equal to FillValue to nan
#         max_hr = float(ds_tmp.unknown.isel(step=step_val).max().values)
#         max_24hr = float(ds_tmp.unknown.isel(step=slice(step_val, step_val + 24)).sum(dim="step").max().values)
        
for step_val in lst_forecast_hr:
    lst_hourly_max = []
    lst_24hr_max = []
    for j, tpl in enumerate(gdf_city_polygons.itertuples()):
        geom = tpl.geometry
        ds_tmp = ds_cmp.rio.clip(geom, "EPSG:4326", all_touched=True).persist()
        # ds_tmp = ds_tmp.where(ds_tmp != ds_cmp._FillValue)  # sets values equal to FillValue to nan
        max_hr = float(ds_tmp.unknown.isel(step=step_val).max().values)
        max_24hr = float(ds_tmp.unknown.isel(step=slice(step_val, step_val + 24)).sum(dim="step").max().values)

        # # NOTE: For lisflood data, divide by 24
        # max_hr = max_hr / 24.
        # max_24hr = max_24hr / 24.

        lst_hourly_max.append(max_hr)
        lst_24hr_max.append(max_24hr)


    lst_all_cities_hourly.append(lst_hourly_max)
    lst_all_cities_24hr.append(lst_24hr_max)

CPU times: user 1min 14s, sys: 15 s, total: 1min 29s
Wall time: 1min 32s


In [20]:
ds_cmp

In [17]:
ds_tmp.isel(step=step_val).max().values

<bound method Mapping.values of <xarray.Dataset>
Dimensions:      ()
Coordinates:
    time         datetime64[ns] 2022-06-13
    step         timedelta64[ns] 00:00:00
    surface      float64 0.0
    valid_time   datetime64[ns] 2022-06-13
    spatial_ref  int64 0
Data variables:
    unknown      float32 0.0>