In [13]:
import xarray as xr 
import numpy as np
from glob import glob
from tqdm import tqdm
import hvplot.xarray

# Import helper functions
import utils.parameters as param
from utils.misc_utils import check_and_create_dir

In [3]:
data_dir = "../data/"

In [4]:
# Read in data
var = "precip"
filepaths_wildcard = data_dir + "chirps_precip/*chirps*.days_p25.nc"
filepaths_all = glob(filepaths_wildcard)
ds = xr.open_mfdataset(filepaths_all).sel(time=param.time_period)
global_attrs = ds.attrs
var_attrs = ds[var].attrs

In [5]:
ds = ds.chunk(dict(time=-1))
ds_coarsened = ds.coarsen(latitude=20,longitude=20).mean()

    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array.reshape(shape)

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    >>> array.reshape(shape, limit='128 MiB')
  reduced[key] = da.variable.coarsen(


In [6]:
ds_coarsened["p95"] = ds_coarsened[var].quantile(0.95, dim="time")

In [7]:
# Assign classes based on exceedance of 95th percentile
ds_coarsened["precip_class"] = xr.where(ds_coarsened[var] > ds_coarsened["p95"], 1, 0)
ds_coarsened["precip_class"].attrs = {
    "classes": "Class 0: precipitation below threshold \nClass 1: precipitation exeeds threshold",
}

In [8]:
# # Make plots 
# to_plot = ds_coarsened.isel(time=np.arange(0,30)).load()

# for var in ["precip","p95","precip_class"]:    
#     clim = (0,70) if var in ["precip","p95"] else (0,1)
#     pl = to_plot[var].hvplot.quadmesh(x="longitude",y="latitude", clim=clim)
#     display(pl)

In [9]:
# ds_coarsened.to_netcdf("{0}input_data_preprocessed/labels/chirps_5x5.nc".format(data_dir))

In [14]:
non_nan_coords = []

lats = ds_coarsened.latitude.values 
lons = ds_coarsened.longitude.values 
coords = [(lat,lon) for lat in lats for lon in lons]
for i in tqdm(range(len(coords))):
    coord = coords[i]
    ds_i = ds_coarsened.sel(latitude=coord[0], longitude=coord[1])
    # Check if any NaNs in that gridcell timeseries
    # If NaN's found, do not build model 
    if not np.isnan(ds_i.precip.values).any(): 
        non_nan_coords.append(coord)
    else: 
        pass 

100%|██████████| 1440/1440 [1:55:33<00:00,  4.81s/it]   


In [15]:
non_nan_coords

[(-47.5, -77.5),
 (-47.5, -72.5),
 (-47.5, -67.5),
 (-47.5, 167.5),
 (-47.5, 172.5),
 (-42.5, -177.5),
 (-42.5, -72.5),
 (-42.5, -67.5),
 (-42.5, -62.5),
 (-42.5, 142.5),
 (-42.5, 147.5),
 (-42.5, 167.5),
 (-42.5, 172.5),
 (-42.5, 177.5),
 (-37.5, -72.5),
 (-37.5, -67.5),
 (-37.5, -62.5),
 (-37.5, -57.5),
 (-37.5, 117.5),
 (-37.5, 137.5),
 (-37.5, 142.5),
 (-37.5, 147.5),
 (-37.5, 152.5),
 (-37.5, 172.5),
 (-37.5, 177.5),
 (-32.5, -72.5),
 (-32.5, -67.5),
 (-32.5, -62.5),
 (-32.5, -57.5),
 (-32.5, -52.5),
 (-32.5, 17.5),
 (-32.5, 22.5),
 (-32.5, 27.5),
 (-32.5, 32.5),
 (-32.5, 117.5),
 (-32.5, 122.5),
 (-32.5, 127.5),
 (-32.5, 132.5),
 (-32.5, 137.5),
 (-32.5, 142.5),
 (-32.5, 147.5),
 (-32.5, 152.5),
 (-32.5, 172.5),
 (-27.5, -72.5),
 (-27.5, -67.5),
 (-27.5, -62.5),
 (-27.5, -57.5),
 (-27.5, -52.5),
 (-27.5, -47.5),
 (-27.5, 12.5),
 (-27.5, 17.5),
 (-27.5, 22.5),
 (-27.5, 27.5),
 (-27.5, 32.5),
 (-27.5, 42.5),
 (-27.5, 47.5),
 (-27.5, 112.5),
 (-27.5, 117.5),
 (-27.5, 122.5),
 (-27.5