In [1]:
%matplotlib inline

### os 
import os 
import sys

### datetimes 
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta

### scipy 
import numpy as np 
import pandas as pd
import xarray as xr
from scipy import stats as st
from cartopy import crs as ccrs

# dask 
from dask.diagnostics import ProgressBar

### plotting 
from matplotlib import pyplot as plt


In [2]:
import pathlib

HOME = pathlib.Path.home()
CWD = pathlib.Path.cwd() 


In [3]:
import sys

In [4]:
sys.path.append('../../')

In [5]:
domain = [125, 240, -35, 25]

In [6]:
from ICU_Water_Watch import plot, geo, utils

### directory with the daily GPM-IMERG data (one file per day)

In [7]:
dpath = pathlib.Path('/home/nicolasf/operational/ICU/ops/data/GPM_IMERG/daily/extended_SP')

In [8]:
lfiles = list(dpath.glob("GPM_IMERG_daily.v06.????.??.??.nc"))

In [9]:
lfiles.sort()

In [10]:
lfiles[0]

PosixPath('/home/nicolasf/operational/ICU/ops/data/GPM_IMERG/daily/extended_SP/GPM_IMERG_daily.v06.2001.01.01.nc')

In [11]:
lfiles[-1]

PosixPath('/home/nicolasf/operational/ICU/ops/data/GPM_IMERG/daily/extended_SP/GPM_IMERG_daily.v06.2021.10.02.nc')

In [12]:
len(lfiles)

7580

In [13]:
lfiles = lfiles[:-2]

In [14]:
lfiles[-1]

PosixPath('/home/nicolasf/operational/ICU/ops/data/GPM_IMERG/daily/extended_SP/GPM_IMERG_daily.v06.2021.09.30.nc')

### open the multi-file dataset and select the variable of interest

In [15]:
# `concat_dim` is only honoured together with combine='nested' (the default
# combine='by_coords' rejects it); the daily files are concatenated along
# `time` in the order of `lfiles`, which was sorted above
dset = xr.open_mfdataset(lfiles, combine='nested', concat_dim='time', parallel=True)[['precipitationCal']]

### calculates the running accumulations 

### re-chunks first 

In [16]:
dset = dset.chunk({'time':-1, 'lon':10, 'lat':10})

### number of days 

In [17]:
ndays = 90

### running accumulation 

In [18]:
dset = dset.rolling({'time':ndays}, min_periods=ndays, center=False).sum('time')

In [19]:
with ProgressBar(): 
    dset = dset.compute()

[########################################] | 100% Completed | 55.2s


### drop the first ndays - 1 time steps (missing values, because the rolling sum requires a full window of ndays)

In [20]:
dset = dset.isel(time=slice(ndays-1, None))

### take the month and day of the last value 

In [21]:
last_day = dset.time.to_index()[-1]

In [22]:
last_day

cftime.DatetimeJulian(2021, 9, 30, 0, 0, 0, 0)

### buffer (in days) around the date (for taking the corresponding accumulations from the climatology)

In [23]:
buffer = 3

### climatological period 

In [24]:
clim_period =[2001, 2020]

## construct dates 

In [25]:
def subset_daily_clim(dset, last_day, clim_period=[2001, 2020], buffer=3): 
    """
    Extract, for each year of a climatological period, the days falling within
    `buffer` days of the calendar day (month, day) of `last_day`.

    Parameters
    ----------
    dset : dataset with a `time` coordinate
        Daily dataset; only `.sel`, `.indexes['time']` and assignment of the
        `time` coordinate are used.
    last_day : object with `.month` and `.day` attributes
        Reference date; only its month and day are used.
    clim_period : sequence of int, default [2001, 2020]
        First and last year (inclusive) of the climatological period.
    buffer : int, default 3
        Number of days taken on each side of the reference calendar day.

    Returns
    -------
    The subset of `dset` restricted to the selected dates, with `time`
    expressed as a pandas DatetimeIndex.

    Notes
    -----
    - If `last_day` is 29 February, 28 February is used as the anchor day in
      non-leap years.
    - Candidate dates falling outside the time range actually available within
      the climatological period are discarded.
    """

    from datetime import datetime, timedelta
    import warnings

    import numpy as np
    import pandas as pd

    first_year, last_year = int(clim_period[0]), int(clim_period[-1])

    ldates = []

    for y in range(first_year, last_year + 1):

        try:
            anchor = datetime(y, last_day.month, last_day.day)
        except ValueError:
            # only 29 February can be invalid for some years; anything else
            # is a genuine error and must not be hidden
            if last_day.month != 2:
                raise
            anchor = datetime(y, 2, 28)

        ldates += [anchor + timedelta(days=x) for x in range(-buffer, buffer + 1)]

    dates = pd.DatetimeIndex(np.array(ldates)).to_series()

    clim = dset.sel(time=slice(str(first_year), str(last_year)))

    time_index = clim.indexes['time']

    # a cftime-based index must be converted before it can be compared with
    # the pandas timestamps built above; a pandas DatetimeIndex is used as is
    if hasattr(time_index, 'to_datetimeindex'):
        # the conversion may warn for non-standard calendars: silence it
        # locally instead of disabling every warning for the whole session
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            clim['time'] = time_index.to_datetimeindex()

    available = clim.indexes['time']

    # keep only the candidate dates inside the available time range
    dates = dates.loc[available[0]:available[-1]]

    clim = clim.sel(time=dates.values)

    return clim

In [26]:
clim = subset_daily_clim(dset, last_day)

In [27]:
def calibrate_SPI(dset, variable='precipitationCal', dimension='time', return_gamma = False):
    """
    Calibrate the SPI over a climatological dataset (typically obtained using
    `subset_daily_clim` with an appropriate buffer).

    A gamma distribution is fitted along `dimension` with the closed-form
    approximation of the maximum-likelihood estimates (Thom, 1958):

        A     = ln(mean(x)) - mean(ln(x))
        alpha = (1 + sqrt(1 + 4 A / 3)) / (4 A)      (shape)
        beta  = mean(x) / alpha                      (scale)

    Parameters
    ----------
    dset : xarray.Dataset
        Dataset containing the accumulations.
    variable : str, default 'precipitationCal'
        Name of the variable to calibrate on.
    dimension : str, default 'time'
        Dimension along which the distribution is fitted.
    return_gamma : bool, default False
        If True, also return the fitted gamma CDF evaluated on the input data.

    Returns
    -------
    (alpha, beta), or (gamma, alpha, beta) when `return_gamma` is True.

    Notes
    -----
    Values that are not strictly positive do not contribute to the sum of
    logarithms nor to the sample size `n`, but they are included in the mean.
    """
    
    import numpy as np 
    import xarray as xr 
    from scipy import stats as st
    
    ds_ma = dset[variable]
    
    # take the log of strictly positive values only: zeros would give -inf
    # (and a divide-by-zero RuntimeWarning) only to be masked afterwards
    ds_In = np.log(ds_ma.where(ds_ma > 0))
    ds_In = ds_In.where(np.isfinite(ds_In))

    # overall mean of the accumulations
    ds_mu = ds_ma.mean(dimension)

    # sum of the natural logarithms of the accumulations
    ds_sum = ds_In.sum(dimension)

    # gamma distribution parameters
    n = ds_In.count(dimension)                   # number of valid log values
    A = np.log(ds_mu) - (ds_sum / n)
    alpha = (1 / (4 * A)) * (1 + (1 + ((4 * A) / 3)) ** 0.5)   # shape
    beta = ds_mu / alpha                                       # scale
    
    if return_gamma: 

        gamma_func = lambda data, a, scale: st.gamma.cdf(data, a=a, scale=scale)

        gamma = xr.apply_ufunc(gamma_func, ds_ma, alpha, beta)

        return gamma, alpha, beta

    return alpha, beta

In [28]:
def calculate_SPI(dataarray, alpha, beta, name='SPI'): 
    """
    Convert accumulations to SPI values given calibrated gamma parameters.

    The data are first mapped to cumulative probabilities with the gamma CDF
    (shape `alpha`, scale `beta`), then to quantiles of the standard normal
    distribution.

    Parameters
    ----------
    dataarray : xarray.DataArray
        Accumulations to transform.
    alpha, beta : xarray.DataArray
        Shape and scale parameters of the gamma distribution.
    name : str, default 'SPI'
        Name of the variable in the returned dataset.

    Returns
    -------
    xarray.Dataset holding the SPI under the variable `name`.
    """
    
    import numpy as np 
    import xarray as xr 
    from scipy import stats as st

    def _gamma_cdf(values, shape, scale):
        # cumulative probability under the fitted gamma distribution
        return st.gamma.cdf(values, a=shape, scale=scale)

    def _standard_normal_quantile(probabilities):
        # inverse CDF of N(0, 1)
        return st.norm.ppf(probabilities, loc=0, scale=1)

    cumulative_prob = xr.apply_ufunc(_gamma_cdf, dataarray, alpha, beta)

    spi = xr.apply_ufunc(_standard_normal_quantile, cumulative_prob)

    return spi.to_dataset(name=name)

In [None]:
alpha, beta = calibrate_SPI(clim)

In [None]:
norm_spi_realtime = calculate_SPI(dset['precipitationCal'].isel(time=-1), alpha, beta) 

### reads in the EEZs 

In [None]:
from ICU_Water_Watch import geo

In [None]:
EEZs, merged_EEZs = geo.get_EEZs(dpath_shapes='/home/nicolasf/operational/ICU/development/hotspots/data/shapefiles/')

### adds the EEZ mask to the dataset 

In [None]:
norm_spi_realtime = geo.make_mask_from_gpd(norm_spi_realtime, merged_EEZs, subset=True, mask_name='EEZ')

In [None]:
from ICU_Water_Watch.plot import * 

In [None]:
# SPI class boundaries
thresholds = [-2.5, -2, -1.5, -1, 1, 1.5, 2, 2.5]

# one colour per class (one fewer than the number of boundaries)
rgbs = ['#F04E37', '#F99D1C', '#FFDE40', '#ffffff', '#96ceff', '#4553bf', '#09146b']

# half-width of each class
ticks_marks = np.diff(np.asarray(thresholds)) / 2.

# tick positions at the centre of each class
ticks = [lower + half_width for lower, half_width in zip(thresholds, ticks_marks)]

In [None]:
ticks

In [None]:
cbar_kwargs={'shrink':0.5, 'pad':0.01, 'extend':'neither', 'drawedges':True, 'ticks':ticks, 'aspect':15}

In [None]:
# the name `matplotlib` is not bound by `from matplotlib import pyplot as plt`:
# import the submodule explicitly instead of relying on a star import
import matplotlib.colors

cmap = matplotlib.colors.ListedColormap(rgbs, name='SPI')

In [None]:
dataarray = norm_spi_realtime['SPI'] * norm_spi_realtime['EEZ']

In [None]:
cbar_ticklabels = ['- extremely dry','- severely dry','- moderately dry',' ', '- moderately wet','- severely wet','- extremely wet']

In [None]:
last_day = datetime(last_day.year, last_day.month, last_day.day)

In [None]:
# single map panel, PlateCarree projection centred on 180 degrees longitude
f, ax = plt.subplots(figsize=(13, 8), subplot_kw={'projection':ccrs.PlateCarree(central_longitude=180)})

# filled contours using the SPI class boundaries as levels; the default
# colorbar is disabled because a custom one is drawn below
im = dataarray.plot.contourf(ax=ax, levels=thresholds, cmap=cmap, transform=ccrs.PlateCarree(), add_colorbar=False)

# adds the colorbar axes as an inset of the map axes
# (position given as [x0, y0, width, height] in axes-relative coordinates)

cbar_ax = ax.axes.inset_axes([0.80, 0.5225, 0.025, 0.38])

# plots the colorbar in these axes 

cb = plt.colorbar(im, cax=cbar_ax, **cbar_kwargs)

cb.ax.minorticks_off() 

# zero-length tick marks: only the labels remain visible
cb.ax.tick_params(size=0)

# replaces the numeric tick labels by the class names; this relies on the
# colorbar ticks having been set to one position per class (`ticks` in `cbar_kwargs`)

cbar_ax.set_yticklabels(cbar_ticklabels)

ax.coastlines(resolution='10m')

# outlines of the individual EEZs
EEZs.boundary.plot(ax=ax, transform=ccrs.PlateCarree(), color='0.4', linewidth=0.8)

title = f"GPM-IMERG, Standardized Precipitation Index (SPI)"

ax.set_title("") # to get rid of the default title

# title drawn as a text box in the upper right corner of the map
ax.text(0.99, 0.95, title, fontsize=13, fontdict={'color':'k'}, bbox=dict(facecolor='w', edgecolor='w'), horizontalalignment='right', verticalalignment='center', transform=ax.transAxes)

# restricts the map to `domain`, given in geographic (PlateCarree) coordinates
ax.set_extent(domain, crs = ccrs.PlateCarree())

In [None]:
f.savefig(f'./SPI_ICU_prototype_{ndays}_days_to_{last_day:%Y%m%d}.png', dpi=200, bbox_inches='tight', facecolor='w')