<span style="color:red; font-family:Helvetica Neue, Helvetica, Arial, sans-serif; font-size:2em;">An Exception was encountered at '<a href="#papermill-error-cell">In [50]</a>'.</span>

# calculate the leadtime-dependent climatological terciles, deciles and percentiles (0.02, then 0.05 to 0.95 with 0.05 step) from the individual GCMs' hindcast dataset (period 1993 - 2016) 

In [1]:
!pwd

/home/nicolasf/operational/ICU/development/hotspots/code/ICU_Water_Watch/notebooks/C3S


#### **Note**: this is quite slow: It takes about 15 minutes to process a complete climatology (12 months) for one GCM, for one accumulation period (monthly or seasonal), but only needs to be calculated once ... 

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
%matplotlib inline

### os
import os
import sys
from collections import OrderedDict

### datetimes
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
from calendar import month_name


### scipy
import numpy as np
import pandas as pd
import xarray as xr
import geopandas as gpd
import cartopy.crs as ccrs
import dask
from dask.diagnostics import ProgressBar
from tqdm import tqdm

### plotting
from matplotlib import pyplot as plt
import matplotlib
import seaborn as sns


In [4]:
import pathlib

HOME = pathlib.Path.home()
CWD = pathlib.Path.cwd() 

In [5]:
sys.path.append('../../') 

In [6]:
from ICU_Water_Watch import geo, C3S, domains, plot

### dictionary holding the quantile names and quantile values; the values are passed as **lists** to avoid any numerical issues 

In [7]:
# Quantile families to compute, keyed by name; insertion order is the
# order in which the climatologies are processed further down.
dict_quantiles = OrderedDict(
    [
        ('tercile', [0.3333, 0.6666]),
        ('decile', [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]),
        (
            'percentile',
            [0.02, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45,
             0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95],
        ),
    ]
)

### variables 

### provider 

In [8]:
provider = 'CDS'

### variable name in the hindcast dataset 

In [9]:
varname = 'tprate'

### period: `monthly` or `seasonal` 

In [10]:
period = 'monthly'
GCM = 'ECMWF'
lag = 1

In [11]:
# Parameters
GCM = "ECMWF"
lag = 0
period = "seasonal"


### get today's date 

In [12]:
date = datetime.utcnow()

### apply lag 

In [13]:
date = date - relativedelta(months=lag)

In [14]:
print(f"will process hindcasts for {date:%B}")

will process hindcasts for October


In [15]:
initial_month = date.month

In [16]:
initial_month

10

### list of valid GCMs 

In [17]:
GCMs = ['ECMWF', 'UKMO', 'METEO_FRANCE', 'DWD', 'CMCC', 'NCEP', 'JMA', 'ECCC']

### path definitions follow

#### where to find the GCM hindcast datasets 

In [18]:
gcm_path = pathlib.Path(f'/media/nicolasf/END19101/ICU/data/{provider}/operational/hindcasts')

In [19]:
dpath = gcm_path.joinpath(GCM).joinpath(varname.upper())

In [20]:
print(dpath)

/media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/ECMWF/TPRATE


#### where to save the climatologies 

In [21]:
clim_path = gcm_path.joinpath(f'CLIMATOLOGY/{GCM}')

In [22]:
clim_path

PosixPath('/media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/CLIMATOLOGY/ECMWF')

In [23]:
# Create the climatology output directory (and any missing parents).
# `exist_ok=True` makes this a no-op when the directory is already there and
# avoids the check-then-create race when several runs start at the same time.
clim_path.mkdir(parents=True, exist_ok=True)

### get the list of files 

In [24]:
varname

'tprate'

In [25]:
lfiles = list(dpath.glob(f"ensemble_seas_forecasts_{varname}_from_*_{initial_month:02d}_{GCM}.netcdf"))

In [26]:
dpath

PosixPath('/media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/ECMWF/TPRATE')

In [27]:
print(f"ensemble_seas_forecasts_{varname}_from_*_{initial_month:02d}_{GCM}.netcdf")

ensemble_seas_forecasts_tprate_from_*_10_ECMWF.netcdf


In [28]:
lfiles.sort()

In [29]:
lfiles

[PosixPath('/media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/ECMWF/TPRATE/ensemble_seas_forecasts_tprate_from_1993_10_ECMWF.netcdf'),
 PosixPath('/media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/ECMWF/TPRATE/ensemble_seas_forecasts_tprate_from_1994_10_ECMWF.netcdf'),
 PosixPath('/media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/ECMWF/TPRATE/ensemble_seas_forecasts_tprate_from_1995_10_ECMWF.netcdf'),
 PosixPath('/media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/ECMWF/TPRATE/ensemble_seas_forecasts_tprate_from_1996_10_ECMWF.netcdf'),
 PosixPath('/media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/ECMWF/TPRATE/ensemble_seas_forecasts_tprate_from_1997_10_ECMWF.netcdf'),
 PosixPath('/media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/ECMWF/TPRATE/ensemble_seas_forecasts_tprate_from_1998_10_ECMWF.netcdf'),
 PosixPath('/media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/ECMWF/TPRATE/ensemble_seas_forecasts_tprate_from_1999_10_E

In [30]:
lfiles[0]

PosixPath('/media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/ECMWF/TPRATE/ensemble_seas_forecasts_tprate_from_1993_10_ECMWF.netcdf')

In [31]:
lfiles[-1]

PosixPath('/media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/ECMWF/TPRATE/ensemble_seas_forecasts_tprate_from_2016_10_ECMWF.netcdf')

In [32]:
len(lfiles)

24

### open the multiple files dataset, concatenating over the time dimension, and preprocessing 

In [33]:
dset = xr.open_mfdataset(lfiles, preprocess=C3S.preprocess_GCM, parallel=True, engine='netcdf4')

In [34]:
dset

Unnamed: 0,Array,Chunk
Bytes,112.16 MiB,4.67 MiB
Shape,"(24, 5, 25, 81, 121)","(1, 5, 25, 81, 121)"
Count,168 Tasks,24 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 112.16 MiB 4.67 MiB Shape (24, 5, 25, 81, 121) (1, 5, 25, 81, 121) Count 168 Tasks 24 Chunks Type float32 numpy.ndarray",5  24  121  81  25,

Unnamed: 0,Array,Chunk
Bytes,112.16 MiB,4.67 MiB
Shape,"(24, 5, 25, 81, 121)","(1, 5, 25, 81, 121)"
Count,168 Tasks,24 Chunks
Type,float32,numpy.ndarray


### make sure we select the hindcast period 

In [35]:
dset = dset.sel(time=slice('1993', '2016'))

### convert to monthly rainfall accumulations (mm/month)

In [36]:
dset.tprate.attrs

{'units': 'm s**-1', 'long_name': 'Mean total precipitation rate'}

In [37]:
dset = C3S.convert_rainfall(dset, varin='tprate', varout='precip', leadvar='step', timevar='time', dropvar=True)


unit is m s**-1, converting to mm/day

now converting to mm/month, converted precipitation will be held in var = precip


In [38]:
dset.info

<bound method Dataset.info of <xarray.Dataset>
Dimensions:  (time: 24, lon: 121, lat: 81, member: 25, step: 5)
Coordinates:
  * time     (time) datetime64[ns] 1993-10-01 1994-10-01 ... 2016-10-01
  * lon      (lon) float32 120.0 121.0 122.0 123.0 ... 237.0 238.0 239.0 240.0
  * lat      (lat) float32 -50.0 -49.0 -48.0 -47.0 -46.0 ... 27.0 28.0 29.0 30.0
  * member   (member) int32 0 1 2 3 4 5 6 7 8 9 ... 16 17 18 19 20 21 22 23 24
  * step     (step) int64 1 2 3 4 5
Data variables:
    precip   (time, step, member, lat, lon) float64 dask.array<chunksize=(1, 5, 25, 81, 121), meta=np.ndarray>
Attributes:
    Conventions:  CF-1.6
    history:      2021-10-14 20:39:58 GMT by grib_to_netcdf-2.23.0: /opt/ecmw...>

In [39]:
dset.precip.attrs

{'units': 'mm/month'}

### make sure there are no negative values 

In [40]:
dset = dset.clip(min=0)

### if the period is set to `seasonal`, calculate the seasonal accumulations (rolling 3-step sums) 

In [41]:
if period == 'seasonal':

    # 3-step rolling sum along `step`; with min_periods=3, windows that do not
    # yet contain 3 values are left as missing.
    # The original call was `.sum('step')`: rolling reductions do not take a
    # dimension argument, so the positional string was picked up as a truthy
    # `keep_attrs`. This is spelled out explicitly here.
    # NOTE(review): confirm against the installed xarray version.
    dset = dset.rolling({'step': 3}, min_periods=3).sum(keep_attrs=True)

    # keep only the steps for which a complete 3-step window exists
    dset = dset.sel({'step': slice(3, None)})

In [42]:
dset.info

<bound method Dataset.info of <xarray.Dataset>
Dimensions:  (time: 24, lon: 121, lat: 81, member: 25, step: 3)
Coordinates:
  * time     (time) datetime64[ns] 1993-10-01 1994-10-01 ... 2016-10-01
  * lon      (lon) float32 120.0 121.0 122.0 123.0 ... 237.0 238.0 239.0 240.0
  * lat      (lat) float32 -50.0 -49.0 -48.0 -47.0 -46.0 ... 27.0 28.0 29.0 30.0
  * member   (member) int32 0 1 2 3 4 5 6 7 8 9 ... 16 17 18 19 20 21 22 23 24
  * step     (step) int64 3 4 5
Data variables:
    precip   (time, step, member, lat, lon) float64 dask.array<chunksize=(1, 3, 25, 81, 121), meta=np.ndarray>
Attributes:
    Conventions:  CF-1.6
    history:      2021-10-14 20:39:58 GMT by grib_to_netcdf-2.23.0: /opt/ecmw...>

### rechunk, just make sure each chunk can fit in memory

In [43]:
dset.chunks

Frozen({'time': (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), 'step': (3,), 'member': (25,), 'lat': (81,), 'lon': (121,)})

In [44]:
chunks = {
    'time':-1, 
    'member':-1, 
    'step':1, 
    'lat':10, 
    'lon':10
}

In [45]:
dset = dset.chunk(chunks)

In [46]:
dset

Unnamed: 0,Array,Chunk
Bytes,134.60 MiB,468.75 kiB
Shape,"(24, 3, 25, 81, 121)","(24, 1, 25, 10, 10)"
Count,2038 Tasks,351 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 134.60 MiB 468.75 kiB Shape (24, 3, 25, 81, 121) (24, 1, 25, 10, 10) Count 2038 Tasks 351 Chunks Type float64 numpy.ndarray",3  24  121  81  25,

Unnamed: 0,Array,Chunk
Bytes,134.60 MiB,468.75 kiB
Shape,"(24, 3, 25, 81, 121)","(24, 1, 25, 10, 10)"
Count,2038 Tasks,351 Chunks
Type,float64,numpy.ndarray


### uncomment the following for visual inspection of the hindcasts 

In [47]:
# for t in range(24): 
#     for s in range(5): 
#         fg = dset.isel(time=t, step=s)['precip'].plot(vmax=1000., col='member', col_wrap=5)
#         fg.fig.savefig(f'./tmp/JMA_hindcast_t_{t}_s_{s}.png', dpi=200, bbox_inches='tight', facecolor='w')
#         plt.close(fg.fig)

### calculates parametrized quantiles 

In [48]:
dset

Unnamed: 0,Array,Chunk
Bytes,134.60 MiB,468.75 kiB
Shape,"(24, 3, 25, 81, 121)","(24, 1, 25, 10, 10)"
Count,2038 Tasks,351 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 134.60 MiB 468.75 kiB Shape (24, 3, 25, 81, 121) (24, 1, 25, 10, 10) Count 2038 Tasks 351 Chunks Type float64 numpy.ndarray",3  24  121  81  25,

Unnamed: 0,Array,Chunk
Bytes,134.60 MiB,468.75 kiB
Shape,"(24, 3, 25, 81, 121)","(24, 1, 25, 10, 10)"
Count,2038 Tasks,351 Chunks
Type,float64,numpy.ndarray


In [49]:
dset

Unnamed: 0,Array,Chunk
Bytes,134.60 MiB,468.75 kiB
Shape,"(24, 3, 25, 81, 121)","(24, 1, 25, 10, 10)"
Count,2038 Tasks,351 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 134.60 MiB 468.75 kiB Shape (24, 3, 25, 81, 121) (24, 1, 25, 10, 10) Count 2038 Tasks 351 Chunks Type float64 numpy.ndarray",3  24  121  81  25,

Unnamed: 0,Array,Chunk
Bytes,134.60 MiB,468.75 kiB
Shape,"(24, 3, 25, 81, 121)","(24, 1, 25, 10, 10)"
Count,2038 Tasks,351 Chunks
Type,float64,numpy.ndarray


<span id="papermill-error-cell" style="color:red; font-family:Helvetica Neue, Helvetica, Arial, sans-serif; font-size:2em;">Execution using papermill encountered an exception here and stopped:</span>

In [50]:
# dimensions over which the quantiles are calculated
dims = ('time', 'member')

# number of samples along those dimensions, stored alongside each climatology
# for record-keeping (loop-invariant, so computed once)
n_instances = len(dset[dims[0]]) * len(dset[dims[1]])

for k, quantiles in dict_quantiles.items():

    print(f"{k} climatology\n")

    # NOTE(review): the recorded papermill run failed inside this call with
    # "AttributeError: module 'scipy.stats._continuous_distns' has no attribute
    # 'frechet_r_gen'". The error is raised outside this notebook; presumably
    # the installed scipy no longer provides that distribution -- confirm the
    # environment / C3S.calc_parametrized_quantiles before re-running.
    clim_quantile = C3S.calc_parametrized_quantiles(dset, quantiles=quantiles, varname='precip', dims=dims)

    # adds a `month` dimension with coordinate `month`, for later concatenation
    clim_quantile = clim_quantile.expand_dims({'month': [initial_month]})

    # add the number of instances in the dataset for record-keeping
    clim_quantile['n_instances'] = (('month'), [n_instances])

    # now compute
    with ProgressBar():

        clim_quantile = clim_quantile.compute()

    # build the output path once and reuse it for saving, checking and reporting
    clim_file = clim_path.joinpath(f"{GCM}_{period}_parametrized_{k}_climatology_{initial_month:02d}.netcdf")

    # save to disk
    clim_quantile.to_netcdf(clim_file)

    if clim_file.exists():

        print(f"Successfully saved {str(clim_file)}\n")

    # release the dataset whether or not the existence check succeeded
    clim_quantile.close()

tercile climatology



AttributeError: module 'scipy.stats._continuous_distns' has no attribute 'frechet_r_gen'