# Calculate the lead-time-dependent climatological terciles, quartiles, deciles and percentiles (0.02, then 0.05 to 0.95 in steps of 0.05) from each individual GCM's hindcast dataset (period 1993 - 2016)

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
%matplotlib inline

### os
import os
import sys
from collections import OrderedDict

### datetimes
from calendar import month_name
from datetime import datetime, timedelta, timezone
from dateutil.relativedelta import relativedelta


### scipy
import numpy as np
import pandas as pd
import xarray as xr
import geopandas as gpd
import cartopy.crs as ccrs
import dask
from dask.diagnostics import ProgressBar
from tqdm import tqdm

### plotting
from matplotlib import pyplot as plt
import matplotlib
import seaborn as sns


In [3]:
import pathlib

HOME = pathlib.Path.home()
CWD = pathlib.Path.cwd() 

In [4]:
sys.path.append('../../') 

In [5]:
from ICU_Water_Watch import geo, C3S, domains, plot

### parameters for papermill

In [6]:
provider = 'CDS' # data provider, used to build `gcm_path` below; should not change
GCM = 'ECMWF' # should be in ['ECMWF','UKMO','METEO_FRANCE','CMCC','DWD', 'NCEP', 'JMA', 'ECCC_CanCM4i', 'ECCC_GEM5_NEMO']
varname = 'tprate' # can be in ['tprate','t2m']
period = 'seasonal' # valid values: 'seasonal' or 'monthly'
# number of months to go back from the current month when selecting the initial month
# (0 = current month); set it to a positive value to process older forecasts
lag = 0
# path where to find the hindcasts / reforecasts
gcm_path = f'/media/nicolasf/END19101/ICU/data/{provider}/operational/hindcasts'

### dictionary holding the quantile names and quantile values; the values are passed as explicit **lists** to avoid any numerical issues

In [8]:
# quantile family name -> list of probabilities passed to `quantile`;
# the values are written out literally and the insertion order is preserved
dict_quantiles = OrderedDict(
    [
        ('tercile', [0.3333, 0.6666]),
        ('quartile', [0.25, 0.50, 0.75]),
        ('decile', [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]),
        (
            'percentile',
            [0.02, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45,
             0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95],
        ),
    ]
)

### get today's date 

In [9]:
# current time as a timezone-aware UTC datetime; `datetime.utcnow()` is deprecated
# and returns a naive object, `datetime.now(timezone.utc)` is the recommended replacement
date = datetime.now(timezone.utc)

### apply lag 

In [10]:
# shift the reference date back by `lag` months (lag = 0 leaves it unchanged)
# NOTE: this cell rebinds `date`, so re-running it without first re-running the
# cell that defines `date` applies the lag a second time
date = date - relativedelta(months=lag)

In [11]:
print(f"will process hindcasts for {date:%B}")

will process hindcasts for June


In [12]:
initial_month = date.month

#### where to save the climatologies 

In [13]:
gcm_path = pathlib.Path(gcm_path)

In [14]:
# directory holding the hindcast files for this GCM and variable: <gcm_path>/<GCM>/<VARNAME>
dpath = gcm_path / GCM / varname.upper()

In [15]:
# output directory for the climatologies: <gcm_path>/CLIMATOLOGY/<GCM>/<VARNAME>
clim_path = gcm_path / 'CLIMATOLOGY' / GCM / varname.upper()

In [16]:
clim_path

PosixPath('/media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/CLIMATOLOGY/ECCC_GEM5_NEMO/TPRATE')

In [17]:
# create the output directory together with any missing parents; `exist_ok=True`
# makes the call safe to re-run and avoids the race between a separate
# `exists()` check and the directory creation
clim_path.mkdir(parents=True, exist_ok=True)

### get the list of files 

In [18]:
# the file names of the ECCC systems only carry the part of the GCM name before
# the first underscore (i.e. 'ECCC'); every other GCM uses its full name
gcm_tag = GCM.split('_')[0] if 'ECCC' in GCM else GCM

# one hindcast file per year for the selected initial month
pattern = f"ensemble_seas_forecasts_{varname}_from_*_{initial_month:02d}_{gcm_tag}.netcdf"

lfiles = list(dpath.glob(pattern))

In [19]:
lfiles.sort()

In [20]:
lfiles

[PosixPath('/media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/ECCC_GEM5_NEMO/TPRATE/ensemble_seas_forecasts_tprate_from_1993_06_ECCC.netcdf'),
 PosixPath('/media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/ECCC_GEM5_NEMO/TPRATE/ensemble_seas_forecasts_tprate_from_1994_06_ECCC.netcdf'),
 PosixPath('/media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/ECCC_GEM5_NEMO/TPRATE/ensemble_seas_forecasts_tprate_from_1995_06_ECCC.netcdf'),
 PosixPath('/media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/ECCC_GEM5_NEMO/TPRATE/ensemble_seas_forecasts_tprate_from_1996_06_ECCC.netcdf'),
 PosixPath('/media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/ECCC_GEM5_NEMO/TPRATE/ensemble_seas_forecasts_tprate_from_1997_06_ECCC.netcdf'),
 PosixPath('/media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/ECCC_GEM5_NEMO/TPRATE/ensemble_seas_forecasts_tprate_from_1998_06_ECCC.netcdf'),
 PosixPath('/media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/ECCC_GEM5_

In [21]:
lfiles[0]

PosixPath('/media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/ECCC_GEM5_NEMO/TPRATE/ensemble_seas_forecasts_tprate_from_1993_06_ECCC.netcdf')

In [22]:
lfiles[-1]

PosixPath('/media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/ECCC_GEM5_NEMO/TPRATE/ensemble_seas_forecasts_tprate_from_2016_06_ECCC.netcdf')

In [23]:
len(lfiles)

24

### open the multiple files dataset, concatenating over the time dimension, and preprocessing 

In [24]:
# open all the hindcast files as one dataset; `C3S.preprocess_GCM` is applied to
# each file before the files are combined
dset = xr.open_mfdataset(
    lfiles,
    preprocess=C3S.preprocess_GCM,
    parallel=True,
    engine='netcdf4',
)

In [25]:
dset

Unnamed: 0,Array,Chunk
Bytes,44.87 MiB,1.87 MiB
Shape,"(24, 5, 10, 81, 121)","(1, 5, 10, 81, 121)"
Count,168 Tasks,24 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 44.87 MiB 1.87 MiB Shape (24, 5, 10, 81, 121) (1, 5, 10, 81, 121) Count 168 Tasks 24 Chunks Type float32 numpy.ndarray",5  24  121  81  10,

Unnamed: 0,Array,Chunk
Bytes,44.87 MiB,1.87 MiB
Shape,"(24, 5, 10, 81, 121)","(1, 5, 10, 81, 121)"
Count,168 Tasks,24 Chunks
Type,float32,numpy.ndarray


### make sure we select the hindcast period 

In [26]:
dset = dset.sel(time=slice('1993', '2016'))

### convert to monthly rainfall accumulations (mm/month)

In [None]:
if varname == 'tprate':

    # convert the precipitation rate (`tprate`) to a `precip` variable, dropping the
    # input variable, then set any negative value to zero
    dset = C3S.convert_rainfall(
        dset,
        varin=varname,
        varout='precip',
        leadvar='step',
        timevar='time',
        dropvar=True,
    ).clip(min=0)

### if the period is set to `seasonal`, calculate the seasonal accumulations (for `tprate`) or averages (for every other variable)

In [32]:
if period == 'seasonal':

    # 3-step rolling window along the lead time (`step`); `min_periods=3` leaves
    # every incomplete window as missing
    rolling_steps = dset.rolling({'step': 3}, min_periods=3)

    # Rolling reductions always reduce over the window: their first positional
    # argument is `keep_attrs`, not a dimension name, so it is passed explicitly
    # by keyword here (attributes of the dataset are kept).
    if varname == 'tprate':
        # rainfall: seasonal accumulation
        dset = rolling_steps.sum(keep_attrs=True)
    else:
        # everything else: seasonal average
        dset = rolling_steps.mean(keep_attrs=True)

    # keep the steps labelled 3 and above, dropping the leading steps whose
    # window is incomplete (assumes the steps are labelled from 1 - TODO confirm)
    dset = dset.sel({'step': slice(3, None)})

In [33]:
dset.info

<bound method Dataset.info of <xarray.Dataset>
Dimensions:  (time: 24, lon: 121, lat: 81, member: 10, step: 3)
Coordinates:
  * time     (time) datetime64[ns] 1993-06-01 1994-06-01 ... 2016-06-01
  * lon      (lon) float32 120.0 121.0 122.0 123.0 ... 237.0 238.0 239.0 240.0
  * lat      (lat) float32 -50.0 -49.0 -48.0 -47.0 -46.0 ... 27.0 28.0 29.0 30.0
  * member   (member) int32 0 1 2 3 4 5 6 7 8 9
  * step     (step) int64 3 4 5
Data variables:
    precip   (time, step, member, lat, lon) float64 dask.array<chunksize=(1, 3, 10, 81, 121), meta=np.ndarray>
Attributes:
    Conventions:  CF-1.6
    history:      2022-06-08 23:27:29 GMT by grib_to_netcdf-2.24.3: /opt/ecmw...>

### rechunk, just make sure each chunk can fit in memory

In [34]:
dset.chunks

Frozen({'time': (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), 'step': (3,), 'member': (10,), 'lat': (81,), 'lon': (121,)})

In [35]:
# target chunking: `time` and `member` are each kept in a single chunk (-1),
# one lead time per chunk, and 10 x 10 tiles in latitude / longitude
chunks = dict(time=-1, member=-1, step=1, lat=10, lon=10)

In [36]:
dset = dset.chunk(chunks)

In [37]:
dset

Unnamed: 0,Array,Chunk
Bytes,53.84 MiB,187.50 kiB
Shape,"(24, 3, 10, 81, 121)","(24, 1, 10, 10, 10)"
Count,1686 Tasks,351 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 53.84 MiB 187.50 kiB Shape (24, 3, 10, 81, 121) (24, 1, 10, 10, 10) Count 1686 Tasks 351 Chunks Type float64 numpy.ndarray",3  24  121  81  10,

Unnamed: 0,Array,Chunk
Bytes,53.84 MiB,187.50 kiB
Shape,"(24, 3, 10, 81, 121)","(24, 1, 10, 10, 10)"
Count,1686 Tasks,351 Chunks
Type,float64,numpy.ndarray


### uncomment the following for visual inspection of the hindcasts 

In [38]:
# for t in range(24): 
#     for s in range(5): 
#         fg = dset.isel(time=t, step=s)['precip'].plot(vmax=1000., col='member', col_wrap=5)
#         fg.fig.savefig(f'./tmp/JMA_hindcast_t_{t}_s_{s}.png', dpi=200, bbox_inches='tight', facecolor='w')
#         plt.close(fg.fig)

### stack the time and members 

In [39]:
clim = dset.stack(instance=('time','member'))

In [40]:
# discard the instances containing missing values (the number of members is
# sometimes inconsistent), then rechunk so that `instance` is one contiguous chunk
clim = clim.dropna(dim='instance').chunk({'instance':-1})

# sample size used for the quantile climatologies; it is stored alongside the
# climatologies for record-keeping
n_instances = len(clim['instance'])

# report what is about to be computed
print(f"The {period} {varname} hindcast climatologies from month {initial_month} in {GCM} will be calculated using {n_instances} instances\n")

# the next step loops over the quantiles dictionary, calculates the climatological
# quantiles, does some data munging, and saves the results to disk


The seasonal tprate hindcast climatologies from month 6 in ECCC_GEM5_NEMO will be calculated using 240 instances



In [41]:
# For each quantile family (tercile, quartile, ...): compute the climatological
# quantiles over the `instance` dimension and save them to one netCDF file.
for k, quantiles in dict_quantiles.items():

    print(f"{k} climatology\n")

    # output file for this quantile family, built once and reused below
    clim_file = clim_path.joinpath(f"{GCM}_{varname}_{period}_{k}_climatology_{initial_month:02d}.netcdf")

    clim_quantile = clim.quantile(quantiles, dim='instance')

    # adds a `month` dimension with coordinate `initial_month`, for later concatenation
    clim_quantile = clim_quantile.expand_dims({'month':[initial_month]})

    # add the number of instances in the dataset for record-keeping
    clim_quantile['n_instances'] = ('month', [n_instances])

    # trigger the actual computation, with a progress bar
    with ProgressBar():

        clim_quantile = clim_quantile.compute()

    # save to disk
    clim_quantile.to_netcdf(clim_file)

    if clim_file.exists():

        print(f"Successfully saved {str(clim_file)}\n")

    else:

        # make a missing output visible instead of silently moving on
        print(f"WARNING: {str(clim_file)} was not written\n")

    # release the dataset whether or not the file check succeeded
    clim_quantile.close()

tercile climatology

[########################################] | 100% Completed |  5.6s
Successfully saved /media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/CLIMATOLOGY/ECCC_GEM5_NEMO/TPRATE/ECCC_GEM5_NEMO_tprate_seasonal_tercile_climatology_06.netcdf

quartile climatology

[########################################] | 100% Completed |  5.1s
Successfully saved /media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/CLIMATOLOGY/ECCC_GEM5_NEMO/TPRATE/ECCC_GEM5_NEMO_tprate_seasonal_quartile_climatology_06.netcdf

decile climatology

[########################################] | 100% Completed |  5.0s
Successfully saved /media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/CLIMATOLOGY/ECCC_GEM5_NEMO/TPRATE/ECCC_GEM5_NEMO_tprate_seasonal_decile_climatology_06.netcdf

percentile climatology

[########################################] | 100% Completed |  5.2s
Successfully saved /media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/CLIMATOLOGY/ECCC_GEM5_NEMO/TPRATE/ECC