### This notebook is run by: 

`drive_calculate_C3S_GCMs_hindcast_tercile_probabilities.ipynb`   

For a given GCM, initial month, and period, it calculates the tercile climatology, saves the climatology in 

`/media/nicolasf/END19101/ICU/data/CDS/CLIMATOLOGY/{GCM}/{varname.upper()}`

then calculates the probabilities for each year (for the given initial month) and saves them in 

`/media/nicolasf/END19101/ICU/data/CDS/{GCM}/tercile_probs/{varname.upper()}`


In [1]:
# reload the imported modules automatically before each cell is executed,
# so that edits to the local code are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

In [2]:
### os 
import os 
import sys

### datetimes 
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
from calendar import month_name

### scipy 
import numpy as np 
import pandas as pd
import xarray as xr

# dask 
import dask 
from dask.diagnostics import ProgressBar

In [3]:
import pathlib
# home directory of the user, and directory the notebook is executed from
HOME = pathlib.Path.home()
CWD = pathlib.Path.cwd() 

### import local functions for the processing of the C3S forecasts 

In [4]:
# add the directory two levels up to the module search path
# (presumably where the `ICU_Water_Watch` package imported below lives, confirm)
sys.path.append('../..')

In [5]:
from ICU_Water_Watch import C3S, domains

In [6]:
# bounding box used to subset the hindcasts
# (presumably [lon_min, lon_max, lat_min, lat_max], TODO confirm against `domains`)
domain = domains.domains['C3S_download']

In [7]:
# display the selected domain
domain

[100, 240, -50, 30]

### parameters for papermill 

In [8]:
# default values of the notebook parameters
provider = 'CDS'          # sub-directory of the data root holding the GCM outputs
varname = 'tprate'        # GCM variable; its upper-case form names the data sub-directories
varname_out = 'precip'    # name of the derived variable used when deriving the categories
period = 'seasonal'       # 'seasonal' triggers the 3 months accumulation
GCM = 'ECMWF'             # GCM to process
month = 2                 # initial month, zero-padded to 2 digits in the file names
quantile = 'tercile'      # quantile type, used in the output paths and file names

In [9]:
# Parameters
# (values for this run, presumably injected by papermill; they override the defaults set above)
GCM = "ECMWF"
period = "seasonal"
month = 7


### list of GCMs 

In [10]:
# names of the GCMs that the `GCM` parameter is checked against
list_GCMs = ['ECMWF','UKMO','METEO_FRANCE','CMCC','DWD', 'NCEP', 'JMA']

In [11]:
# Fail fast on an unknown GCM: carrying on would only surface later as an
# empty list of hindcast files and an obscure error when opening them
if GCM not in list_GCMs: 
    raise ValueError(f"{GCM} is not in {', '.join(list_GCMs)}")

### path to the GCMs **hindcast datasets** and **climatologies** 

In [12]:
# root directory of the GCM data for the given provider
# NOTE(review): absolute, machine-specific path; consider exposing it as a notebook parameter
gcm_path = pathlib.Path(f'/media/nicolasf/END19101/ICU/data/{provider}')

### reads the hindcasts 

In [13]:
# list the hindcast files for this GCM and initial month (the `*` stands for the year);
# the variable name in the pattern follows the `varname` parameter instead of being hard-coded
hindcast_dir = gcm_path.joinpath(f"{GCM}/{varname.upper()}")
lfiles_hindcast = list(hindcast_dir.glob(f"ensemble_seas_forecasts_{varname}_from_*_{month:02d}_{GCM}.netcdf"))

### only keep the years 1993 to 2016

In [14]:
# sort the paths in place; as only the year differs between the file names, this is chronological order
lfiles_hindcast.sort()

In [15]:
def get_year(fname): 
    """
    Extract the year from a hindcast file name of the form
    ``ensemble_seas_forecasts_{varname}_from_{YYYY}_{MM}_{GCM}.netcdf``

    The year is read as the token following ``from`` rather than at a fixed
    position, so variable names containing underscores are handled as well.

    Parameters
    ----------
    fname : pathlib.Path
        Path to a hindcast file, only its ``name`` attribute is used

    Returns
    -------
    int
        The year encoded in the file name

    Raises
    ------
    ValueError
        If the file name contains no ``from`` token, or if the token
        following it cannot be converted to an integer
    IndexError
        If ``from`` is the last token of the file name
    """
    tokens = fname.name.split("_")
    return int(tokens[tokens.index("from") + 1])

In [16]:
# only keep the hindcast period, 1993 to 2016 (both inclusive)
lfiles_hindcast = [x for x in lfiles_hindcast if 1993 <= get_year(x) <= 2016]

# stop here with an explicit message rather than failing when opening an empty list of files
if len(lfiles_hindcast) == 0: 
    raise FileNotFoundError(f"no hindcast files found for {GCM}, initial month {month:02d}, between 1993 and 2016")

In [17]:
# open all the hindcast files as one dataset, each file
# going through `C3S.preprocess_GCM` before being combined
hindcast = xr.open_mfdataset(
    lfiles_hindcast,
    preprocess=C3S.preprocess_GCM,
)

In [18]:
# restrict the hindcasts to the domain selected above
hindcast = domains.extract_domain(hindcast, domain)

In [19]:
# convert the precipitation rate to monthly amounts; the input and output variable names
# come from the notebook parameters, so they stay consistent with the rest of the processing
hindcast = C3S.convert_rainfall(
    hindcast,
    varin=varname,
    varout=varname_out,
    leadvar='step',
    timevar='time',
    dropvar=True,
)


unit is m s**-1, converting to mm/day

now converting to mm/month, converted precipitation will be held in var = precip


In [20]:
if period == 'seasonal': 

    print("Calculating the seasonal (3 months) accumulations")

    # trailing rolling sum over 3 steps (window not centered, all 3 values required).
    # The rolling `sum` takes no dimension argument: a positional 'step' was silently
    # bound to `keep_attrs` (and truthy), so it is now passed explicitly by keyword
    hindcast = hindcast.rolling({'step':3}, min_periods=3, center=False).sum(keep_attrs=True)

    # get rid of the 2 first steps, which by definition contain missing values 

    hindcast = hindcast.sel(step=slice(3, None))

Calculating the seasonal (3 months) accumulations


### calculates the climatology 

In [21]:
# inspect how the dataset is currently chunked
hindcast.chunks

Frozen({'time': (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), 'step': (3,), 'member': (25,), 'lat': (81,), 'lon': (121,)})

In [22]:
# one single chunk along 'time' and 'member' (the dimensions the quantiles are
# calculated over), and chunks of 10 x 10 grid points in space
hindcast = hindcast.chunk({'time':-1, 'member':-1, 'lat':10, 'lon':10})

In [23]:
# number of (time, member) combinations remaining once
# the ones containing missing values have been dropped
stacked = hindcast.stack(instance=('time','member'))
instances = len(stacked.dropna('instance')['instance'])

In [24]:
# display the number of instances entering the climatology
instances

600

In [25]:
# lower and upper tercile boundaries, calculated across all the years and members
# NOTE(review): 0.3333 and 0.6666 approximate 1/3 and 2/3; using the exact fractions would
# change the values of the 'quantile' coordinate saved in the climatology file, so confirm
# that nothing downstream selects on these values before changing them
tercile_climatology = hindcast.quantile([0.3333, 0.6666], dim=['time','member'])

In [26]:
# trigger the dask computation and load the climatology in memory, showing a progress bar
with ProgressBar(): 
    tercile_climatology = tercile_climatology.compute()

[                                        ] | 0% Completed |  0.0s

[####                                    ] | 11% Completed |  0.1s

[########                                ] | 21% Completed |  0.2s

[#############                           ] | 34% Completed |  0.3s

[##################                      ] | 45% Completed |  0.4s

[########################                ] | 60% Completed |  0.5s

[###########################             ] | 69% Completed |  0.6s

[############################            ] | 71% Completed |  0.7s

[############################            ] | 71% Completed |  0.8s

[############################            ] | 71% Completed |  0.9s

[############################            ] | 71% Completed |  1.0s

[############################            ] | 72% Completed |  1.1s

[#############################           ] | 74% Completed |  1.3s

[#############################           ] | 74% Completed |  1.4s

[#############################           ] | 74% Completed |  1.5s

[#############################           ] | 74% Completed |  1.6s

[##############################          ] | 76% Completed |  1.7s

[###############################         ] | 78% Completed |  1.8s

[###############################         ] | 78% Completed |  1.9s

[###############################         ] | 78% Completed |  2.0s

[###############################         ] | 79% Completed |  2.1s

[################################        ] | 81% Completed |  2.2s

[#################################       ] | 82% Completed |  2.3s

[#################################       ] | 82% Completed |  2.4s

[#################################       ] | 82% Completed |  2.5s

[#################################       ] | 83% Completed |  2.6s

[##################################      ] | 85% Completed |  2.7s

[##################################      ] | 86% Completed |  2.8s

[##################################      ] | 86% Completed |  2.9s

[##################################      ] | 86% Completed |  3.0s

[###################################     ] | 87% Completed |  3.1s

[###################################     ] | 89% Completed |  3.2s

[###################################     ] | 89% Completed |  3.3s

[###################################     ] | 89% Completed |  3.4s

[####################################    ] | 90% Completed |  3.5s

[####################################    ] | 92% Completed |  3.7s

[#####################################   ] | 93% Completed |  3.8s

[#####################################   ] | 93% Completed |  3.9s

[#####################################   ] | 93% Completed |  4.0s

[#####################################   ] | 94% Completed |  4.1s

[######################################  ] | 97% Completed |  4.2s

[####################################### ] | 97% Completed |  4.3s

[####################################### ] | 97% Completed |  4.4s

[####################################### ] | 97% Completed |  4.5s

[####################################### ] | 99% Completed |  4.6s

[########################################] | 100% Completed |  4.7s




In [27]:
# record the number of instances in the attributes of the climatology dataset
tercile_climatology.attrs['ninstances'] = instances

### save the climatology 

In [28]:
# display the root data path
gcm_path

PosixPath('/media/nicolasf/END19101/ICU/data/CDS')

In [29]:
# output directory for the climatology: CLIMATOLOGY/{GCM}/{upper-case variable name}
opath_clim = gcm_path.joinpath(f"CLIMATOLOGY/{GCM}/{varname.upper()}")

In [30]:
# `exist_ok=True` makes the creation idempotent, and avoids an error if another run
# creates the directory between an existence check and the call to `mkdir`
opath_clim.mkdir(parents=True, exist_ok=True)

In [31]:
# the file name encodes the GCM, the period, the quantile type and the (zero-padded) initial month
tercile_climatology.to_netcdf(opath_clim.joinpath(f"{GCM}_{period}_{quantile}_climatology_{month:02d}.netcdf"))

### Now derive the categories for the hindcast using the above climatology 

In [32]:
# assign each hindcast value to a category, using the boundaries
# held along the 'quantile' dimension of the climatology
terciles_category = C3S.get_GCM_category_digitize(
    hindcast,
    tercile_climatology.squeeze(),
    varname=varname_out,
    dim='quantile',
)

In [33]:
# display the (still lazy) array of categories
terciles_category

Unnamed: 0,Array,Chunk
Bytes,134.60 MiB,1.37 MiB
Shape,"(24, 3, 25, 81, 121)","(24, 3, 25, 10, 10)"
Count,2015 Tasks,117 Chunks
Type,int64,numpy.ndarray
"Array Chunk Bytes 134.60 MiB 1.37 MiB Shape (24, 3, 25, 81, 121) (24, 3, 25, 10, 10) Count 2015 Tasks 117 Chunks Type int64 numpy.ndarray",3  24  121  81  25,

Unnamed: 0,Array,Chunk
Bytes,134.60 MiB,1.37 MiB
Shape,"(24, 3, 25, 81, 121)","(24, 3, 25, 10, 10)"
Count,2015 Tasks,117 Chunks
Type,int64,numpy.ndarray


### Now calculate the probabilities as the proportion (along the `member` dimension) of members in each category 

In [34]:
# probabilities for the 3 categories; the call is wrapped in a ProgressBar
# so that any dask computation it triggers is reported
with ProgressBar(): 
    terciles_category_percent = C3S.calculate_quantiles_probabilities(terciles_category, ncategories=3)

[                                        ] | 0% Completed |  0.0s

[####                                    ] | 10% Completed |  0.1s

[#######                                 ] | 18% Completed |  0.2s

[###########                             ] | 28% Completed |  0.3s

[##############                          ] | 36% Completed |  0.4s

[#####################                   ] | 53% Completed |  0.5s

[###########################             ] | 68% Completed |  0.7s

[###########################             ] | 69% Completed |  1.0s

[###########################             ] | 69% Completed |  1.7s

[###########################             ] | 69% Completed |  2.0s

[###########################             ] | 69% Completed |  3.1s

[###########################             ] | 69% Completed |  3.7s

[###########################             ] | 69% Completed |  3.9s

[###########################             ] | 69% Completed |  4.1s

[###########################             ] | 69% Completed |  4.6s

[############################            ] | 70% Completed |  5.1s

[############################            ] | 70% Completed |  5.8s

[############################            ] | 70% Completed |  6.4s

[############################            ] | 70% Completed |  7.1s

[############################            ] | 70% Completed |  8.0s

[############################            ] | 70% Completed |  8.6s

[############################            ] | 70% Completed |  9.1s

[############################            ] | 70% Completed |  9.6s

[############################            ] | 70% Completed | 10.4s

[############################            ] | 70% Completed | 11.4s

[############################            ] | 70% Completed | 11.7s

[############################            ] | 70% Completed | 12.9s

[############################            ] | 70% Completed | 13.4s

[############################            ] | 70% Completed | 13.9s

[############################            ] | 70% Completed | 14.4s

[############################            ] | 70% Completed | 14.8s

[############################            ] | 70% Completed | 15.2s

[############################            ] | 70% Completed | 16.5s

[############################            ] | 70% Completed | 17.1s

[############################            ] | 70% Completed | 17.5s

[############################            ] | 71% Completed | 18.2s

[############################            ] | 71% Completed | 18.4s

[############################            ] | 71% Completed | 18.5s

[############################            ] | 72% Completed | 18.6s

[############################            ] | 72% Completed | 18.8s

[############################            ] | 72% Completed | 19.1s

[#############################           ] | 72% Completed | 19.3s

[#############################           ] | 72% Completed | 19.9s

[#############################           ] | 73% Completed | 20.6s

[#############################           ] | 73% Completed | 20.9s

[#############################           ] | 73% Completed | 21.5s

[#############################           ] | 73% Completed | 22.4s

[#############################           ] | 73% Completed | 23.6s

[#############################           ] | 73% Completed | 23.9s

[#############################           ] | 73% Completed | 24.6s

[#############################           ] | 74% Completed | 25.5s

[#############################           ] | 74% Completed | 26.1s

[#############################           ] | 74% Completed | 26.8s

[#############################           ] | 74% Completed | 27.1s

[#############################           ] | 74% Completed | 27.9s

[#############################           ] | 74% Completed | 28.4s

[#############################           ] | 74% Completed | 29.0s

[#############################           ] | 74% Completed | 29.3s

### plots 

In [None]:
# probabilities for the first date and first retained step, one panel per value along the `quantile` dimension
terciles_category_percent.isel(time=0, step=0)[varname_out].plot(col=quantile)

In [None]:
# same as above, for the last date
terciles_category_percent.isel(time=-1, step=0)[varname_out].plot(col=quantile)

### most likely category 

In [None]:
# coordinate label of the category with the highest probability, first date and first retained step
# (the levels presumably assume category labels 1, 2 and 3, TODO confirm)
terciles_category_percent.idxmax(dim=quantile)[varname_out].isel(time=0, step=0).plot(levels=[1,2,3,4])

In [None]:
# same as above, for the last date
terciles_category_percent.idxmax(dim=quantile)[varname_out].isel(time=-1, step=0).plot(levels=[1,2,3,4])

In [None]:
# check the GCM used to build the output paths
GCM

In [None]:
# check the period
period 

In [None]:
# check the quantile type used to build the output paths
quantile

### now save the tercile probabilities for each year, for the corresponding month 

In [None]:
# output directory for the probabilities: {GCM}/{quantile}_probs/{upper-case variable name}
opath_probs = gcm_path.joinpath(f"{GCM}/{quantile}_probs/{varname.upper()}")

In [None]:
# `exist_ok=True` makes the creation idempotent, and avoids an error if another run
# creates the directory between an existence check and the call to `mkdir`
opath_probs.mkdir(parents=True, exist_ok=True)

In [None]:
# one file per date, each keeping a 'time' dimension of length 1
# NOTE(review): unlike the climatology file name, this one does not include `period`, so
# runs for different periods presumably write to the same files; confirm this is intended
for date in terciles_category_percent.time.to_index(): 
    fname_out = opath_probs.joinpath(f"{GCM}_{quantile}s_probs_{date:%Y-%m}.netcdf")
    probs = terciles_category_percent.sel(time=date).expand_dims({'time':[date]})
    probs.to_netcdf(fname_out)