In [1]:
# Load IPython's autoreload extension; mode 2 reloads all imported modules
# before each cell executes, so edits to local modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
### os 
import os 
import sys

### datetimes 
from datetime import datetime, timedelta, timezone
from dateutil.relativedelta import relativedelta
from calendar import month_name

### scipy 
import numpy as np 
import pandas as pd
import xarray as xr

In [3]:
import pathlib
# Directory the notebook is executed from; the output path is derived from it
CWD = pathlib.Path.cwd()
# Current user's home directory
HOME = pathlib.Path.home()

In [4]:
# Make the `src` package (two levels above the notebook directory) importable.
# Guarded so that re-running this cell does not append duplicate entries.
if '../..' not in sys.path:
    sys.path.append('../..')

In [5]:
from src import C3S

### provider (always CDS for now)

In [6]:
# Forecast data provider (only 'CDS' is handled for now)
provider = "CDS"

### variable name

In [7]:
# Name of the forecast variable; used to build the realtime forecast file names
varname = "tprate"

### period (`monthly` or `seasonal`)

In [8]:
# Aggregation period: 'monthly' or 'seasonal'
# ('seasonal' triggers the 3-step rolling accumulation in the GCM loop)
period = "monthly"

### list of GCMs 

In [9]:
# GCMs to process, in processing order; each name is also the name of the
# GCM's sub-directory and appears in the input / output file names
list_GCMs = [
    "ECMWF",
    "UKMO",
    "METEO_FRANCE",
    "CMCC",
    "DWD",
    "NCEP",
    "JMA",
    "ECCC",
]

### lag in months (if need to process older forecasts)

In [10]:
# Number of months to go back from the current month (0 = latest forecast)
lag = 0

### get today's date 

In [11]:
# Current time as a timezone-aware UTC datetime. `datetime.utcnow()` is
# deprecated since Python 3.12 and returns a naive datetime; only the
# year and month of this value are used downstream.
date = datetime.now(timezone.utc)

### apply lag 

In [12]:
# Shift the reference date back by `lag` months.
# NOTE: this cell updates `date` from itself, so running it more than once
# applies the lag repeatedly; re-run the cell defining `date` first.
date -= relativedelta(months=lag)

### path to the GCMs **hindcast datasets** and **climatologies** 

In [13]:
# Root directory of the GCM data (CLIMATOLOGY/ and realtime/ sub-directories
# are read from it). The historical location stays the default, but it can be
# overridden without editing the notebook via the C3S_GCM_PATH environment variable.
gcm_path = pathlib.Path(os.environ.get('C3S_GCM_PATH', '/media/nicolasf/END19101/ICU/data/CDS'))

### path to where the **realtime forecasts** have been downloaded 

In [14]:
# Sub-directory of `gcm_path` holding the downloaded realtime forecasts
realtime_path = gcm_path / "realtime"

### output path for the probabilistic forecast files 

In [15]:
# Probabilistic forecast files are written to <two levels above CWD>/outputs/C3S
opath = CWD.parents[1] / "outputs" / "C3S"

In [16]:
# Create the output directory (and any missing parents). `exist_ok=True`
# replaces the racy "check, then create" pattern and still raises
# FileExistsError if the path exists but is not a directory.
opath.mkdir(parents=True, exist_ok=True)

### get year and month 

In [17]:
# Initialisation year and month of the forecasts to process
year = date.year
month = date.month

### loop over the GCMs here

In [None]:
### For each GCM: read the latest realtime forecast, derive the quantile category
### of every ensemble member from the GCM's own climatology, convert the member
### counts to probabilities and save one file per GCM and per quantile type.

# Quantile climatologies to process. Key: singular name used in the climatology
# file names (the plural form is used in the output file names). Value: number
# of categories passed to C3S.calculate_quantiles_probabilities.
quantile_categories = {'tercile': 3, 'decile': 10, 'percentile': 21}

# Zero-padded initialisation month, used in file names and log messages
month_str = str(month).zfill(2)

for GCM in list_GCMs: 

    ### path to the CLIMATOLOGICAL terciles, deciles and percentiles,
    ### calculated over all the ensembles and month (for each initial month, i.e. leadtime dependent climatology) 

    clim_path = gcm_path.joinpath(f'CLIMATOLOGY/{GCM}')

    ### Open the (latest) realtime forecasts  

    print(f"{50*'-'}\nReading forecasts issued {year}-{month_str} for GCM {GCM}")

    forecast_file = realtime_path.joinpath(f"{GCM}/{varname.upper()}/ensemble_seas_forecasts_{varname}_from_{year}_{month_str}_{GCM}.netcdf")

    # context manager: the file handle is released once this GCM is processed,
    # instead of staying open for the lifetime of the kernel
    with xr.open_dataset(forecast_file, engine='netcdf4') as forecast:

        ### preprocess (harmonize the variable names, sort the latitudes, etc )

        x = C3S.preprocess_GCM(forecast)

        ### convert the 'tprate' rainfall rate to mm/month, held in variable 'precip'

        x = C3S.convert_rainfall(x, varin='tprate', varout='precip', leadvar='step', timevar='time', dropvar=True)

        ### just in case, remove potential missing fields (members)

        x = x.dropna(dim='member')

        # calculates the seasonal values if period == seasonal

        if period == 'seasonal': 

            print("Calculating the seasonal (3 months) accumulations")

            # rolling reductions take no dimension argument (the window dimension is
            # already fixed by `rolling`), so `sum` is called without one
            x = x.rolling({'step':3}, min_periods=3, center=False).sum() 

            # get rid of the 2 first steps, which by definition contain missing values 

            x = x.sel(step=slice(3, None))

        ### checks that the initial month corresponds indeed to what we defined earlier 

        # compare on plain arrays so the check does not rely on the truth value
        # of a DataArray, and works for one or several initialisation times
        init_years = np.ravel(x.time.dt.year.values)
        init_months = np.ravel(x.time.dt.month.values)

        if (init_years != year).any() or (init_months != month).any(): 

            found = sorted({f"{y}-{str(m).zfill(2)}" for y, m in zip(init_years, init_months)})

            # warning only: processing carries on, as before
            print(f"issue with the initial date in the latest forecast, expected {year}-{month_str}, got {', '.join(found)}")

        for quantile_name, ncategories in quantile_categories.items(): 

            ### loads the climatology for this quantile type 

            clim_file = clim_path.joinpath(f"{GCM}_{period}_{quantile_name}_climatology_v2.netcdf")

            with xr.open_dataset(clim_file, engine='netcdf4') as climatology:

                ### selects the appropriate month in the climatology (again, initialisation time, everything else is leadtime dependent)

                clim_quantiles = climatology.sel(month=month)

                ### Now calculates the quantile category for each member 

                category = C3S.get_GCM_category_digitize(x, clim_quantiles, varname='precip', dim='quantile')

                ### calculate the proportion of members in each category: i.e. the PROBABILITY for each category

                category_percent = C3S.calculate_quantiles_probabilities(category, ncategories=ncategories)

                ### creates a dummy 'GCM' dimension 

                category_percent = category_percent.expand_dims(dim={'GCM':[GCM]}, axis=0) 

                ### saves to disk, the file name carries the initialisation month found in the data 

                init_label = category_percent.time.dt.strftime('%Y-%m').data[0]

                category_percent.to_netcdf(opath.joinpath(f"{period}_{quantile_name}s_probabilities_from_{init_label}_{GCM}.netcdf")) 

    print(f"\n{GCM} {period} forecasts from {year}-{month_str} processed ...\n")



--------------------reading forecasts issued 2021-05 for GCM ECMWF

unit is m s**-1, converting to mm/day

now converting to mm/month, converted precipitation will be held in var = precip
