### Download the rolling hindcasts for all GCMs from the Copernicus Climate Data Store (CDS)

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
from subprocess import call 
from shutil import which 
import pathlib
from datetime import datetime
from dateutil.relativedelta import relativedelta
import dateparser
import yaml
from yaml.loader import SafeLoader

In [3]:
from dask import delayed, compute
from dask.diagnostics import ProgressBar

In [4]:
import numpy as np
import pandas as pd

In [5]:
# home directory of the user running the notebook
HOME = pathlib.Path('~').expanduser()

In [6]:
# directory the notebook is executed from
CWD = pathlib.Path().absolute()

In [7]:
CWD

PosixPath('/home/nicolasf/operational/ICU/development/hotspots/code/ICU_Water_Watch/notebooks/C3S')

### import local functions for the downloading of the C3S forecasts 

In [8]:
sys.path.append('../..')

In [9]:
from ICU_Water_Watch import C3S, domains

### parameters for papermill

In [10]:
# data provider, used to build the output path (should not change)
provider = 'CDS'
# variable to download, either 'tprate' or 't2m'
varname = 'tprate'
# key into `domains.domains` selecting the geographical domain to request
domain_name = 'C3S_download'

# number of months subtracted from today's date to choose the initialisation
# month; leave at 0 to download the reforecasts for the current month
lag = 0

# YAML file mapping each GCM to its system number
config_yaml = './CDS_config.yaml'
# directory where the hindcasts / reforecasts are saved
gcm_path = f'/media/nicolasf/END19101/ICU/data/{provider}/operational/hindcasts'

### reads the yaml file, mapping GCM to system 

Note that the forecast system to system number mapping is available at: 
    
- [https://confluence.ecmwf.int/display/CKB/Description+of+the+C3S+seasonal+multi-system](https://confluence.ecmwf.int/display/CKB/Description+of+the+C3S+seasonal+multi-system)

#### it can be read directly into pandas 

In [12]:
# ECMWF Confluence page describing the C3S seasonal multi-system (same link as in the text above)
url = 'https://confluence.ecmwf.int/display/CKB/Description+of+the+C3S+seasonal+multi-system'

In [13]:
# parse the HTML tables found at `url` (this fetches the page, so network access is required);
# pandas returns them as a list of DataFrames
systems_table = pd.read_html(url)

In [14]:
# keep only the table at index 1 of the list returned by `pd.read_html`
# NOTE(review): this rebinds `systems_table` from the list of tables to a single
# table, so re-running this cell on its own no longer indexes into the list;
# re-run the `pd.read_html` cell first
systems_table = systems_table[1]

In [15]:
systems_table

Unnamed: 0,Forecasting centre,Forecasting system name,CDS 'system' value
0,ECMWF,System 4,4
1,ECMWF,SEAS5,5
2,Météo-France,System 5,5
3,Météo-France,System 6,6
4,Météo-France,System 7,7
5,Météo-France,System 8,8
6,CMCC,SPS3,3
7,CMCC,SPS3.5,35
8,DWD,GCFS2.0,2
9,DWD,GCFS2.1,21


In [16]:
# read the GCM -> system number mapping from the YAML configuration file
with open(config_yaml) as config_file:
    dict_systems = yaml.safe_load(config_file)

In [17]:
dict_systems

{'ECMWF': 5,
 'UKMO': 601,
 'METEO_FRANCE': 8,
 'DWD': 21,
 'CMCC': 35,
 'NCEP': 2,
 'JMA': 3,
 'ECCC_CanCM4i': 2,
 'ECCC_GEM5_NEMO': 3}

### casts the paths to `pathlib.Path` objects 

In [18]:
# the parameter arrives as a plain string; wrap it so the `joinpath` / `exists` / `mkdir` calls below work
gcm_path = pathlib.Path(gcm_path)

### get today's date 

In [19]:
# current date and time, obtained by letting dateparser interpret the string 'today'
date = dateparser.parse('today')

In [20]:
date

datetime.datetime(2022, 6, 9, 16, 40, 24, 151815)

### apply lag 

In [21]:
# shift the reference date back by `lag` months (no-op when lag == 0)
# NOTE: re-running this cell applies the lag again, re-run the cell that sets `date` first
date -= relativedelta(months=lag)

In [22]:
# report the initialisation month (after the lag has been applied) that will be requested
print(f"the hindcast data will be downloaded for all re-forecasts initialised in {date:%B}")

the hindcast data will be downloaded for all re-forecasts initialised in June


### generate the dates for the download of the rolling hindcasts 

In [23]:
# one initialisation date per year from 1993 to 2016 inclusive, each set to
# the first day of the month held in `date`
dates = [datetime(year=y, month=date.month, day=1) for y in range(1993, 2017)]

In [24]:
dates

[datetime.datetime(1993, 6, 1, 0, 0),
 datetime.datetime(1994, 6, 1, 0, 0),
 datetime.datetime(1995, 6, 1, 0, 0),
 datetime.datetime(1996, 6, 1, 0, 0),
 datetime.datetime(1997, 6, 1, 0, 0),
 datetime.datetime(1998, 6, 1, 0, 0),
 datetime.datetime(1999, 6, 1, 0, 0),
 datetime.datetime(2000, 6, 1, 0, 0),
 datetime.datetime(2001, 6, 1, 0, 0),
 datetime.datetime(2002, 6, 1, 0, 0),
 datetime.datetime(2003, 6, 1, 0, 0),
 datetime.datetime(2004, 6, 1, 0, 0),
 datetime.datetime(2005, 6, 1, 0, 0),
 datetime.datetime(2006, 6, 1, 0, 0),
 datetime.datetime(2007, 6, 1, 0, 0),
 datetime.datetime(2008, 6, 1, 0, 0),
 datetime.datetime(2009, 6, 1, 0, 0),
 datetime.datetime(2010, 6, 1, 0, 0),
 datetime.datetime(2011, 6, 1, 0, 0),
 datetime.datetime(2012, 6, 1, 0, 0),
 datetime.datetime(2013, 6, 1, 0, 0),
 datetime.datetime(2014, 6, 1, 0, 0),
 datetime.datetime(2015, 6, 1, 0, 0),
 datetime.datetime(2016, 6, 1, 0, 0)]

### path to download the rolling hindcasts 

In [25]:
# create the output directory, including any missing parents; `exist_ok=True`
# makes this a no-op when the directory is already there (no separate
# exists() check, hence no race), while still raising FileExistsError if the
# path exists but is not a directory
gcm_path.mkdir(parents=True, exist_ok=True)

### Loop over the dates, build a dictionary of keyword arguments for each GCM, and pass the download calls to dask for parallel execution

In [26]:
# For every initialisation date, queue one lazy `C3S.download` call per GCM
# listed in `dict_systems`, then let dask run that date's batch.
# The loop variable is named `init_date` so that it does not overwrite the
# notebook-level `date` computed earlier.
for init_date in dates:

    # delayed download calls for this initialisation date
    download_tasks = []

    for GCM, system in dict_systems.items():

        # files are saved under <gcm_path>/<GCM key from the config>/<VARNAME>
        GCM_path = gcm_path.joinpath(GCM).joinpath(varname.upper())

        if 'ECCC' in GCM:
            # the ECCC keys (e.g. 'ECCC_CanCM4i') carry a suffix that only
            # serves to separate the output folders: the name passed to the
            # download call is the part before the first underscore
            GCM_API_name = GCM.split('_')[0]
            print(f"processing ECCC, path should be {str(GCM_path)}")
        else:
            GCM_API_name = GCM

        # keyword arguments for `C3S.download`, identical for all GCMs apart
        # from the name, the system number and the output path
        args = dict(
            GCM=GCM_API_name,
            system=system,
            varname=varname,
            year=init_date.year,
            month=init_date.month,
            leadtimes=[1, 2, 3, 4, 5, 6],
            opath=GCM_path,
            domain=domains.domains[domain_name],
            file_format='netcdf',
            level='surface',
            max_retry=3,
        )

        download_tasks.append(delayed(C3S.download)(**args))

    with ProgressBar():
        # `compute` is given the list as a single argument, so the result is
        # a one-element tuple wrapping the list of values returned by the
        # download calls (kept as in the original notebook)
        downloaded_files = compute(download_tasks)

processing ECCC, path should be /media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/ECCC_CanCM4i/TPRATE
processing ECCC, path should be /media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/ECCC_GEM5_NEMO/TPRATE
[                                        ] | 0% Completed |  0.0s
/media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/ECCC_CanCM4i/TPRATE/ensemble_seas_forecasts_tprate_from_1993_06_ECCC.netcdf exists already on disk, skipping download and returning path


/media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/ECCC_GEM5_NEMO/TPRATE/ensemble_seas_forecasts_tprate_from_1993_06_ECCC.netcdf exists already on disk, skipping download and returning path

[########################################] | 100% Completed |  0.1s
processing ECCC, path should be /media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/ECCC_CanCM4i/TPRATE
processing ECCC, path should be /media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/ECCC_GEM5_NEMO/TPRATE
[              

### convert the notebook to HTML 

### Note: this does not work with papermill

In [27]:
# jupyter = which('jupyter')

# nb_name = ipynbname.name()

# nb_name

# cmd = f"{jupyter} nbconvert --to html {nb_name}.ipynb"

# r = call(cmd, shell=True)