### Download the rolling hindcasts for all GCMs from the Copernicus Climate Data Store (CDS)

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
from subprocess import call 
from shutil import which 
import pathlib
from datetime import datetime
from dateutil.relativedelta import relativedelta
import dateparser
import yaml
from yaml.loader import SafeLoader
import ipynbname

In [3]:
from dask import delayed, compute
from dask.diagnostics import ProgressBar

In [4]:
import numpy as np

In [5]:
# home directory of the current user, as a `pathlib.Path`
HOME = pathlib.Path.home()

In [6]:
# directory the notebook is executed from, as a `pathlib.Path`
CWD = pathlib.Path.cwd()

In [7]:
CWD

PosixPath('/home/nicolasf/operational/ICU/development/hotspots/code/ICU_Water_Watch/notebooks/C3S')

### import local functions for the downloading of the C3S forecasts 

In [8]:
# Add the directory two levels above the working directory to the module search
# path, so that the local `ICU_Water_Watch` package can be imported.
# NOTE: the path is relative, so this relies on the notebook being executed
# from its own directory.
sys.path.append('../..')

In [9]:
from ICU_Water_Watch import C3S, domains

  from pandas import Int64Index as NumericIndex


### parameters for papermill

In [10]:
# default values of the notebook parameters (can be overridden through papermill)
provider = 'CDS' # only used to build `gcm_path` below; should not change
varname = 'tprate' # variable passed to `C3S.download`; should not change
domain_name = 'C3S_download' # key into `domains.domains` selecting the download domain
# number of months subtracted from today's date: set it to a non-zero value to
# download reforecasts other than the ones corresponding to the current month
lag = 0
# path where to save the hindcasts / reforecasts
gcm_path = f'/media/nicolasf/END19101/ICU/data/{provider}/operational/hindcasts'
# YAML file mapping each GCM name to its system number
config_yaml = './CDS_config.yaml' 

In [11]:
# Parameters
# NOTE(review): presumably the cell injected by papermill at execution time; it
# re-assigns `lag`, taking precedence over the default set earlier - confirm
lag = 0


### reads the yaml file, mapping GCM to system 

In [12]:
# parse the YAML configuration file with the safe loader; the result is the
# mapping from GCM name to system number
with open(config_yaml, mode='r') as config_file:
    dict_systems = yaml.safe_load(config_file)

In [13]:
dict_systems

{'ECMWF': 5,
 'UKMO': 601,
 'METEO_FRANCE': 8,
 'DWD': 21,
 'CMCC': 35,
 'NCEP': 2,
 'JMA': 3,
 'ECCC_GEM_NEMO': 2,
 'ECCC_CanCM4i': 3}

### casts the paths to `pathlib.Path` objects 

In [14]:
# from this point on, `gcm_path` is a `pathlib.Path` object and no longer a string
gcm_path = pathlib.Path(gcm_path)

### get today's date 

In [15]:
# current date and time, obtained by letting `dateparser` interpret the string 'today'
date = dateparser.parse('today')

In [16]:
date

datetime.datetime(2022, 3, 15, 15, 41, 9, 303283)

### apply lag 

In [17]:
# Shift today's date back by `lag` months.
# The date is re-derived from 'today' instead of from the current value of
# `date`, so that re-running this cell does not subtract the lag a second time
# (the cell is idempotent).
date = dateparser.parse('today') - relativedelta(months=lag)

In [18]:
# report the (lagged) initialisation month, formatted as the full month name
print(f"the hindcast data will be downloaded for all re-forecasts initialised in {date:%B}")

the hindcast data will be downloaded for all re-forecasts initialised in March


### generate the dates for the download of the rolling hindcasts 

In [19]:
# one initialisation date (first day of the selected month) for every year
# from 1993 to 2016, both included
first_year, last_year = 1993, 2016
dates = [
    datetime(year, date.month, 1)
    for year in range(first_year, last_year + 1)
]

In [20]:
dates

[datetime.datetime(1993, 3, 1, 0, 0),
 datetime.datetime(1994, 3, 1, 0, 0),
 datetime.datetime(1995, 3, 1, 0, 0),
 datetime.datetime(1996, 3, 1, 0, 0),
 datetime.datetime(1997, 3, 1, 0, 0),
 datetime.datetime(1998, 3, 1, 0, 0),
 datetime.datetime(1999, 3, 1, 0, 0),
 datetime.datetime(2000, 3, 1, 0, 0),
 datetime.datetime(2001, 3, 1, 0, 0),
 datetime.datetime(2002, 3, 1, 0, 0),
 datetime.datetime(2003, 3, 1, 0, 0),
 datetime.datetime(2004, 3, 1, 0, 0),
 datetime.datetime(2005, 3, 1, 0, 0),
 datetime.datetime(2006, 3, 1, 0, 0),
 datetime.datetime(2007, 3, 1, 0, 0),
 datetime.datetime(2008, 3, 1, 0, 0),
 datetime.datetime(2009, 3, 1, 0, 0),
 datetime.datetime(2010, 3, 1, 0, 0),
 datetime.datetime(2011, 3, 1, 0, 0),
 datetime.datetime(2012, 3, 1, 0, 0),
 datetime.datetime(2013, 3, 1, 0, 0),
 datetime.datetime(2014, 3, 1, 0, 0),
 datetime.datetime(2015, 3, 1, 0, 0),
 datetime.datetime(2016, 3, 1, 0, 0)]

### path to download the rolling hindcasts 

In [21]:
# Create the output directory (and any missing parent) if needed.
# `exist_ok=True` makes the call a no-op when the directory is already there,
# without a separate existence check, and still raises `FileExistsError` if the
# path exists but is not a directory.
gcm_path.mkdir(parents=True, exist_ok=True)

### Now loop over the dates, create a dictionary containing the keyword arguments for each GCM, and pass them to dask for parallel download

In [22]:
# For each initialisation date, build one lazy (dask `delayed`) call to
# `C3S.download` per GCM, then execute the calls for that date in parallel.
#
# The loop variable is named `init_date` so that it does not overwrite the
# notebook-level `date` defined earlier, and the results of every date are
# accumulated in `downloaded_files` instead of being replaced at each iteration.

# the download domain does not depend on the date or on the GCM
download_domain = domains.domains[domain_name]

# values returned by `C3S.download`, for all dates and all GCMs
downloaded_files = []

for init_date in dates:

    # lazy download tasks for this initialisation date, one per GCM
    delayed_downloads = []

    for GCM in dict_systems.keys():

        # the files are saved under <gcm_path>/<GCM>/<VARNAME>, where <GCM> is
        # the full key read from the YAML file
        GCM_path = gcm_path.joinpath(GCM).joinpath(varname.upper())

        if 'ECCC' in GCM:
            # the ECCC entries (e.g. 'ECCC_GEM_NEMO') are all requested under
            # the name 'ECCC': only the part before the first underscore is
            # passed to the download function, the system number and the
            # output directory keep them apart
            GCM_API_name = GCM.split('_')[0]
            print(f"processing ECCC, path should be {str(GCM_path)}")
        else:
            GCM_API_name = GCM

        args = dict(
            GCM=GCM_API_name,
            system=dict_systems[GCM],
            varname=varname,
            year=init_date.year,
            month=init_date.month,
            leadtimes=[1, 2, 3, 4, 5, 6],
            opath=GCM_path,
            domain=download_domain,
            file_format='netcdf',
            level='surface',
            max_retry=3,
        )

        delayed_downloads.append(delayed(C3S.download)(**args))

    with ProgressBar():
        # `compute` returns a tuple with one item per positional argument:
        # here a single list, holding one result per GCM
        (date_files,) = compute(delayed_downloads)

    downloaded_files.extend(date_files)

processing ECCC, path should be /media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/ECCC_GEM_NEMO/TPRATE
processing ECCC, path should be /media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/ECCC_CanCM4i/TPRATE
[                                        ] | 0% Completed |  0.0s
/media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/ECCC_GEM_NEMO/TPRATE/ensemble_seas_forecasts_tprate_from_1993_03_ECCC.netcdf exists already on disk, skipping download and returning path


/media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/CMCC/TPRATE/ensemble_seas_forecasts_tprate_from_1993_03_CMCC.netcdf exists already on disk, skipping download and returning path


/media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/NCEP/TPRATE/ensemble_seas_forecasts_tprate_from_1993_03_NCEP.netcdf exists already on disk, skipping download and returning path


/media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/JMA/TPRATE/ensemble_seas_forecasts_tprate_from_1993_03_JMA.netcdf e

### convert the notebook to HTML 

### Note: this does not work with papermill

In [23]:
# jupyter = which('jupyter')

# nb_name = ipynbname.name()

# nb_name

# cmd = f"{jupyter} nbconvert --to html {nb_name}.ipynb"

# r = call(cmd, shell=True)