### Download the rolling hindcasts for all GCMs from the Copernicus Climate Data Store (CDS)

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import sys
import pathlib
from datetime import datetime
from dateutil.relativedelta import relativedelta
import dateparser

In [None]:
from dask import delayed, compute
from dask.diagnostics import ProgressBar

In [None]:
import numpy as np

In [None]:
HOME = pathlib.Path.home()

In [None]:
CWD = pathlib.Path.cwd()

In [None]:
CWD

### import local functions for the downloading of the C3S forecasts 

In [None]:
sys.path.append('../..')

In [None]:
from ICU_Water_Watch import C3S, domains

### show the predefined domains 

In [None]:
domains.domains

### provider (always CDS for now)

In [None]:
provider = 'CDS'

### variable name

In [None]:
var_name = 'tprate'

### mapping GCMs to systems 

#### The mapping between the GCM name and the latest system version available for the forecasts needs to be checked [on the CDS website](https://cds.climate.copernicus.eu/)

In [None]:
from collections import OrderedDict as OD

In [None]:
# GCM name -> system number, kept in insertion order.
# These numbers must be checked against the CDS website before use.
dict_systems = OD(
    [
        ('ECMWF', 5),
        ('UKMO', 600),
        ('METEO_FRANCE', 8),
        ('DWD', 21),
        ('CMCC', 35),
        ('NCEP', 2),
        ('JMA', 2),
        ('ECCC', 2),
    ]
)

In [None]:
dict_systems.keys()

### lag in months (set to a value > 0 if older forecasts need to be downloaded)

In [None]:
lag = 0

### get today's date 

In [None]:
date = dateparser.parse('today')

### apply lag 

In [None]:
date = date - relativedelta(months=lag)

In [None]:
date

### generate the dates for the download of the rolling hindcasts 

In [None]:
# One date per year from 1993 to 2016 (inclusive), each set to the 1st of
# the month taken from `date`.
dates = [
    datetime(year, date.month, 1)
    for year in range(1993, 2017)
]

In [None]:
dates

### path to download the rolling hindcasts 

In [None]:
gcm_path = pathlib.Path(f'/media/nicolasf/END19101/ICU/data/{provider}/operational/hindcasts')

In [None]:
# Create the output directory together with any missing parents.
# exist_ok=True replaces the separate exists() check: re-running the cell is a
# no-op, and there is no window between the check and the creation.
gcm_path.mkdir(parents=True, exist_ok=True)

### Now loop over the dates, create a dictionary containing the keyword arguments, and pass it to dask for parallel download

In [None]:
# For every date in `dates`, queue one lazy C3S.download call per GCM, then
# let dask compute that batch while a progress bar is displayed.
# NOTE: the loop variable rebinds the notebook-level name `date`.
for date in dates:

    downloaded_files = []

    for GCM, system in dict_systems.items():

        # keyword arguments forwarded unchanged to C3S.download
        download_kwargs = {
            'GCM': GCM,
            'system': system,
            'varname': var_name,
            'year': date.year,
            'month': date.month,
            'leadtimes': list(range(1, 7)),
            # output path: <gcm_path>/<GCM>/<VAR_NAME in upper case>
            'opath': gcm_path / GCM / var_name.upper(),
            'domain': domains.domains['C3S_download'],
            'file_format': 'netcdf',
            'level': 'surface',
            'max_retry': 3,
        }

        # delayed() only records the call; it is executed by compute() below
        downloaded_files.append(delayed(C3S.download)(**download_kwargs))

    # run the calls queued for this date
    with ProgressBar():
        downloaded_files = compute(downloaded_files)