### Download the rolling hindcasts for all GCMs from the Copernicus Climate Data Store (CDS)

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
import pathlib
from datetime import datetime
from dateutil.relativedelta import relativedelta
import dateparser

In [3]:
from dask import delayed, compute
from dask.diagnostics import ProgressBar

In [4]:
import numpy as np

In [5]:
HOME = pathlib.Path.home()

In [6]:
CWD = pathlib.Path.cwd()

In [7]:
CWD

PosixPath('/home/nicolasf/operational/ICU/development/hotspots/code/ICU_Water_Watch/notebooks/C3S')

### import local functions for the downloading of the C3S forecasts 

In [8]:
sys.path.append('../..')

In [9]:
from ICU_Water_Watch import C3S, domains

### show the predefined domains 

In [10]:
domains.domains

{'Tropical_Pacific': [140, 220, -25, 25],
 'SW_Pacific': [172.5, 190, -22.5, -12],
 'Fiji': [175, 183, -21, -15],
 'NZ': [161, 181, -50, -30],
 'Pacific': [140, 240, -50, 25],
 'C3S_download': [100, 240, -50, 30],
 'Water_Watch': [120, 240, -38, 22.5]}

### provider (always CDS for now)

In [11]:
provider = 'CDS'

### variable name

In [12]:
var_name = 'tprate'

### mapping GCMs to systems 

#### The mapping between the GCM name and the latest system version available for the forecasts needs to be checked [on the CDS website](https://cds.climate.copernicus.eu/)

In [13]:
from collections import OrderedDict as OD

In [14]:
# GCM name -> system version requested from the CDS for that GCM.
# Insertion order is the order in which the GCMs are processed later on.
dict_systems = OD(
    [
        ('ECMWF', 5),
        ('UKMO', 600),
        ('METEO_FRANCE', 8),
        ('DWD', 21),
        ('CMCC', 35),
        ('NCEP', 2),
        ('JMA', 2),
        ('ECCC', 2),
    ]
)

In [15]:
dict_systems.keys()

odict_keys(['ECMWF', 'UKMO', 'METEO_FRANCE', 'DWD', 'CMCC', 'NCEP', 'JMA', 'ECCC'])

### lag in months (set to a non-zero value if older forecasts need to be downloaded)

In [16]:
lag = 0

### get today's date 

In [17]:
date = dateparser.parse('today')

### apply lag 

In [18]:
# Recompute from today's date rather than from the current value of `date`,
# so that re-running this cell does not subtract the lag a second time.
date = dateparser.parse('today') - relativedelta(months=lag)

In [19]:
date

datetime.datetime(2021, 10, 19, 13, 59, 11, 929445)

### generate the dates for the download of the rolling hindcasts 

In [20]:
# One initialisation date per hindcast year (1993 to 2016 inclusive), each on
# the first day of the month taken from `date`.
dates = [
    datetime(year=hindcast_year, month=date.month, day=1)
    for hindcast_year in range(1993, 2017)
]

In [21]:
dates

[datetime.datetime(1993, 10, 1, 0, 0),
 datetime.datetime(1994, 10, 1, 0, 0),
 datetime.datetime(1995, 10, 1, 0, 0),
 datetime.datetime(1996, 10, 1, 0, 0),
 datetime.datetime(1997, 10, 1, 0, 0),
 datetime.datetime(1998, 10, 1, 0, 0),
 datetime.datetime(1999, 10, 1, 0, 0),
 datetime.datetime(2000, 10, 1, 0, 0),
 datetime.datetime(2001, 10, 1, 0, 0),
 datetime.datetime(2002, 10, 1, 0, 0),
 datetime.datetime(2003, 10, 1, 0, 0),
 datetime.datetime(2004, 10, 1, 0, 0),
 datetime.datetime(2005, 10, 1, 0, 0),
 datetime.datetime(2006, 10, 1, 0, 0),
 datetime.datetime(2007, 10, 1, 0, 0),
 datetime.datetime(2008, 10, 1, 0, 0),
 datetime.datetime(2009, 10, 1, 0, 0),
 datetime.datetime(2010, 10, 1, 0, 0),
 datetime.datetime(2011, 10, 1, 0, 0),
 datetime.datetime(2012, 10, 1, 0, 0),
 datetime.datetime(2013, 10, 1, 0, 0),
 datetime.datetime(2014, 10, 1, 0, 0),
 datetime.datetime(2015, 10, 1, 0, 0),
 datetime.datetime(2016, 10, 1, 0, 0)]

### path to download the rolling hindcasts 

In [22]:
# Root directory for the downloaded hindcasts; the download loop appends one
# sub-directory per GCM and per (upper-cased) variable name below it.
# NOTE(review): absolute path tied to one machine / external drive -- adjust
# before running elsewhere.
gcm_path = pathlib.Path(f'/media/nicolasf/END19101/ICU/data/{provider}/operational/hindcasts')

In [23]:
# Create the download directory together with any missing parents.
# `exist_ok=True` replaces the separate existence check, so the cell is safe
# to re-run and there is no gap between checking and creating.
gcm_path.mkdir(parents=True, exist_ok=True)

### Now loop over the dates, create a dictionary containing the keyword arguments, and pass it to dask for parallel download

In [24]:
# Download the hindcasts one initialisation date at a time; for each date the
# downloads for all GCMs are built lazily and then executed together by dask.
# The loop variable is deliberately NOT called `date`, so the notebook-level
# `date` (today's date minus the lag) is not overwritten by the loop.
for init_date in dates:

    # lazy (delayed) download tasks for this initialisation date, one per GCM
    tasks = []

    for GCM, system in dict_systems.items():

        # keyword arguments passed to C3S.download for this GCM / date
        args = dict(
            GCM=GCM,
            system=system,
            varname=var_name,
            year=init_date.year,
            month=init_date.month,
            leadtimes=[1, 2, 3, 4, 5, 6],
            # files go to <gcm_path>/<GCM>/<VARNAME>
            opath=gcm_path.joinpath(GCM).joinpath(var_name.upper()),
            domain=domains.domains['C3S_download'],
            file_format='netcdf',
            level='surface',
            max_retry=3,
        )

        tasks.append(delayed(C3S.download)(**args))

    with ProgressBar():
        # `compute` is given the list as a single argument, so it returns a
        # 1-tuple wrapping the list of results; the name is rebound on every
        # iteration and therefore ends up holding the last date's results only
        downloaded_files = compute(tasks)

[                                        ] | 0% Completed |  0.0s
/media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/ECMWF/TPRATE/ensemble_seas_forecasts_tprate_from_1993_10_ECMWF.netcdf exists already on disk, skipping download and returning path


/media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/CMCC/TPRATE/ensemble_seas_forecasts_tprate_from_1993_10_CMCC.netcdf exists already on disk, skipping download and returning path


/media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/METEO_FRANCE/TPRATE/ensemble_seas_forecasts_tprate_from_1993_10_METEO_FRANCE.netcdf exists already on disk, skipping download and returning path

/media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/ECCC/TPRATE/ensemble_seas_forecasts_tprate_from_1993_10_ECCC.netcdf exists already on disk, skipping download and returning path

/media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/NCEP/TPRATE/ensemble_seas_forecasts_tprate_from_1993_10_NCEP.netcdf exists already on disk, sk