### download the last month of the CDS raw forecasts 

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
from subprocess import call 
from shutil import which 
import pathlib
from datetime import datetime
from dateutil.relativedelta import relativedelta
import dateparser
import yaml
from yaml.loader import SafeLoader
import ipynbname

import numpy as np
import xarray as xr

In [3]:
from dask import delayed, compute
from dask.diagnostics import ProgressBar

In [4]:
HOME = pathlib.Path.home()

In [5]:
CWD = pathlib.Path.cwd()

In [6]:
CWD

PosixPath('/home/nicolasf/operational/ICU/development/hotspots/code/ICU_Water_Watch/notebooks/C3S')

### import local functions for the downloading of the C3S forecasts 

In [7]:
sys.path.append('../..')

In [8]:
from ICU_Water_Watch import C3S, domains

  from pandas import Int64Index as NumericIndex


### parameters for papermill

In [9]:
provider = 'CDS' # should not change
varname = 'tprate' # should not change
domain_name = "C3S_download" # domain name, see below 
# if one wants to download other forecasts than the ones corresponding to the current month
lag = 0
# path where to save the hindcasts / reforecasts
gcm_path = f'/media/nicolasf/END19101/ICU/data/{provider}/operational/forecasts'
config_yaml = './CDS_config.yaml' 

### reads the yaml file, mapping GCM to system 

In [10]:
# Open the file and load the file
with open(config_yaml) as f:
    dict_systems = yaml.load(f, Loader=SafeLoader)

In [11]:
dict_systems

{'ECMWF': 5,
 'UKMO': 601,
 'METEO_FRANCE': 8,
 'DWD': 21,
 'CMCC': 35,
 'NCEP': 2,
 'JMA': 3,
 'ECCC_GEM_NEMO': 2,
 'ECCC_CanCM4i': 3}

### casts the paths to `pathlib.Path` objects 

In [12]:
gcm_path = pathlib.Path(gcm_path)

In [13]:
if not(gcm_path.exists()): 
    gcm_path.mkdir(parents=True)

### get today's date 

In [14]:
date = dateparser.parse('today')

In [15]:
date

datetime.datetime(2022, 3, 15, 15, 46, 35, 194674)

### apply lag 

In [16]:
date = date - relativedelta(months=lag)

In [17]:
print(f"will download the forecasts issued in {date:%B %Y}")

will download the forecasts issued in March 2022


### use dask to parallelize the download of the forecasts 

#### build the queue

In [18]:
downloaded_files = []

for GCM in dict_systems.keys():
    
    if 'ECCC' in GCM:
        
        GCM_API_name = GCM.split('_')[0]
        
        GCM_path = gcm_path.joinpath(GCM).joinpath(varname.upper())
        
        print(f"processing ECCC, path should be {str(GCM_path)}")
    
        args = dict(GCM=GCM_API_name,
        system=dict_systems[GCM], 
        varname=varname,
        year=date.year,
        month=date.month,
        leadtimes=[1, 2, 3, 4, 5, 6],
        opath=GCM_path,
        domain=domains.domains[domain_name],
        file_format='netcdf',
        level='surface',
        max_retry=3)
    
    else: 
        
        args = dict(GCM=GCM,
        system=dict_systems[GCM], 
        varname=varname,
        year=date.year,
        month=date.month,
        leadtimes=[1, 2, 3, 4, 5, 6],
        opath=gcm_path.joinpath(GCM).joinpath(varname.upper()),
        domain=domains.domains[domain_name],
        file_format='netcdf',
        level='surface',
        max_retry=3)    
    
    downloaded_files.append(delayed(C3S.download)(**args))


processing ECCC, path should be /media/nicolasf/END19101/ICU/data/CDS/operational/forecasts/ECCC_GEM_NEMO/TPRATE
processing ECCC, path should be /media/nicolasf/END19101/ICU/data/CDS/operational/forecasts/ECCC_CanCM4i/TPRATE


#### now compute 

In [19]:
with ProgressBar():
    downloaded_files = compute(downloaded_files)

[                                        ] | 0% Completed |  0.0s
/media/nicolasf/END19101/ICU/data/CDS/operational/forecasts/NCEP/TPRATE/ensemble_seas_forecasts_tprate_from_2022_03_NCEP.netcdf exists already on disk, skipping download and returning path

/media/nicolasf/END19101/ICU/data/CDS/operational/forecasts/JMA/TPRATE/ensemble_seas_forecasts_tprate_from_2022_03_JMA.netcdf exists already on disk, skipping download and returning path



/media/nicolasf/END19101/ICU/data/CDS/operational/forecasts/UKMO/TPRATE/ensemble_seas_forecasts_tprate_from_2022_03_UKMO.netcdf exists already on disk, skipping download and returning path


/media/nicolasf/END19101/ICU/data/CDS/operational/forecasts/ECCC_GEM_NEMO/TPRATE/ensemble_seas_forecasts_tprate_from_2022_03_ECCC.netcdf exists already on disk, skipping download and returning path

/media/nicolasf/END19101/ICU/data/CDS/operational/forecasts/ECMWF/TPRATE/ensemble_seas_forecasts_tprate_from_2022_03_ECMWF.netcdf exists already on disk, skipping d

### check dimensions and return the total number of members in the C3S MME 

In [20]:
lfiles = downloaded_files[0]

In [21]:
[f.name for f in lfiles]

['ensemble_seas_forecasts_tprate_from_2022_03_ECMWF.netcdf',
 'ensemble_seas_forecasts_tprate_from_2022_03_UKMO.netcdf',
 'ensemble_seas_forecasts_tprate_from_2022_03_METEO_FRANCE.netcdf',
 'ensemble_seas_forecasts_tprate_from_2022_03_DWD.netcdf',
 'ensemble_seas_forecasts_tprate_from_2022_03_CMCC.netcdf',
 'ensemble_seas_forecasts_tprate_from_2022_03_NCEP.netcdf',
 'ensemble_seas_forecasts_tprate_from_2022_03_JMA.netcdf',
 'ensemble_seas_forecasts_tprate_from_2022_03_ECCC.netcdf',
 'ensemble_seas_forecasts_tprate_from_2022_03_ECCC.netcdf']

In [22]:
members = []
for f in lfiles: 
    g = f.name.split('.')[0].split('_')[-1]
    dset = xr.open_dataset(f)
    members.append(dset.dims['number'])
    print(f"{g}: {dset.dims}")
    

ECMWF: Frozen({'longitude': 141, 'latitude': 81, 'number': 51, 'time': 5})
UKMO: Frozen({'longitude': 141, 'latitude': 81, 'number': 56, 'time': 5})
FRANCE: Frozen({'longitude': 141, 'latitude': 81, 'number': 51, 'time': 5})
DWD: Frozen({'longitude': 141, 'latitude': 81, 'number': 50, 'time': 5})
CMCC: Frozen({'longitude': 141, 'latitude': 81, 'number': 50, 'time': 5})
NCEP: Frozen({'longitude': 141, 'latitude': 81, 'number': 112, 'time': 5})
JMA: Frozen({'longitude': 113, 'latitude': 65, 'number': 140, 'time': 5})
ECCC: Frozen({'longitude': 141, 'latitude': 81, 'number': 10, 'time': 5})
ECCC: Frozen({'longitude': 141, 'latitude': 81, 'number': 10, 'time': 5})


In [23]:
print(f"TOTAL NUMBER OF MEMBERS IN THE C3S MME for forecasts issued {date:%B %Y}: {np.array(members).sum()}")

TOTAL NUMBER OF MEMBERS IN THE C3S MME for forecasts issued March 2022: 530


### convert the notebook to HTML 

This does not work with papermill

In [24]:
# jupyter = which('jupyter')

# nb_name = ipynbname.name()

# nb_name

# cmd = f"{jupyter} nbconvert --to html {nb_name}.ipynb"

# r = call(cmd, shell=True)