## Purge the *operational* hindcasts for the current month, to be run prior to `1_download_C3S_rolling_hindcasts.ipynb` 

In [1]:
# Enable IPython's autoreload extension; mode 2 reloads all imported modules
# before each cell executes, so edits to imported code are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pathlib
import shutil
import sys
from datetime import datetime
from subprocess import call

import dateparser
import pandas as pd
import yaml
from dateutil.relativedelta import relativedelta
from yaml.loader import SafeLoader

In [3]:
# Home directory of the user running the notebook, as a Path object.
HOME = pathlib.Path.home()

In [4]:
# Current working directory of the kernel, as a Path object.
CWD = pathlib.Path.cwd()

In [5]:
CWD

PosixPath('/home/nicolasf/operational/ICU/development/hotspots/code/ICU_Water_Watch/notebooks/C3S')

### import local functions for the downloading of the C3S forecasts 

In [6]:
# Make the directory two levels above the working directory importable, so the
# local `ICU_Water_Watch` package can be imported. The entry is relative, so it
# only resolves correctly when the kernel runs from this notebook's directory.
sys.path.append('../..')

In [7]:
from ICU_Water_Watch import C3S, domains

### parameters for papermill

In [8]:
# Data provider; only used to build `gcm_path` below.
provider = "CDS"  # should not change
# Variable whose hindcast files are purged (also names the per-GCM sub-directory, upper-cased).
varname = "tprate"  # in ['tprate','t2m','sst']
# Number of months subtracted from today's date to select the initialisation
# month to purge; 0 targets the current month.
lag = 0
# path where to find the hindcasts / reforecasts
# NOTE: the f-string is evaluated right here, so changing `provider` after this
# line does not change `gcm_path`; override `gcm_path` itself if needed.
gcm_path = f"/media/nicolasf/END19101/ICU/data/{provider}/operational/hindcasts"
# YAML file mapping each GCM name to its CDS system number.
config_yaml = "./CDS_config_operational.yaml"

### reads the yaml file, mapping GCM to system 

Note that the forecast system to system number mapping is available at: 
    
- [https://confluence.ecmwf.int/display/CKB/Description+of+the+C3S+seasonal+multi-system](https://confluence.ecmwf.int/display/CKB/Description+of+the+C3S+seasonal+multi-system)

#### it can be read directly into pandas 

In [9]:
# ECMWF Confluence page describing the C3S seasonal multi-system; parsed below for its tables.
url = "https://confluence.ecmwf.int/display/CKB/Description+of+the+C3S+seasonal+multi-system"

In [10]:
# pd.read_html fetches the page (network access required) and returns a list of
# DataFrames, one per HTML table found.
systems_table = pd.read_html(url)

In [11]:
# Keep the second table of the page (index 1), i.e. the centre / system-name /
# CDS 'system' value table displayed below.
# NOTE: this rebinds `systems_table` from a list to a DataFrame, so this cell
# cannot be re-run on its own without first re-running the `pd.read_html` cell.
systems_table = systems_table[1]

In [12]:
systems_table

Unnamed: 0,Forecasting centre,Forecasting system name,CDS 'system' value
0,ECMWF,System 4,4
1,ECMWF,SEAS5,5
2,ECMWF,SEAS5,51(a)
3,Météo-France,System 5,5
4,Météo-France,System 6,6
5,Météo-France,System 7,7
6,Météo-France,System 8,8
7,CMCC,SPS3,3
8,CMCC,SPS3.5,35
9,DWD,GCFS2.0,2


### one then needs to check the YAML file against this mapping and, if necessary, edit it 

In [13]:
# Parse the YAML configuration into `dict_systems`, the GCM name -> CDS system
# number mapping. SafeLoader restricts parsing to plain YAML tags.
with open(config_yaml) as config_file:
    dict_systems = yaml.load(config_file, Loader=SafeLoader)

In [14]:
dict_systems

{'ECMWF': 51,
 'UKMO': 601,
 'METEO_FRANCE': 8,
 'DWD': 21,
 'CMCC': 35,
 'NCEP': 2,
 'JMA': 3,
 'ECCC_CanCM4i': 2,
 'ECCC_GEM5_NEMO': 3}

### cast the hindcasts path to a `pathlib.Path` object 

In [15]:
# Rebind the string path to a Path object; the purge loop relies on its
# `joinpath` and `glob` methods.
gcm_path = pathlib.Path(gcm_path)

### get today's date 

In [16]:
# Today's date and time as a datetime object; only its month is used further
# down, to select the files to purge.
init = dateparser.parse("today")

In [17]:
init

datetime.datetime(2022, 11, 14, 11, 3, 43, 800670)

### apply the lag (in months) 

In [18]:
# Shift the reference date back by `lag` months.
# NOTE: `init` is overwritten in place, so re-running this cell alone subtracts
# the lag a second time; re-run the `dateparser.parse("today")` cell first.
init = init - relativedelta(months=lag)

In [19]:
init

datetime.datetime(2022, 11, 14, 11, 3, 43, 800670)

In [20]:
# Purge, for every GCM listed in the YAML configuration, the hindcast files of
# `varname` whose name matches the month of `init` (today's date minus `lag`
# months). Files are removed permanently with `Path.unlink`.
for GCM in dict_systems:
    # directory holding the files of this GCM for the requested variable
    gcm_dir = gcm_path.joinpath(f"{GCM}/{varname.upper()}")
    lfiles_to_delete = list(
        gcm_dir.glob(
            f"ensemble_seas_forecasts_{varname}_from_*_{init.month:02d}*.netcdf"
        )
    )
    # An empty list is the only case with nothing to do: a single matching
    # file must be purged as well, otherwise it would be left on disk.
    if lfiles_to_delete:
        print(f"deleting files in {gcm_dir}")
        for file in lfiles_to_delete:
            file.unlink()
    else:
        print(f"nothing to delete for {GCM}")

nothing to delete for ECMWF
deleting files in /media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/UKMO/TPRATE
deleting files in /media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/METEO_FRANCE/TPRATE
deleting files in /media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/DWD/TPRATE
deleting files in /media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/CMCC/TPRATE
deleting files in /media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/NCEP/TPRATE
deleting files in /media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/JMA/TPRATE
deleting files in /media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/ECCC_CanCM4i/TPRATE
deleting files in /media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/ECCC_GEM5_NEMO/TPRATE
