In [33]:
%matplotlib inline
from matplotlib import pyplot as plt

In [34]:
import numpy as np
import pandas as pd
import xarray as xr

In [35]:
import pathlib

In [36]:
HOME = pathlib.Path.home()

In [37]:
HOME

PosixPath('/home/nicolasf')

In [38]:
provider = 'CDS'

In [39]:
# Parameters
GCM = "CMCC"
var_name = "PRECIP"

### INPUT PATH

In [40]:
# Input directory: downloaded forecast files for this provider / GCM / variable.
dpath = HOME.joinpath(
    'drives', 'auck_projects', 'END19101', 'Working', 'data',
    'GCMs', 'downloads', 'forecasts',
    provider, GCM, var_name,
)

In [41]:
dpath

PosixPath('/home/nicolasf/drives/auck_projects/END19101/Working/data/GCMs/downloads/forecasts/CDS/CMCC/PRECIP')

### OUTPUT PATH 

In [42]:
# Output directory: pre-processed forecast files for this provider / GCM / variable.
opath = HOME.joinpath(
    'drives', 'auck_projects', 'END19101', 'Working', 'data',
    'GCMs', 'pre_processed', 'forecasts',
    provider, GCM, var_name,
)

In [43]:
opath

PosixPath('/home/nicolasf/drives/auck_projects/END19101/Working/data/GCMs/pre_processed/forecasts/CDS/CMCC/PRECIP')

In [44]:
# Create the output directory together with any missing parents.
# `exist_ok=True` makes the cell safe to re-run and removes the gap between a
# separate exists() check and the mkdir() call. If `opath` exists but is not a
# directory, this raises FileExistsError instead of silently continuing.
opath.mkdir(parents=True, exist_ok=True)

### DICTIONARY WITH VARIABLE NAMES TO REPLACE

In [45]:
# For each variable handled by this notebook (keyed by `var_name`), the mapping
# {name found in the file: name to use after renaming}.
dict_variables = {
    'PRECIP': {'tprate': 'precip'},
    'Z500': {'z': 'z500'},
    'SST': {'sst': 'sst'},
    'T2M': {'t2m': 't2m'},
}

### DICTIONARY WITH DIMENSION NAMES TO REPLACE

In [46]:
# Mapping {dimension name found in the file: shorter name to use after renaming}.
dict_dims = {
    'latitude': 'lat',
    'longitude': 'lon',
    'number': 'member',
}

### DICTIONARY WITH BACKEND PARAMETERS FOR GRIB READING, DEPENDS ON THE GCM

In [47]:
# Per-GCM options, passed as `backend_kwargs` when a GRIB file is opened with
# the cfgrib engine. Only UKMO carries an additional `filter_by_keys` entry.
dict_backend = {
    'ECMWF': {'indexpath': ''},
    'UKMO': {
        'indexpath': '',
        'filter_by_keys': {'longitudeOfFirstGridPointInDegrees': 0.5},
    },
    'METEO_FRANCE': {'indexpath': ''},
    'DWD': {'indexpath': ''},
    'CMCC': {'indexpath': ''},
}

### list of files 

In [48]:
# Collect the GRIB files named <GCM>_<something>_<4 chars>_<2 chars>.grib in `dpath`.
# The list is sorted here because the order in which glob yields paths is not
# guaranteed; this cell then gives the same result whether or not a later
# sort is run.
lfiles = sorted(dpath.glob(f"{GCM}_*_????_??.grib"))

# Fail early with an explicit message rather than with an IndexError further down.
if not lfiles:
    raise FileNotFoundError(f"no file matching {GCM}_*_????_??.grib found in {dpath}")

In [49]:
lfiles.sort()

In [50]:
lfiles[0]

PosixPath('/home/nicolasf/drives/auck_projects/END19101/Working/data/GCMs/downloads/forecasts/CDS/CMCC/PRECIP/CMCC_PRECIP_2018_11.grib')

In [51]:
lfiles[-1]

PosixPath('/home/nicolasf/drives/auck_projects/END19101/Working/data/GCMs/downloads/forecasts/CDS/CMCC/PRECIP/CMCC_PRECIP_2019_12.grib')

In [52]:
len(lfiles)

14

### read first and last files for testing 

In [53]:
sub0 = xr.open_dataset(lfiles[0], engine='cfgrib', backend_kwargs=dict_backend[GCM])

In [54]:
sub0

In [55]:
sub1 = xr.open_dataset(lfiles[-1], engine='cfgrib', backend_kwargs=dict_backend[GCM])

In [56]:
sub1

In [57]:
sub0.close() 

In [58]:
sub1.close()

### now loop over files and save processed netcdf files 

In [59]:
# Number of steps each file is expected to contain once the all-NaN steps
# have been dropped; used both for the check and for the relabelling below.
n_steps = 5

dlog = []  # one summary line per processed file
dset = []  # processed datasets, in the same order as `lfiles`

for fname in lfiles:

    # Open with the GCM-specific cfgrib options and, if that fails, retry with
    # the minimal options. `except Exception` (rather than a bare `except`)
    # lets KeyboardInterrupt / SystemExit stop the loop as expected.
    try:
        sub = xr.open_dataset(fname, engine='cfgrib', backend_kwargs=dict_backend[GCM])
    except Exception as exc:
        print(f"could not open {fname} with the {GCM} backend options ({exc!r}), retrying with the default ones")
        sub = xr.open_dataset(fname, engine='cfgrib', backend_kwargs={'indexpath':''})

    # Drop the steps for which every value is missing.
    sub = sub.dropna(dim='step', how='all')

    if len(sub.step) != n_steps:
        # NOTE(review): the dataset is still appended below with its original
        # `step` coordinate; confirm that the later concatenation copes with it.
        print("issue with time {:%Y-%m}".format(pd.to_datetime(sub.time.data)))
    else:
        # Replace the step coordinate by the integers 1..n_steps.
        sub['step'] = (('step'), np.arange(1, n_steps + 1))

    # Harmonise variable and dimension names.
    sub = sub.rename(dict_variables[var_name])
    sub = sub.rename(dict_dims)

    # Make sure latitudes are in ascending order.
    if sub.lat[0].data > sub.lat[-1].data:
        sub = sub.sortby('lat')

    # `Dataset.sizes` is the mapping from dimension name to length; using
    # `Dataset.dims` as a mapping is deprecated in recent xarray versions.
    logtxt = f"{str(fname)}: members = {sub.sizes['member']}, lat = {sub.sizes['lat']}, lon = {sub.sizes['lon']}, step = {sub.sizes['step']}"

    print(logtxt)

    dlog.append(logtxt)

    dset.append(sub)

/home/nicolasf/drives/auck_projects/END19101/Working/data/GCMs/downloads/forecasts/CDS/CMCC/PRECIP/CMCC_PRECIP_2018_11.grib: members = 50, lat = 180, lon = 360, step = 5
/home/nicolasf/drives/auck_projects/END19101/Working/data/GCMs/downloads/forecasts/CDS/CMCC/PRECIP/CMCC_PRECIP_2018_12.grib: members = 50, lat = 180, lon = 360, step = 5
/home/nicolasf/drives/auck_projects/END19101/Working/data/GCMs/downloads/forecasts/CDS/CMCC/PRECIP/CMCC_PRECIP_2019_01.grib: members = 50, lat = 180, lon = 360, step = 5
/home/nicolasf/drives/auck_projects/END19101/Working/data/GCMs/downloads/forecasts/CDS/CMCC/PRECIP/CMCC_PRECIP_2019_02.grib: members = 50, lat = 180, lon = 360, step = 5
/home/nicolasf/drives/auck_projects/END19101/Working/data/GCMs/downloads/forecasts/CDS/CMCC/PRECIP/CMCC_PRECIP_2019_03.grib: members = 50, lat = 180, lon = 360, step = 5
/home/nicolasf/drives/auck_projects/END19101/Working/data/GCMs/downloads/forecasts/CDS/CMCC/PRECIP/CMCC_PRECIP_2019_04.grib: members = 50, lat = 180, 

### checks that one can concatenate the dataset along the time dimension 

In [60]:
dset_step = xr.concat(dset, 'time') 

In [61]:
dset_step

In [62]:
opath

PosixPath('/home/nicolasf/drives/auck_projects/END19101/Working/data/GCMs/pre_processed/forecasts/CDS/CMCC/PRECIP')

In [63]:
# Write one NetCDF file per entry along the `time` dimension of `dset_step`,
# named <GCM>_<var_name>_<YYYY>_<MM>.nc inside `opath`.
for date in pd.to_datetime(dset_step.time.data):
    # `opath` is already a Path, so the `/` operator yields a Path directly
    filename_out = opath / f"{GCM}_{var_name}_{date:%Y_%m}.nc"
    sub = dset_step.sel(time=date)
    print(f"saving {filename_out}")
    sub.to_netcdf(filename_out)
    sub.close()

saving /home/nicolasf/drives/auck_projects/END19101/Working/data/GCMs/pre_processed/forecasts/CDS/CMCC/PRECIP/CMCC_PRECIP_2018_11.nc
saving /home/nicolasf/drives/auck_projects/END19101/Working/data/GCMs/pre_processed/forecasts/CDS/CMCC/PRECIP/CMCC_PRECIP_2018_12.nc
saving /home/nicolasf/drives/auck_projects/END19101/Working/data/GCMs/pre_processed/forecasts/CDS/CMCC/PRECIP/CMCC_PRECIP_2019_01.nc
saving /home/nicolasf/drives/auck_projects/END19101/Working/data/GCMs/pre_processed/forecasts/CDS/CMCC/PRECIP/CMCC_PRECIP_2019_02.nc
saving /home/nicolasf/drives/auck_projects/END19101/Working/data/GCMs/pre_processed/forecasts/CDS/CMCC/PRECIP/CMCC_PRECIP_2019_03.nc
saving /home/nicolasf/drives/auck_projects/END19101/Working/data/GCMs/pre_processed/forecasts/CDS/CMCC/PRECIP/CMCC_PRECIP_2019_04.nc
saving /home/nicolasf/drives/auck_projects/END19101/Working/data/GCMs/pre_processed/forecasts/CDS/CMCC/PRECIP/CMCC_PRECIP_2019_05.nc
saving /home/nicolasf/drives/auck_projects/END19101/Working/data/GCMs

In [64]:
dset_step.close()