### Calculate monthly means of the daily precipitation rates from the daily MSWEP data, then the monthly climatologies

In [34]:
%matplotlib inline

### os 
import os 
import sys
import pathlib

### datetimes 
from datetime import datetime, timedelta

### scipy 
import numpy as np 
import pandas as pd
import xarray as xr

### plotting 
from matplotlib import pyplot as plt

In [35]:
from dask.diagnostics import ProgressBar

In [36]:
def roll_longitudes(dset, lon_name='lon'):
    """
    Convert the longitudes of a dataset to the 0 to 360 convention
    (instead of -180 to 180).

    The longitude coordinate is wrapped modulo 360 and the dataset is then
    sorted along that coordinate. Sorting (rather than rolling by half the
    axis length) keeps the result monotonically increasing for any input
    layout: grids that are not centred on the 0 degree meridian, grids with
    an odd number of points, and datasets that are already on 0 to 360.

    Parameters
    ----------
    dset : xarray.Dataset
        The input Dataset, typically with longitudes going from -180 to 180
    lon_name : str, optional
        The name of the longitude dimension, by default 'lon'

    Returns
    -------

    dset : xarray.Dataset
        Dataset with longitudes in [0, 360), in increasing order
    """

    # wrap the coordinate values into [0, 360)
    wrapped = dset.assign_coords({lon_name: dset[lon_name] % 360})

    # reorder the data so that the wrapped coordinate is increasing again
    return wrapped.sortby(lon_name)

In [37]:
def preprocess(dset):
    """
    Prepare one dataset before it is combined with the others
    (passed as the ``preprocess`` callback of ``xr.open_mfdataset``).

    The latitudes are sorted in increasing order, the longitudes are
    converted with ``roll_longitudes``, and the domain is then restricted
    to latitudes -40 to 40 and longitudes 110 to 280.

    Parameters
    ----------
    dset : xarray.Dataset
        The dataset to prepare, with 'lat' and 'lon' coordinates

    Returns
    -------
    xarray.Dataset
        The sorted, longitude-converted and spatially subsetted dataset
    """

    # ascending latitudes first, so that slice(-40., 40.) selects south -> north
    reordered = roll_longitudes(dset.sortby('lat'))

    return reordered.sel(lat=slice(-40., 40.), lon=slice(110, 280))

In [38]:
# first and last year of the period to process; both are inclusive
# (the yearly loop below iterates up to year_stop + 1)
year_start = 1991
year_stop = 2020 

In [39]:
# input directory: the daily MSWEP files, globbed further down as "<year>???.nc"
# NOTE(review): absolute, machine-specific path -- adapt it before running elsewhere
dpath = pathlib.Path('/media/nicolasf/END19101/ICU/data/glo2ho/MSWEP280/Past/Daily/') 

In [40]:
# output directory: one file of monthly means per year is written here
# NOTE(review): absolute, machine-specific path -- adapt it before running elsewhere
opath = pathlib.Path('/media/nicolasf/END19101/ICU/data/glo2ho/MSWEP280/climatologies/monthly') 

In [41]:
# make sure the output directory exists before starting the (slow) computations,
# to_netcdf does not create missing directories
opath.mkdir(parents=True, exist_ok=True)

for year in range(year_start, year_stop + 1):

    # daily files of that year, in lexical (= chronological) order
    lfiles = sorted(dpath.glob(f"{year}???.nc"))

    # fail early, with an explicit message, if a year is missing on disk
    if not lfiles:
        raise FileNotFoundError(f"no daily files matching {year}???.nc found in {dpath}")

    # the context manager closes the daily files once the year has been
    # processed, instead of leaving them open for the rest of the loop
    with xr.open_mfdataset(lfiles, preprocess=preprocess, engine='netcdf4') as dset:

        # whole time axis in a single chunk, 10 x 10 tiles in space, then
        # monthly means of the daily values
        # NOTE(review): '1M' is the month-end alias; recent pandas versions
        # spell it 'ME' and warn on 'M' -- kept as is for compatibility
        dsetm = dset.chunk({'time':-1, 'lat':10, 'lon':10}).resample(time='1M').mean()

        with ProgressBar():

            # run the dask graph, the result is held in memory
            dsetm = dsetm.compute()

        dsetm.to_netcdf(opath.joinpath(f'monthly_MSWEP_{year}.nc'))

[########################################] | 100% Completed | 107.71 s
[########################################] | 100% Completed | 107.68 s
[########################################] | 100% Completed | 108.39 s
[########################################] | 100% Completed | 103.24 s
[########################################] | 100% Completed | 101.80 s
[########################################] | 100% Completed | 104.77 s
[########################################] | 100% Completed | 102.49 s
[########################################] | 100% Completed | 105.93 s
[########################################] | 100% Completed | 105.40 s
[########################################] | 100% Completed | 116.30 s
[########################################] | 100% Completed | 122.35 s
[########################################] | 100% Completed | 146.65 s
[########################################] | 100% Completed | 147.90 s
[########################################] | 100% Completed | 137.92 s
[#####

### list the yearly files containing the monthly averages 

In [42]:
# only pick up the yearly files of monthly means (monthly_MSWEP_<year>.nc), not
# any other NetCDF file that may sit in the same folder, and sort them so that
# they are listed in chronological order
lfiles_clim = sorted(opath.glob("monthly_MSWEP_????.nc"))

In [43]:
# display the list of files found, as a visual check
lfiles_clim

[PosixPath('/media/nicolasf/END19101/ICU/data/glo2ho/MSWEP280/climatologies/monthly/monthly_MSWEP_1991.nc'),
 PosixPath('/media/nicolasf/END19101/ICU/data/glo2ho/MSWEP280/climatologies/monthly/monthly_MSWEP_1992.nc'),
 PosixPath('/media/nicolasf/END19101/ICU/data/glo2ho/MSWEP280/climatologies/monthly/monthly_MSWEP_1993.nc'),
 PosixPath('/media/nicolasf/END19101/ICU/data/glo2ho/MSWEP280/climatologies/monthly/monthly_MSWEP_1994.nc'),
 PosixPath('/media/nicolasf/END19101/ICU/data/glo2ho/MSWEP280/climatologies/monthly/monthly_MSWEP_1995.nc'),
 PosixPath('/media/nicolasf/END19101/ICU/data/glo2ho/MSWEP280/climatologies/monthly/monthly_MSWEP_1996.nc'),
 PosixPath('/media/nicolasf/END19101/ICU/data/glo2ho/MSWEP280/climatologies/monthly/monthly_MSWEP_1997.nc'),
 PosixPath('/media/nicolasf/END19101/ICU/data/glo2ho/MSWEP280/climatologies/monthly/monthly_MSWEP_1998.nc'),
 PosixPath('/media/nicolasf/END19101/ICU/data/glo2ho/MSWEP280/climatologies/monthly/monthly_MSWEP_1999.nc'),
 PosixPath('/media/

In [44]:
# in-place sort of the paths; for the monthly_MSWEP_<year>.nc files the
# lexical order is also the chronological order
lfiles_clim.sort()

In [46]:
# open all the yearly files as one multi-file dataset
dset_clim = xr.open_mfdataset(lfiles_clim)

In [47]:
# group the time steps by calendar month and average each group over time,
# the result has a 'month' dimension instead of 'time'
clim = dset_clim.groupby('time.month').mean('time')

In [49]:
# evaluate the climatology, with a progress bar, and rebind `clim` to the computed result
with ProgressBar(): 
    
    clim = clim.compute()

[########################################] | 100% Completed | 18.51 ss


In [51]:
# build the file name from the configured period rather than hard-coding the
# years, so that it cannot disagree with year_start / year_stop
clim_file = pathlib.Path(f'../data/MSWEP/Monthly_climatology_{year_start}_{year_stop}.nc')

# to_netcdf does not create missing directories
clim_file.parent.mkdir(parents=True, exist_ok=True)

clim.to_netcdf(clim_file)