In [None]:
import xarray as xr
import numpy as np
import pandas as pd
from cftime import num2date
import datetime as dt

In [None]:
# Directory holding the yearly input files (<var>_ERAinterim_<year>.nc)
# that the conversion loops below open.
dirnc_3h = "/archive/Raphael.Dussin/ERAinterim/originals/"
# Directory receiving the yearly *_daily.nc files written by those loops.
dirnc_daily = "/archive/Raphael.Dussin/ERAinterim/nc_daily/"

In [None]:
def make_daily_average(ds, varout="precip", varERA='TP'):
    """Build a daily dataset from the hour-0 and hour-12 records of ``ds``.

    The records of ``ds[varERA]`` stamped 00h and 12h are added pairwise
    (i-th midnight record with i-th noon record), multiplied by the density
    of water and divided by the number of seconds in a day. Following the
    unit comment in the body, the sum is treated as metres per day and the
    result as kg.m-2.s-1.

    NOTE(review): this presumably relies on the 00h and 12h records each
    holding a 12-hour accumulation, and on the file covering one calendar
    year -- confirm against the source data.

    Parameters
    ----------
    ds : xarray.Dataset
        Input dataset with a ``time`` coordinate and ``lon`` / ``lat``
        variables. ``ds[varERA]`` is indexed as (time, lat, lon). The
        dataset is loaded into memory in place.
    varout : str
        Name of the variable in the returned dataset.
    varERA : str
        Name of the variable read from ``ds``.

    Returns
    -------
    xarray.Dataset
        Contains ``lon``, ``lat`` (reversed order), ``varout`` with dims
        (time, lat, lon) and the latitude axis reversed, and a numeric
        ``time`` axis in seconds since 1900-01-01 with one value per day,
        starting at 12:00 on 1 January of the year of the first record.
    """
    # Positional indices of the records stamped 00h and 12h.
    hour_groups = ds.groupby(ds.time.dt.hour).groups
    midnight = hour_groups[0]
    noon = hour_groups[12]
    ds.load()

    daily_avg = xr.Dataset()
    # geo coordinates / flip north up
    daily_avg['lon'] = ds['lon']
    daily_avg['lat'] = ds['lat'][::-1]

    # daily precip
    rho_w = 1000  # kg.m-3
    seconds_per_day = 86400  # s.day-1
    field = ds[varERA]
    # Add the raw arrays on purpose: the two selections carry different time
    # labels, and arithmetic between labelled arrays would align on ``time``
    # instead of pairing the records by position.
    accumulated = field.isel(time=midnight).values + field.isel(time=noon).values
    # kg.m-2.s-1 = kg.m-3 * m.day-1 / s.day-1
    precip = rho_w * accumulated / seconds_per_day
    # Reverse the latitude axis to match the flipped ``lat`` coordinate.
    daily_avg[varout] = xr.DataArray(data=precip[:, ::-1, :], dims=['time', 'lat', 'lon'])

    # new time axis: one stamp per computed daily field, so its length always
    # matches the data instead of assuming exactly 8 records per day.
    year = int(ds.time[0].dt.year.values)
    ndays = precip.shape[0]
    dateref = dt.datetime(1900, 1, 1, 0, 0, 0)
    datestart = dt.datetime(year, 1, 1, 12, 0, 0)
    time = (datestart - dateref).total_seconds() + seconds_per_day * np.arange(ndays, dtype=float)
    daily_avg['time'] = time
    daily_avg['time'].attrs['units'] = "seconds since 1900-01-01 0:00:00"
    daily_avg['time'].attrs['calendar'] = "gregorian"

    return daily_avg

In [None]:
# Convert each yearly precipitation file (1979-2018) with the default
# variable names of make_daily_average and write one daily file per year.
for year in range(1979, 2018 + 1):
    print(f'working on year {year}')
    # The context manager closes the input file once the year has been
    # written, so open handles do not accumulate over the loop.
    with xr.open_dataset(f'{dirnc_3h}/precip_ERAinterim_{year}.nc') as ds:
        ds_daily = make_daily_average(ds)
        ds_daily.to_netcdf(f'{dirnc_daily}/precip_ERAinterim_{year}_daily.nc')

In [None]:
# Convert each yearly snowfall file (1979-2018), reading variable 'SF' and
# writing it as 'snow', one daily file per year.
for year in range(1979, 2018 + 1):
    print(f'working on year {year}')
    # The context manager closes the input file once the year has been
    # written, so open handles do not accumulate over the loop.
    with xr.open_dataset(f'{dirnc_3h}/snow_ERAinterim_{year}.nc') as ds:
        ds_daily = make_daily_average(ds, varout='snow', varERA='SF')
        ds_daily.to_netcdf(f'{dirnc_daily}/snow_ERAinterim_{year}_daily.nc')