In [None]:
import numpy as np
import pandas as pd
import xarray as xr
from matplotlib import pyplot as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = (8,5)
from glob import glob

In [None]:
# Shell escape: list the contents of the top-level datasets directory.
!ls /net/kage/d5/datasets

In [None]:
# Root directory of the monthly ERAInterim files. The trailing slash is
# required: sub-directory names are appended by plain string concatenation.
path = '/net/kage/d5/datasets/ERAInterim/monthly/'
# Disabled helper that lists the sub-directories found under `path`:
#dirs = glob(path+'/*'); dirs

In [None]:
# One glob pattern per sub-directory of `path`, keyed by the sub-directory name.
ddict = {kind: path + kind + '/*.nc'
         for kind in ('Surface', 'VertInt', 'Accum', 'PressureLevels')}
# Echo the mapping so the patterns can be checked by eye.
for key, value in ddict.items():
    print(key, value)

In [None]:
# Open every file matching the 'Surface' pattern as one dataset.
# glob() returns paths in arbitrary order, so sort them to make the
# file order (and hence any order-dependent combining) reproducible.
# decode_times=False keeps the raw numeric time axis 'T'; it is converted
# to pandas dates separately.
dtype = 'Surface'
ds_tmp = xr.open_mfdataset(sorted(glob(ddict[dtype])), decode_times=False)
# Names of the data variables (coordinates excluded).
names = list(ds_tmp.data_vars.variables)

In [None]:
# Print each variable name, in alphabetical order, next to its long_name attribute.
for var in sorted(names):
    print(f'{var}:\t', ds_tmp[var].long_name)

In [None]:
# Quick-look plot of t2m at index 0 of its leading dimension
# (presumably the first time step — TODO confirm dimension order).
ds_tmp.t2m[0,:,:].plot()

In [None]:
# Display the first entry of the raw time coordinate 'T'
# (still numeric, because the dataset was opened with decode_times=False).
ds_tmp['T'][0]

In [None]:
def to_pandas(Tgrid):
    """
    Parse the time grid of a Dataset and replace by a pandas time grid.

    Parameters
    ----------
    Tgrid : object exposing ``units``, ``values`` and ``shape``
        Time coordinate whose ``units`` string contains either
        ``'months since <date>'`` or ``'days since <date>'``.

    Returns
    -------
    pandas.DatetimeIndex or None
        * months: one stamp per month, i.e. month starts shifted by 15 days.
        * days: one stamp per day, starting at the reference date plus the
          first grid value (minus the half-step offset below).
        * any other units: prints a message and returns ``None``.
    """
    units = Tgrid.units
    is_monthly = 'months since' in units
    is_daily = 'days since' in units
    if not (is_monthly or is_daily):
        print('Unrecognized time grid')
        return

    # The reference date is the token right after 'since'. Using the last
    # word of the units string fails when a time-of-day follows the date
    # (e.g. 'days since 1960-01-01 00:00:00').
    ref_date = units.split('since', 1)[1].split()[0]
    nsteps = Tgrid.shape[0]
    # NOTE(review): the half-step shift presumably moves centred values
    # (x.5) back to the start of their month/day — confirm against the data.
    first_time = Tgrid.values[0] - 0.5

    if is_monthly:
        ref_year = int(ref_date[0:4])
        start = enso2date(first_time, ref_year)
        return pd.date_range(start, periods=nsteps, freq='MS').shift(15, freq='D')

    # Daily grid: the offset is a number of days, so it must NOT go through
    # enso2date (which interprets its argument as months).
    start = pd.Timestamp(ref_date) + pd.Timedelta(days=float(first_time))
    return pd.date_range(start, periods=nsteps, freq='D')

In [None]:
def enso2date(T0,ryear=1960,leap=True):
    """
    Convert a month count relative to January of ``ryear`` into a date string.

    Parameters
    ----------
    T0 : float
        Months elapsed since the start of January ``ryear``; may be negative.
        The fractional part selects the day within the month.
    ryear : int, optional
        Reference year (default 1960).
    leap : bool, optional
        If true, February has 29 days in leap years; otherwise always 28.

    Returns
    -------
    str
        Date formatted as ``'M/D/YYYY'`` without zero padding.
    """
    from calendar import isleap  # stdlib Gregorian rule (handles 1900, 2100, ...)

    norm = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
    # Floor division gives the correct year offset for negative T0 as well.
    iy = ryear + int(T0 // 12)
    res = T0 - (iy - ryear)*12
    im = int(res) + 1
    if im == 13:
        # Floating-point rounding can leave res == 12.0; roll into next January.
        im = 1
        iy = iy + 1
    frac = res - int(res)
    if leap and im == 2 and isleap(iy):
        ndays = 29
    else:
        ndays = norm[im-1]
    day = 1 + int(ndays * frac)
    return str(im)+'/'+str(day)+'/'+str(iy)

# Spot-check: late February of the default reference year 1960 (a leap year)
# and of the following year 1961 (not a leap year).
enso2date(1.999),enso2date(13.999)

In [None]:
# Take the raw time coordinate of the currently loaded dataset and display
# its conversion to pandas dates.
Tgrid = ds_tmp['T']
to_pandas(Tgrid)

In [None]:
# Rename 'T' to 'time', then overwrite the numeric values with the pandas
# dates computed from the original grid.
ds_Surface = ds_tmp.rename({'T':'time'})
ds_Surface['time'] = to_pandas(Tgrid)
# Same quick-look plot as before, now on the dataset with the converted time axis.
ds_Surface.t2m[0,:,:].plot()

In [None]:
# Write t2m to 't2m.nc' in the current working directory; the encoding asks
# for the time coordinate to be stored as float32.
ds_Surface.t2m.to_netcdf('t2m.nc',encoding={'time':{'dtype':'float32'}})

In [None]:
# Open every file matching the 'PressureLevels' pattern as one dataset.
# glob() returns paths in arbitrary order, so sort them to make the
# file order (and hence any order-dependent combining) reproducible.
dtype = 'PressureLevels'
ds_tmp = xr.open_mfdataset(sorted(glob(ddict[dtype])), decode_times=False)

# Replace the raw numeric time axis by pandas dates, then display it.
Tgrid = ds_tmp['T']
ds_tmp['T'] = to_pandas(Tgrid)
ds_tmp['T']

In [None]:
# Rename 'T' to 'time' so the pressure-level dataset uses the same name as ds_Surface.
ds_PL = ds_tmp.rename({'T':'time'})

In [None]:
# Collect and display the data-variable names of the pressure-level dataset.
names = list(ds_PL.data_vars.variables); names

In [None]:
# Quick-look plot of `u` at index 0 of its first two dimensions
# (presumably first time step and first pressure level — TODO confirm).
ds_PL.u[0,0].plot()

In [177]:
# Export of `u` to 'u.nc' is currently disabled (reason not recorded).
# TODO: re-enable or delete this cell.
#ds_PL.u.to_netcdf('u.nc',encoding={'time':{'dtype':'float32'}})

In [None]:
# Open every file matching the 'Accum' pattern as one dataset.
# glob() returns paths in arbitrary order, so sort them to make the
# file order (and hence any order-dependent combining) reproducible.
dtype = 'Accum'
ds_tmp = xr.open_mfdataset(sorted(glob(ddict[dtype])), decode_times=False)
names = list(ds_tmp.data_vars.variables)
# Print each variable name, in alphabetical order, next to its long_name attribute.
for var in sorted(names):
    print(var+':\t',ds_tmp[var].long_name)

In [None]:
# Keep a handle on the raw time grid, rename 'T' to 'time', then overwrite
# the numeric values with the pandas dates.
Tgrid = ds_tmp['T']
ds_Accum = ds_tmp.rename({'T':'time'})
ds_Accum['time'] = to_pandas(Tgrid)
# Map of the time-mean of `tp`; vmax caps the colour scale at 1e-7
# (hand-picked value, in whatever units `tp` carries — TODO document).
ds_Accum.tp.mean('time').plot(vmax = 1e-7)

In [None]:
# Write tp to 'tp.nc' in the current working directory; the encoding asks
# for the time coordinate to be stored as float32.
ds_Accum.tp.to_netcdf('tp.nc',encoding={'time':{'dtype':'float32'}})