# A method to correct precipitation fields for ocean models.

*R. Dussin*

## 1. Regridding GPCP v2.3 to ERAinterim grid

The first step is to regrid the satellite-based precipitation onto the atmospheric reanalysis grid. This can be done easily with xESMF.

In [None]:
import xarray as xr
import numpy as np

In [None]:
# Open the GPCP monthly-mean precipitation file from the working directory.
gpcpdir = './'
gpcp = xr.open_dataset(f'{gpcpdir}precip.mon.mean.nc')

In [None]:
# Inspect the precipitation variable (shown as the notebook cell output).
gpcp['precip']

In [None]:
import xesmf

In [None]:
# Directory holding the yearly ERA-Interim daily files read below
# (site-specific path).
erai_dir = '/archive/Raphael.Dussin/ERAinterim/nc_daily'

In [None]:
# Only the grid description is needed here, so the time axis and the
# precipitation field are dropped when the file is opened.
erai_grid = xr.open_dataset(
    f'{erai_dir}/precip_ERAinterim_1979_daily.nc',
    decode_times=False,
    drop_variables=['time', 'precip'],
)

In [None]:
# Inspect the grid-only dataset (shown as the notebook cell output).
erai_grid

Now let's add the cell edges:

In [None]:
# ERA-interim
# Build the cell edges used by the conservative regridding from the
# cell-centre coordinates.

lon = erai_grid['lon'].values
# Interior edges are the midpoints between neighbouring centres. The two outer
# edges are extrapolated by half of the local spacing read from the coordinate
# itself, so they stay consistent with the interior edges instead of relying
# on a rounded, hard-coded spacing.
lon_bnds = np.concatenate(
    (
        np.array([lon[0] - 0.5 * (lon[1] - lon[0])]),
        0.5 * (lon[:-1] + lon[1:]),
        np.array([lon[-1] + 0.5 * (lon[-1] - lon[-2])]),
    ),
    axis=0,
)

lat = erai_grid['lat'].values
# The outermost latitude edges are pinned to the poles.
# NOTE(review): assumes lat is ordered from south to north — confirm against
# the input file.
lat_bnds = np.concatenate(
    (np.array([-90]), 0.5 * (lat[:-1] + lat[1:]), np.array([90])),
    axis=0,
)

# Edge arrays have one more point than the centres, hence the separate
# 'lonp1' / 'latp1' dimensions.
erai_grid['lon_b'] = xr.DataArray(data=lon_bnds, dims=('lonp1'))
erai_grid['lat_b'] = xr.DataArray(data=lat_bnds, dims=('latp1'))

In [None]:
# GPCP
# Cell edges every 2.5 degrees, covering 0..360 in longitude and -90..90 in
# latitude.

gpcp_res = 2.5
gpcp_lon_edges = np.arange(0, 360 + gpcp_res, gpcp_res)
gpcp_lat_edges = np.arange(-90, 90 + gpcp_res, gpcp_res)

gpcp['lon_b'] = xr.DataArray(data=gpcp_lon_edges, dims='lonp1')
gpcp['lat_b'] = xr.DataArray(data=gpcp_lat_edges, dims='latp1')

In [None]:
# Conservative regridder from the GPCP grid to the ERA-Interim grid.
# NOTE(review): the xESMF documentation suggests `periodic` only affects
# non-conservative methods — confirm whether it has any effect here.
gpcp2erai = xesmf.Regridder(
    ds_in=gpcp,
    ds_out=erai_grid,
    method='conservative',
    periodic=True,
)

In [None]:
# Apply the regridder to the GPCP precipitation field; the result is reused
# further down when rescaling the ERA-Interim rain.
gpcp_precip_interp = gpcp2erai(gpcp['precip'])

In [None]:
# Quick visual check of the regridded field for January 2016.
gpcp_precip_interp.sel(time='2016-1').plot()

In [None]:
# Store the regridded GPCP precipitation in its own dataset and write it out.
gpcp_regridded = xr.Dataset({'precip': gpcp_precip_interp})
gpcp_regridded.to_netcdf('./GPCP_v2.3_256x512.nc')

## 2. Generate new dataset

### Functions

In [None]:
def cumul_precip(da):
    """Return the running sum of `da` along 'time', preceded by a zero slice.

    The leading slice of zeros gives the result one more step along 'time'
    than the cumulative sum itself, so that differencing the result along
    'time' (see `decumul_precip`) yields one value per original step.
    """
    running_total = da.cumsum(dim='time')
    # Zero field shaped like a single time step of the running total.
    start = xr.zeros_like(running_total.isel(time=0))
    return xr.concat([start, running_total], dim='time')

def normalize_cumulated_precip(da):
    """Divide `da` by its last value along 'time'.

    The divisor is floored at 1e-15 so that points whose final value is zero
    do not cause a division by zero.
    """
    final_value = da.isel(time=-1)
    return da / final_value.clip(min=1e-15)

def decumul_precip(da):
    """Return the differences between consecutive steps of `da` along 'time'."""
    return da.diff(dim='time')

### Method

* extract monthly data from the yearly ERAinterim file
* accumulate and normalize the data, and set the result aside
* conservatively regrid the monthly GPCP data onto the ERAinterim grid
* total precip in GPCP = avg monthly value * ndays_in_month
* rescale the normalized cumulative sum with the GPCP total (smoothing required?)
* run decumulation (difference the cumulative sum back into daily values)

### Compute

In [None]:
def process_one_year(ds_erai, da_gpcp):
    """Rescale one year of daily rain so that monthly totals follow GPCP.

    For every calendar month, the daily 'rain' values are clipped at zero,
    accumulated and normalized by the month-end total. That normalized curve
    is multiplied by the monthly total derived from `da_gpcp` and differenced
    back into daily values.

    Parameters
    ----------
    ds_erai : dataset
        Holds a 'rain' variable with 'time', 'lat' and 'lon' dimensions and a
        decoded time axis; the year is taken from its first time stamp.
    da_gpcp : data array
        Monthly field that can be selected with a 'YYYY-MM' string on 'time'.
        NOTE(review): the unit conversions below assume mm/day on the same
        lat/lon grid as `ds_erai` — confirm against the input data.

    Returns
    -------
    dataset
        Built from `xr.zeros_like(ds_erai)` with 'rain' replaced by the
        rescaled daily values.

    Raises
    ------
    ValueError
        If the `da_gpcp` selection for a month does not reduce to a single
        2-D field (for example when that month is missing).
    """
    current_year = ds_erai.time.dt.year[0].values
    print(current_year)
    ds_out = xr.zeros_like(ds_erai)
    monthly_fields = []
    for month in range(1, 13):
        period = f'{current_year}-{month:02d}'
        data_month = ds_erai['rain'].sel(time=period)
        ndays = len(data_month.time)
        cumul = cumul_precip(data_month.clip(min=0))
        cumul_normed = normalize_cumulated_precip(cumul)
        new_total = da_gpcp.sel(time=period).values.squeeze()
        if new_total.ndim != 2:
            raise ValueError(
                f'expected a single 2-D GPCP field for {period}, '
                f'got shape {new_total.shape}'
            )
        new_total = new_total * ndays / 1000  # total precip in meters
        rescaled = cumul_normed.transpose('time', 'lat', 'lon') * new_total
        # kg.m-2.s-1
        monthly_fields.append(decumul_precip(rescaled) * 1000 / 86400)
    # Concatenate once instead of re-copying the accumulated array each month.
    ds_out['rain'] = xr.concat(monthly_fields, dim='time')
    return ds_out

In [None]:
# Output location and netCDF fill values for the corrected files.
outdir = '/archive/Raphael.Dussin/ERAinterim/blend_GPCP'
encoding = {'time': {'_FillValue': 0}, 'lon': {'_FillValue': 1e+36},
            'lat': {'_FillValue': 1e+36}, 'rain': {'_FillValue': 1e+36}}

for year in np.arange(1979, 2018 + 1):
    precip_file = f'{erai_dir}/precip_ERAinterim_{year}_daily.nc'
    snow_file = f'{erai_dir}/snow_ERAinterim_{year}_daily.nc'
    # Context managers close the input files once the year has been written,
    # so file handles do not accumulate over the whole period.
    with xr.open_dataset(precip_file) as precip, \
            xr.open_dataset(snow_file) as snow, \
            xr.open_dataset(precip_file, decode_times=False) as ds_time:
        # Rain is total precipitation minus snow, clipped at zero.
        rain = (precip['precip'] - snow["snow"]).clip(min=0).to_dataset(name='rain')
        ds_corrected = process_one_year(rain, gpcp_precip_interp)
        # Reuse the undecoded time axis of the input file for the output.
        ds_corrected['time'] = ds_time['time']
        ds_corrected['rain'].attrs = {'valid_min': 0., 'valid_max': 1e-2}
        ds_corrected.to_netcdf(f'{outdir}/rain_Dussin_corrected_{year}_daily.nc', encoding=encoding)