In [None]:
import os
import pickle as pk

import xarray as xr
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook as tqdm

import distributed

from pytassim.localization import GaspariCohn
from pytassim.model.terrsysmp import preprocess_cosmo
from pytassim.obs_ops.terrsysmp import CosmoT2mOperator
from pytassim.assimilation.filter.letkf_dist import DistributedLETKFUncorr
from pytassim.assimilation.filter.letkf import LETKFUncorr
from py_bacy.intf_pytassim.io import load_observations
from py_bacy.intf_pytassim.clm import distance_func

In [None]:
# Seeded legacy NumPy random generator, so that any random draws made through
# it are repeatable from one kernel session to the next.
rnd = np.random.RandomState(seed=42)

In [None]:
# Start a local Dask cluster (16 single-threaded workers, 6GB memory limit
# each, scratch files under /tmp) and connect a client to it.
cluster = distributed.LocalCluster(
    n_workers=16,
    threads_per_worker=1,
    memory_limit='6GB',
    local_dir="/tmp",
)
client = distributed.Client(cluster)
# Bare expression: shows the client summary as the cell output.
client

# Load data

In [None]:
# Root directory of the data sets; the input paths of the H2O and T2m cells
# below are joined onto it.
base_path = '/p/project/chbn29/hbn29p/Projects/phd_thesis/data/da_enkf_for_soil/'

## H2O

In [None]:
# Cleaned soil-moisture file in the '016' sub-directory.
vr_h2o_path = os.path.join(base_path, '016', 'h2o_cleaned.nc')
# Take 'H2OSOI', drop every length-one dimension together with its
# coordinate, and re-chunk the array with chunk sizes (1, 302, 267).
vr_h2o = (
    xr.open_dataset(vr_h2o_path)['H2OSOI']
    .squeeze(drop=True)
    .chunk((1, 302, 267))
)

In [None]:
# Cleaned soil-moisture file in the '015' sub-directory.
ens_h2o_path = os.path.join(base_path, '015', 'h2o_cleaned.nc')
# Take 'H2OSOI', drop every length-one dimension together with its
# coordinate, and re-chunk the array with chunk sizes (40, 1, 302, 267).
ens_h2o = (
    xr.open_dataset(ens_h2o_path)['H2OSOI']
    .squeeze(drop=True)
    .chunk((40, 1, 302, 267))
)

In [None]:
# Cleaned soil-moisture file in the '022' sub-directory.
sekf_h2o_path = os.path.join(base_path, '022', 'h2o_cleaned.nc')
# Take 'H2OSOI', drop every length-one dimension together with its
# coordinate, and re-chunk the array with chunk sizes (40, 1, 302, 267).
sekf_h2o = (
    xr.open_dataset(sekf_h2o_path)['H2OSOI']
    .squeeze(drop=True)
    .chunk((40, 1, 302, 267))
)

## T2m

In [None]:
# Cleaned 2-metre-temperature file in the '016' sub-directory.
vr_t2m_path = os.path.join(base_path, '016', 't2m_cleaned.nc')
# Take 'T_2M', drop every length-one dimension together with its
# coordinate, and re-chunk the array with chunk sizes (1, 109, 98).
vr_t2m = (
    xr.open_dataset(vr_t2m_path)['T_2M']
    .squeeze(drop=True)
    .chunk((1, 109, 98))
)

In [None]:
# Cleaned 2-metre-temperature file in the '015' sub-directory.
ens_t2m_path = os.path.join(base_path, '015', 't2m_cleaned.nc')
# Take 'T_2M', drop every length-one dimension together with its
# coordinate, and re-chunk the array with chunk sizes (40, 1, 109, 98).
ens_t2m = (
    xr.open_dataset(ens_t2m_path)['T_2M']
    .squeeze(drop=True)
    .chunk((40, 1, 109, 98))
)

In [None]:
# Cleaned 2-metre-temperature (smoother) file in the '022' sub-directory.
sekf_t2m_path = os.path.join(base_path, '022', 't2m_smoother_cleaned.nc')
# Take 'T_2M', drop every length-one dimension together with its
# coordinate, and re-chunk the array with chunk sizes (40, 1, 109, 98).
sekf_t2m = (
    xr.open_dataset(sekf_t2m_path)['T_2M']
    .squeeze(drop=True)
    .chunk((40, 1, 109, 98))
)

## Prepare

In [None]:
def _drop_duplicate_times(data):
    """Return ``data`` without repeated time stamps.

    ``Index.duplicated()`` flags every occurrence of a time stamp after its
    first one, so selecting the negated mask keeps the first occurrence only.
    """
    is_first_occurrence = ~data.indexes['time'].duplicated()
    return data.sel(time=is_first_occurrence)


# Apply the same clean-up to all three T2m series.
ens_t2m = _drop_duplicate_times(ens_t2m)
vr_t2m = _drop_duplicate_times(vr_t2m)
sekf_t2m = _drop_duplicate_times(sekf_t2m)

In [None]:
# Give both ensembles the same integer member labels 0..39.
ens_t2m['ensemble'] = np.arange(40)
ens_h2o['ensemble'] = np.arange(40)

In [None]:
# First-guess times: every (12*4)-th stamp of the SEKF T2m time index,
# starting at position 12*4-1, with the last selected stamp discarded.
# NOTE(review): 12*4 presumably encodes the number of output steps per
# assimilation window -- confirm against the model output interval.
fg_time = sekf_t2m.indexes['time'][slice(12 * 4 - 1, None, 12 * 4)][:-1]
# Background times: the first-guess times with the time of day set to midnight.
bg_time = fg_time.normalize()

### Load stations

In [None]:
# Station table, read from the 'stations' key of the HDF5 store.
stations_path = '/p/scratch/chbn29/hbn29p/data/tsmp/runs/utilities/stations.hd5'
df_stations = pd.read_hdf(stations_path, key='stations')

### Load constant

In [None]:
# Constant fields; handed to the observation operator as ``cosmo_const``.
const_path = '/p/scratch/chbn29/hbn29p/data/tsmp/runs/utilities/cosmo_const.nc'
ds_cos_const = xr.open_dataset(const_path)
# Pull the whole data set into memory straight away.
ds_cos_const = ds_cos_const.load()

### Load LatLon

In [None]:
# Latitude and longitude of the ensemble T2m grid, joined along a new
# trailing axis so that the last axis holds the (lat, lon) pair.
coords_latlon = np.stack(
    [ens_t2m[coord_name].values for coord_name in ('lat', 'lon')],
    axis=-1,
)

### Define observation operator

In [None]:
# Observation operator for the stations on the grid described by
# ``coords_latlon`` and the constant fields.
obs_op = CosmoT2mOperator(
    df_stations,
    cosmo_coords=coords_latlon,
    cosmo_const=ds_cos_const,
)
# Override the operator's lapse-rate lookup so that it always returns 0.
obs_op.get_lapse_rate = lambda x: 0

### Prepare VR

In [None]:
# Add length-one 'var_name', 'ensemble' and 'vgrid' dimensions to the VR
# T2m field, then flatten (rlat, rlon, vgrid) into one 'grid' dimension.
vr_obs_prep = (
    vr_t2m
    .expand_dims('var_name', axis=0)
    .expand_dims('ensemble', axis=2)
    .expand_dims('vgrid', axis=-3)
    .stack(grid=['rlat', 'rlon', 'vgrid'])
)
# Label the single variable entry.
vr_obs_prep['var_name'] = ['T_2M']

### Load observations

In [None]:
%%capture
# Load the observation file and attach the T2m observation operator to it
# through the ``obs`` attribute of the loaded object.
obs_path = '/p/scratch/chbn29/hbn29p/data/tsmp/runs/obs/ens/t2m_obs_016_0_1_long.nc'
obs_det = load_observations(obs_path)
obs_det.obs.operator = obs_op.get_obs_method

In [None]:
# Apply the observation operator to the prepared VR field and remove the
# length-one 'ensemble' dimension from the result.
obs_vr_values = obs_det.obs.operator(vr_obs_prep).squeeze('ensemble')
# Start from a deep copy of the loaded observations so that ``obs_det`` stays untouched.
obs_vr = obs_det.copy(deep=True)
# Replace the observed values by the values derived from the VR field.
obs_vr['observations'] = obs_vr_values
# Re-assign 'obs_grid_1' from the original observations after the replacement.
obs_vr['obs_grid_1'] = obs_det['obs_grid_1']
# Keep only the first-guess times.
obs_vr = obs_vr.sel(time=fg_time)
# The operator is assigned again on the newly created object.
obs_vr.obs.operator = obs_op.get_obs_method

# Define assimilation

In [None]:
# Localization radii handed to the Gaspari-Cohn function.
# NOTE(review): presumably (horizontal, vertical) radii -- confirm the
# meaning and units against ``distance_func``.
loc_radius = (15000, 0.7)
local_gc = GaspariCohn(loc_radius, distance_func)
# LETKF with the localization above and an inflation factor of 1.
letkf = LETKFUncorr(
    localization=local_gc,
    inf_factor=1,
)

## Create states

In [None]:
# Ensemble T2m at the first-guess times, with length-one 'var_name' and
# 'vgrid' dimensions added and (rlat, rlon, vgrid) flattened into 'grid'.
fg_ens_state = (
    ens_t2m.sel(time=fg_time)
    .expand_dims('var_name', axis=0)
    .expand_dims('vgrid', axis=-3)
    .stack(grid=['rlat', 'rlon', 'vgrid'])
)
fg_ens_state['var_name'] = ['T_2M']
# Fix the dimension order, then load the values into memory.
fg_ens_state = fg_ens_state.transpose('var_name', 'time', 'ensemble', 'grid')
fg_ens_state = fg_ens_state.load()

In [None]:
# Ensemble soil moisture of soil level index 4 (kept as a length-one
# dimension) at the background times, with a length-one 'var_name' dimension
# added and (lat, lon, levsoi) flattened into 'grid'.
ens_background = (
    ens_h2o.isel(levsoi=[4])
    .sel(time=bg_time)
    .expand_dims('var_name', axis=0)
    .stack(grid=['lat', 'lon', 'levsoi'])
)
ens_background['var_name'] = ['H2OSOI']
# Fix the dimension order, then load the values into memory.
ens_background = ens_background.transpose('var_name', 'time', 'ensemble', 'grid')
ens_background = ens_background.load()

In [None]:
# SEKF T2m at the first-guess times, with length-one 'var_name' and 'vgrid'
# dimensions added and (rlat, rlon, vgrid) flattened into 'grid'.
fg_sekf_state = (
    sekf_t2m.sel(time=fg_time)
    .expand_dims('var_name', axis=0)
    .expand_dims('vgrid', axis=-3)
    .stack(grid=['rlat', 'rlon', 'vgrid'])
)
fg_sekf_state['var_name'] = ['T_2M']
# Fix the dimension order (no 'ensemble' dimension here), then load the values.
fg_sekf_state = fg_sekf_state.transpose('var_name', 'time', 'grid')
fg_sekf_state = fg_sekf_state.load()

## We have to use SEKF values

In [None]:
# SEKF soil moisture of soil level index 4 at the background times, loaded
# into memory. The scalar index removes the 'levsoi' dimension here.
sekf_background = (
    sekf_h2o
    .isel(levsoi=4)
    .sel(time=bg_time)
    .load()
)

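The filter forms its innovation with the ensemble-mean first guess in observation space, $\overline{\mathbf{y}}^b$. To obtain the innovation with respect to the SEKF first guess $\mathbf{y}^d$ instead, the observations $\mathbf{y}^o$ are shifted by a correction $\mathbf{c}$ such that

$\mathbf{y}^o - \overline{\mathbf{y}}^b + \mathbf{c} = \mathbf{y}^o - \mathbf{y}^d$

$\mathbf{c} = \overline{\mathbf{y}}^b - \mathbf{y}^d$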

In [None]:
# Correction added to the observations later on: ensemble mean of the first
# guess in observation space minus the SEKF first guess in observation space.
obs_det_corr = (
    obs_vr.obs.operator(fg_ens_state).mean(dim='ensemble')
    - obs_vr.obs.operator(fg_sekf_state)
)

In [None]:
# Progress bar over (position, time stamp) pairs of the SEKF background.
# ``total`` is passed explicitly because ``enumerate`` has no length.
time_pbar = tqdm(
    enumerate(sekf_background.time.values),
    total=len(sekf_background.time),
)

In [None]:
# Build the progress bar inside this cell. tqdm wraps a one-shot
# ``enumerate`` iterator, so a bar created in another cell is exhausted after
# its first pass and re-running this cell would silently leave ``ds_ana``
# empty. Re-creating it here makes the cell safe to re-run.
time_pbar = tqdm(enumerate(sekf_background.time.values), total=len(sekf_background.time))

# One analysis field per background time; concatenated after the loop.
ds_ana = []
for k, time in time_pbar:
    time_pbar.set_postfix(time=pd.to_datetime(time).strftime('%m-%d %H:%MZ'))
    # Backgrounds valid at this time; the list index keeps a length-one time axis.
    sekf_bg = sekf_background.sel(time=[time])
    tmp_bg = ens_background.sel(time=[time])
    # The first guess is paired with the background by position, not by label.
    tmp_fg_state = fg_ens_state.isel(time=[k])
    tmp_obs = obs_vr.sel(time=tmp_fg_state.indexes['time']).copy(deep=True)
    # Shift the observations by the correction (ensemble-mean first guess minus
    # SEKF first guess, both in observation space).
    tmp_obs['observations'] = tmp_obs['observations'] + obs_det_corr.sel(time=tmp_obs.indexes['time'])
    # Swap the first-guess time labels for the background time so that
    # observations, first guess and background carry the same time coordinate.
    tmp_obs = tmp_obs.copy(deep=True).drop('time')
    tmp_fg_state = tmp_fg_state.copy(deep=True).drop('time')
    tmp_fg_state['time'] = tmp_obs['time'] = tmp_bg.time
    # The operator is assigned again on the freshly created object.
    tmp_obs.obs.operator = obs_op.get_obs_method
    tmp_ana = letkf.assimilate(tmp_bg, tmp_obs, tmp_fg_state)
    # Ensemble-mean increment of the LETKF, brought back onto the unstacked grid ...
    tmp_inc = (tmp_ana.mean('ensemble')-tmp_bg.mean('ensemble')).unstack('grid')
    # ... and added to the SEKF background to form the analysis.
    tmp_ana = sekf_bg + tmp_inc
    ds_ana.append(tmp_ana)

In [None]:
# Join the per-time analysis fields along the 'time' dimension.
ds_ana_concat = xr.concat(objs=ds_ana, dim='time')

In [None]:
# Remove all length-one dimensions; their coordinates are kept as scalar
# coordinates (``drop=False`` is the default, spelled out here).
ds_ana_concat = ds_ana_concat.squeeze(drop=False)

In [None]:
# Write the analysis into the '022' sub-directory. The path is built from
# ``base_path`` like the input paths, instead of repeating the absolute
# prefix; the resulting path is identical to the previously hard-coded one.
ana_path = os.path.join(base_path, '022', 'da_offline_letkf_nature.nc')
ds_ana_concat.to_netcdf(ana_path)