In [1]:
import os
from copy import deepcopy

import xarray as xr
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook as tqdm

from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter
import matplotlib.pyplot as plt
import matplotlib.colors as mplc
import cartopy.crs as ccrs
import matplotlib.dates as mdates
import matplotlib.lines as mpll
from pandas.plotting import register_matplotlib_converters

import pytassim
from py_bacy.intf_pytassim.io import load_observations

In [2]:
# Fixed-seed legacy NumPy generator, so that every random draw taken from it
# in this notebook is reproducible after a kernel restart.
rnd = np.random.RandomState(seed=42)

In [3]:
# Apply both style sheets in one call; they are processed in list order, so
# settings from 'egu_journals' win over 'paper' where they overlap.
plt.style.use(['paper', 'egu_journals'])
# Register the pandas date/time converters with matplotlib.
register_matplotlib_converters()

# Coordinate reference systems: regular lon/lat and a rotated-pole system.
# NOTE(review): the pole position presumably matches the grid carrying the
# `rlon`/`rlat` coordinates of the T2m data -- confirm against the model setup.
plate_carree = ccrs.PlateCarree()
rotated_pole = ccrs.RotatedPole(pole_latitude=41.5, pole_longitude=-171.0)

# Load data

In [4]:
# Root directory of the experiment runs; the input files loaded in this
# notebook are addressed relative to it.
base_path = '/p/scratch/chbn29/hbn29p/data/tsmp/runs/da_enkf_for_soil/'

## H2O

In [5]:
# Open run '016' and pull out the 'H2OSOI' variable.
vr_h2o_path = os.path.join(base_path, '016', 'h2o_cleaned.nc')
h2o_dataset = xr.open_dataset(vr_h2o_path)
h2o_all_levels = h2o_dataset['H2OSOI'].squeeze(drop=True)
# Keep only the level at positional index 4 along `levsoi` (the fifth entry).
vr_h2o = h2o_all_levels.isel(levsoi=4)

## T2m

In [6]:
# Open run '016' and pull out the 'T_2M' variable, dropping all length-one
# dimensions together with their coordinates.
vr_t2m_path = os.path.join(base_path, '016', 't2m_cleaned.nc')
t2m_dataset = xr.open_dataset(vr_t2m_path)
vr_t2m = t2m_dataset['T_2M'].squeeze(drop=True)

## Pre-process data

In [7]:
def _drop_duplicated_times(data_array):
    """Return ``data_array`` with only the first occurrence of each time stamp."""
    # `duplicated()` flags every repeat after the first; invert to keep firsts.
    is_first_occurrence = ~data_array.indexes['time'].duplicated()
    return data_array.isel(time=is_first_occurrence)


# Both fields get the same clean-up; re-running this cell is a no-op because
# the time axes are already unique afterwards.
vr_h2o = _drop_duplicated_times(vr_h2o)
vr_t2m = _drop_duplicated_times(vr_t2m)

# Remapping via bilinear interpolation from COSMO to CLM

## Get clm coordinates

In [8]:
# Flatten the lat/lon grid of the soil-moisture field into a single stacked
# `grid` dimension so that each grid point can be transformed on its own.
prep_clm = vr_h2o.stack(grid=['lat', 'lon'])

# Transform the regular lon/lat points into the rotated-pole system. Column 0
# of the result is used as rotated longitude, column 1 as rotated latitude.
clm_coords_rotated = rotated_pole.transform_points(
    plate_carree, prep_clm.lon.values, prep_clm.lat.values
)
rotated_lon = clm_coords_rotated[:, 0]
rotated_lat = clm_coords_rotated[:, 1]

clm_rot_index = pd.MultiIndex.from_arrays(
    [rotated_lon, rotated_lat], names=['rlon', 'rlat']
)

# Rotated coordinates as DataArrays along `grid`; they serve as the target
# points of the interpolation.
clm_rlon = xr.DataArray(rotated_lon, coords={'grid': prep_clm.grid}, dims=['grid'])
clm_rlat = xr.DataArray(rotated_lat, coords={'grid': prep_clm.grid}, dims=['grid'])

## Remapping

## Get clm coordinates

In [8]:
# NOTE(review): this cell recomputes the same CLM coordinate variables as the
# earlier "Get clm coordinates" cell (same inputs, same names). Running it again
# is harmless, but one of the two cells is redundant -- confirm and remove.
prep_clm = vr_h2o.stack(grid=['lat', 'lon'])
clm_coords_rotated = rotated_pole.transform_points(
    plate_carree,
    prep_clm.lon.values,
    prep_clm.lat.values,
)
clm_rot_index = pd.MultiIndex.from_arrays(
    [clm_coords_rotated[:, 0], clm_coords_rotated[:, 1]],
    names=['rlon', 'rlat'],
)
# Both coordinate arrays live on the stacked `grid` dimension.
grid_coords = {'grid': prep_clm.grid}
clm_rlon = xr.DataArray(clm_coords_rotated[:, 0], coords=grid_coords, dims=['grid'])
clm_rlat = xr.DataArray(clm_coords_rotated[:, 1], coords=grid_coords, dims=['grid'])

## Remapping

In [9]:
# Linearly interpolate T2m at the rotated coordinates of every CLM grid point;
# because the targets share the `grid` dimension, the result is point-wise
# along `grid` rather than an outer product of rlon and rlat.
t2m_at_clm_points = vr_t2m.interp(rlon=clm_rlon, rlat=clm_rlat, method='linear')
# Drop the rotated coordinates and restore the lat/lon layout.
vr_t2m_interp = t2m_at_clm_points.drop(['rlon', 'rlat']).unstack('grid')

# Create observation values

In [10]:
# Boolean masks over the interpolated time axis.
interp_times = vr_t2m_interp.indexes['time']
# True where the time stamp falls on a full hour.
hourly = interp_times.minute == 0
# True where the hour of day is one of 09, 12, 15 or 18.
ecmwf_obs_times = np.isin(interp_times.hour, [9, 12, 15, 18])
# A time step is used as observation time only if both conditions hold.
combined_time_mask = ecmwf_obs_times & hourly

In [11]:
# `combined_time_mask` is a positional boolean mask, not a set of time labels,
# so select with the positional indexer `isel` (as the de-duplication cell
# does) instead of the label-based `sel`.
t2m_interp_sel = vr_t2m_interp.isel(time=combined_time_mask)

## Sample noise
$\epsilon \sim \mathcal{N}(\mu = 0\,\text{K}, \sigma = 0.1\,\text{K})$

In [12]:
# Standard deviation of the Gaussian observation noise (0.1 K according to the
# formula above); used as the noise scale and, squared, as observation variance.
obs_stddev = 0.1

In [13]:
# One independent zero-mean Gaussian draw per selected T2m value, taken from
# the seeded generator so the perturbation is reproducible.
sampled_noise = rnd.normal(
    loc=0.0,
    scale=obs_stddev,
    size=t2m_interp_sel.shape,
)

In [14]:
# Perturb the selected T2m values element-wise with the sampled noise (the
# noise array was drawn with the same shape as `t2m_interp_sel`).
t2m_obs_values = t2m_interp_sel + sampled_noise

# Create PyTassim-compatible observations

In [15]:
# Add a length-one `vgrid` dimension (third from last) with coordinate value 0,
# then merge lat, lon and vgrid into the stacked `obs_grid_1` dimension.
t2m_obs_prep = (
    t2m_obs_values
    .expand_dims('vgrid', axis=-3)
    .assign_coords(vgrid=np.array([0]))
    .stack(obs_grid_1=['lat', 'lon', 'vgrid'])
)

# Use the first slice along the leading dimension (without its `time`
# coordinate) as a template and fill it with the constant observation variance.
obs_variance = obs_stddev ** 2
obs_cov = t2m_obs_prep[0].drop('time').copy(deep=True)
obs_cov[:] = obs_variance

In [16]:
# Bundle the noisy observations and their variance into one dataset.
sekf_obs_variables = {
    'observations': t2m_obs_prep,
    'covariance': obs_cov,
}
sekf_obs = xr.Dataset(data_vars=sekf_obs_variables)

In [17]:
# Read the validity flag exposed by the dataset's `obs` accessor (presumably
# registered by the `pytassim` import -- confirm). Fail here if it is falsy, so
# that a "Run All" can never go on to write an invalid observation file.
obs_is_valid = sekf_obs.obs.valid
assert obs_is_valid, 'sekf_obs is not a valid observation dataset'
print(obs_is_valid)

True


# Save observations

In [18]:
# Target NetCDF file that the generated observation dataset is written to.
OBS_PATH = "/p/scratch/chbn29/hbn29p/data/tsmp/runs/obs/ens/sekf_obs_016_0_1_long.nc"

In [19]:
# Remember the level names of the stacked index in the dataset attributes, so
# the multi-index can be rebuilt after loading.
multiindex_levels = list(sekf_obs.indexes['obs_grid_1'].names)

# Turn the `obs_grid_1` multi-index back into plain coordinates before writing.
obs_to_save = sekf_obs.reset_index('obs_grid_1')
obs_to_save.attrs['multiindex'] = multiindex_levels
obs_to_save.to_netcdf(OBS_PATH)