# Train and run wflow_sbm surrogate model

The surrogate emulates the Actual Evapotranspiration and Volumetric Water Content (top 10 cm) produced by Wflow_sbm over the Alpine region at 1 km spatial resolution and a daily time step.

The repository offers a very small dataset in the demodata folder. To train the model on it, uncomment the Jupyter notebook cell "DEMO" and comment out the cell "ALPS".

The figures at the end of the notebook show the surrogate outputs over the Alps; however, the training data needed to reproduce these results are not yet available.

In [4]:
%load_ext autoreload
%autoreload 2

In [18]:
import numpy as np
import xarray as xr
from pathlib import Path

import dask

from hython.preprocess import reshape
from hython.utils import store_as_zarr, read_from_zarr

from numcodecs import Blosc

import matplotlib.pyplot as plt

# Settings

In [6]:
# --- Feature / target variable names -------------------------------------

# WFLOW forcings, input features of the surrogate
dynamic_names = [
    "precip",
    "pet",
    "temp",
]

# WFLOW static parameters, input features of the surrogate
static_names = [
    "thetaS",
    "thetaR",
    "RootingDepth",
    "Swood",
    "KsatVer",
    "Sl",
]

# WFLOW outputs, targets of the surrogate
target_names = [
    "vwc",
    "actevap",
]

# --- File names (resolved against the model directory in a later cell) ---
fn_forcings, fn_params, fn_targets = "forcings.nc", "staticmaps.nc", "output.nc"

# --- Train / test periods (inclusive date strings) ------------------------
train_start, train_end = "2016-01-01", "2018-12-31"
test_start, test_end = "2019-01-01", "2020-12-31"

# The same periods expressed as slice objects.
train_range = slice(train_start, train_end)
test_range = slice(test_start, test_end)

In [7]:
# DEMO 
#
# Alternative data-loading cell for the small demo dataset. To use it,
# uncomment the statements below and comment out the cell that loads the
# "alps1km_eobs" model.
#
# NOTE(review): unlike the "alps1km_eobs" cell, this cell does not define
# `surrogate_data` (used by the zarr-writing cell) and does not select
# `layer=1` -- confirm whether the demo data needs either before running
# the rest of the notebook.

# wflow_model = "datademo"

# wd = Path("../data") / wflow_model

# fp_dynamic_forcings = wd / fn_forcings 
# fp_wflow_static_params = wd / fn_params
# fp_target = wd / fn_targets

# forcings = xr.open_dataset(fp_dynamic_forcings)
# params = xr.open_dataset(fp_wflow_static_params)
# targets = xr.open_dataset(fp_target).isel(lat=slice(None, None, -1))

In [36]:
# ALPS
#
# Open the forcings, static parameters and Wflow outputs of the Alpine model
# and define where the surrogate training data will be written.
wflow_model = "alps1km_eobs"

# Directory of the Wflow model; `wd` and `input_dir_path` refer to the same location.
wd = Path("/mnt/CEPH_PROJECTS/InterTwin/Wflow/models", wflow_model)
input_dir_path = Path(wd)

# Destination zarr store, named after the model.
surrogate_input_path = Path("/mnt/CEPH_PROJECTS/InterTwin/hydrologic_data/surrogate_training")
surrogate_data = surrogate_input_path.joinpath(f"{wflow_model}.zarr")

# All three datasets are opened with chunks of 200 steps along "time".
forcings = xr.open_dataset(input_dir_path / fn_forcings, chunks=dict(time=200))

# Keep only layer 1 of the static parameters.
params = xr.open_dataset(input_dir_path / fn_params, chunks=dict(time=200))
params = params.sel(layer=1)

# Keep only layer 1 of the model outputs, then reverse the order of the "lat" axis
# (presumably to match the latitude orientation of the inputs -- TODO confirm).
targets = xr.open_dataset(input_dir_path / "run_default" / fn_targets, chunks=dict(time=200))
targets = targets.sel(layer=1)
targets = targets.isel(lat=slice(None, None, -1))

In [9]:
def _to_lat_lon(ds):
    """Return *ds* with "latitude"/"longitude" renamed to "lat"/"lon".

    Only the aliases actually present in *ds* are renamed, and an alias is
    skipped when its short name already exists, so the call is a no-op for
    datasets that already use "lat"/"lon". Any other failure (e.g. *ds* is
    not a dataset) is raised instead of being silently swallowed.
    """
    aliases = {"latitude": "lat", "longitude": "lon"}
    mapping = {
        old: new
        for old, new in aliases.items()
        if (old in ds.dims or old in ds.variables)
        and not (new in ds.dims or new in ds.variables)
    }
    return ds.rename(mapping) if mapping else ds


# Handle each dataset on its own: one dataset already using "lat"/"lon" must
# not prevent the other from being renamed.
forcings = _to_lat_lon(forcings)
params = _to_lat_lon(params)

In [12]:
# Remove the "time" dimension, and the variables defined on it, from the static
# parameters. errors="ignore" makes this a no-op when "time" is already absent,
# so the cell can be re-run without raising.
params = params.drop_dims("time", errors="ignore")

In [None]:
# Reshape forcings, static parameters and targets with hython's `reshape`,
# asking for xarray objects back. The outputs are presumably the dynamic
# inputs (Xd), static inputs (Xs) and targets (Y), in argument order -- TODO confirm.
Xd, Xs, Y = reshape(forcings, params, targets, return_type="xarray")

In [33]:
# Boolean mask named "mask_lake": True where the "wflow_lakeareas" value is positive.
wflow_lakes = params["wflow_lakeareas"]
mask_lakes = wflow_lakes > 0
mask_lakes = mask_lakes.astype(np.bool_).rename("mask_lake")

In [31]:
# Mask named "mask": True where the first static parameter (static_names[0]) is NaN.
missing_mask = np.isnan(params[static_names[0]])
missing_mask = missing_mask.rename("mask")

In [37]:
# Write the reshaped data and the masks to the zarr store, one group each.

# Blosc compressor shared by all groups. The codec name was previously 'zl4',
# which is not a Blosc codec; 'lz4' is the intended one.
compressor = Blosc(cname="lz4", clevel=4, shuffle=Blosc.BITSHUFFLE)

# Dynamic inputs, targets and static inputs are written with multi_index="gridcell".
store_as_zarr(
    Xd,
    url=surrogate_data,
    group="xd",
    storage_options={"compressor": compressor},
    multi_index="gridcell",
)
store_as_zarr(
    Y,
    url=surrogate_data,
    group="y",
    storage_options={"compressor": compressor},
    multi_index="gridcell",
)
store_as_zarr(
    Xs,
    url=surrogate_data,
    group="xs",
    storage_options={"compressor": compressor},
    multi_index="gridcell",
)

# The masks are written without the multi_index argument.
store_as_zarr(
    missing_mask,
    url=surrogate_data,
    group="mask",
    storage_options={"compressor": compressor},
)
store_as_zarr(
    mask_lakes,
    url=surrogate_data,
    group="mask_lake",
    storage_options={"compressor": compressor},
)