In [18]:
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap, BoundaryNorm
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import xarray as xr
import time
import numpy as np
import geopandas as gpd
import xarray as xr
import os
from itertools import product
from scipy.stats import pearsonr
import numpy as np
from numba import njit
import random
from joblib import Parallel, delayed
from typing import Callable, List, Union, Tuple

In [2]:
@njit
def time_evolution_numba(temp, rad, prec, ndvi, c_s, alpha, beta, gamma, c_m, iota, temp_w, ndvi_w):
    """Step a snow / soil-moisture bucket model through one forcing time series.

    The state written at index ``t`` is computed from the forcing at index
    ``t - 1``, so element 0 of every returned array is never written and
    stays NaN.

    Parameters
    ----------
    temp : 1-D array
        Temperature. Values above 273.15 are treated as melting conditions
        (presumably Kelvin -- confirm against the data source).
    rad : 1-D array
        Radiation; scaled internally by 1 / 2260000 (J/day/m**2 -> mm/day).
    prec : 1-D array
        Precipitation; scaled internally by 1000 (m/day -> mm/day).
    ndvi : 1-D array
        Vegetation index. NaNs are replaced by 0 before normalisation.
    c_s : float
        Storage scale; soil moisture ``w`` starts at ``0.9 * c_s`` and enters
        runoff and evaporation as the ratio ``w / c_s``.
    alpha, gamma : float
        Exponents applied to ``w / c_s`` in the runoff and evaporation terms.
    beta : float
        Evaporation factor, modulated per time step by the temperature/NDVI
        weight.
    c_m : float
        Snow melt per degree above 273.15.
    iota : float
        Constant added to the available water in the runoff term.
    temp_w, ndvi_w : float
        Weights of the min-max normalised temperature and NDVI in the
        evaporation weight.

    Returns
    -------
    tuple of four 1-D float arrays
        ``(runoff, evaporation, soil moisture, snow)``, each of ``len(temp)``.
    """
    # Initialize
    length = len(temp)
    runoff_out = np.full(length, np.nan)
    evapo_out = np.full(length, np.nan)
    soil_mois_out = np.full(length, np.nan)
    snow_out = np.full(length, np.nan)

    # Transformations / Calculations for Setup
    conv = 1 / 2260000  # from J/day/m**2 to mm/day
    rad = rad * conv  # convert radiation to mm/day
    prec = prec * 10 **3 # from m/day to mm/day
    w = 0.9 * c_s
    snow = 0.0  # float from the start so the variable has a single type

    # --- calc_et_weight function ---
    ndvi = np.nan_to_num(ndvi, nan=0.0)

    # Min-max normalisation. A constant series has zero span and would give
    # 0/0 = NaN for every step (and therefore NaN for every model output), so
    # such a series is mapped to a weight contribution of 0 instead.
    temp_span = temp.max() - temp.min()
    normalized_temp = (temp - temp.min()) / temp_span
    if temp_span == 0:
        normalized_temp[:] = 0.0

    ndvi_span = ndvi.max() - ndvi.min()
    normalized_ndvi = (ndvi - ndvi.min()) / ndvi_span
    if ndvi_span == 0:
        normalized_ndvi[:] = 0.0

    et_weight = temp_w * normalized_temp + ndvi_w * normalized_ndvi
    beta_weighted = beta * et_weight

    for t in range(1, length):
        # Forcing of the previous step drives the state stored at step t.
        prec_t = prec[t-1]
        temp_t = temp[t-1]
        rad_t = rad[t-1]
        beta_t = beta_weighted[t-1]

        # ---- snow_function ----
        is_melting = temp_t > 273.15
        has_snow = snow >= 0.001

        if not is_melting:
            # Not melting: all precipitation is added to the snow pack.
            snow = snow + prec_t
            water = 0.0
        elif is_melting and has_snow:
            # Melt is proportional to the excess temperature, capped by the pack.
            melt = c_m * (temp_t - 273.15)
            melt = min(melt, snow)
            snow = snow - melt
            water = melt + prec_t
        else:
            water = prec_t

        runoff = (water + iota) * (w / c_s) ** alpha
        evap = beta_t * (w / c_s) ** gamma * rad_t

        # Update the storage and keep it non-negative.
        w = w + (water - runoff - evap)
        w = np.maximum(w, 0.0)

        # Store results
        runoff_out[t] = runoff
        evapo_out[t] = evap
        soil_mois_out[t] = w
        snow_out[t] = snow

    return runoff_out, evapo_out, soil_mois_out, snow_out

In [3]:
class WaterModel:
    """Bundle a parameter set with forcing data and run the water balance model.

    ``params`` must provide the keys ``c_s``, ``alpha``, ``beta``, ``gamma``,
    ``c_m``, ``iota``, ``temp_w`` and ``ndvi_w``. ``data`` must provide the
    entries ``temperature``, ``radiation``, ``precipitation`` and ``ndvi``,
    each exposing a ``.values`` array.
    """

    def __init__(self, params: dict, data):
        self.data = data
        self.params = params

    def run_simulation_whole_catchment(self):
        """Run ``time_evolution_numba`` on the stored data and return only the runoff series."""
        forcing_names = ('temperature', 'radiation', 'precipitation', 'ndvi')
        param_names = ('c_s', 'alpha', 'beta', 'gamma', 'c_m', 'iota', 'temp_w', 'ndvi_w')

        # Both lists follow the positional argument order of time_evolution_numba.
        forcing = [self.data[name].values for name in forcing_names]
        coefficients = [self.params[name] for name in param_names]

        outputs = time_evolution_numba(*forcing, *coefficients)
        # The model returns (runoff, evaporation, soil moisture, snow).
        return outputs[0]

In [None]:
# Input locations, relative to the notebook's working directory.
runoff_data_path = r"data/catchments/GRDC-Daily.nc"
bentfeld_path = r"data/catchment_timeseries/bentfeld"

# Observed discharge: load everything, cut to the study period and pick the
# record whose station name is "BENTFELD".
runoff = (
    xr.open_mfdataset(runoff_data_path)
    .load()
    .sel(time=slice('2000-03-01', '2022-12-19'))
)
bentfeld_id = runoff["station_name"].values == "BENTFELD"
bentfeld_index = np.nonzero(bentfeld_id)[0][0]
runoffBentfeld = runoff["runoff_mean"].isel(id=bentfeld_index)


def _load_forcing(file_name):
    """Open one forcing file below ``bentfeld_path`` and read it fully into memory."""
    return xr.open_mfdataset(bentfeld_path + file_name, combine='by_coords').load()


temperature = _load_forcing('/temperature.nc')
precipitation = _load_forcing('/precipitation.nc')
radiation = _load_forcing('/radiation.nc')
ndvi = _load_forcing('/ndvi.nc')

# Assemble one dataset, adding the variables one at a time in a fixed order.
# The gauge series is broadcast onto the radiation grid so every grid cell
# carries the same observed runoff.
data = xr.Dataset()
for var_name, values in (
    ('temperature', temperature['t2m']),
    ('precipitation', precipitation['precipitation']),
    ('radiation', radiation['nr']),
    ('ndvi', ndvi['ndvi']),
    ('observedRunoff', runoffBentfeld.broadcast_like(radiation['nr'])),
):
    data[var_name] = values

# Keep only the period 2000-03-01 .. 2022-12-19.
data = data.sel(time=slice('2000-03-01', '2022-12-19'))
data

In [17]:
# Collapse the gridded inputs to one catchment-wide time series per variable.
dataMean = xr.Dataset()
dataMean['temperature'] = data["temperature"].mean(dim=("x", "y"))
dataMean['precipitation'] = data["precipitation"].mean(dim=("x", "y"))
dataMean['radiation'] = data["radiation"].mean(dim=("x", "y"))
dataMean['ndvi'] = data["ndvi"].mean(dim=("x", "y"))
# observedRunoff is a single gauge series broadcast to every grid cell, so the
# spatial mean recovers the gauge value. A sum would multiply it by the number
# of cells, and xarray's NaN-skipping sum would turn days without an
# observation into 0.0 instead of leaving them NaN.
dataMean['observedRunoff'] = data["observedRunoff"].mean(dim=("x", "y"))

# Starting parameter set for the water model.
params_new = {
    "c_s": 840,
    "alpha": 8,
    "gamma": 0.5,
    "beta": 0.8,
    "c_m": 2.0,
    "temp_w": 0.75,
    "ndvi_w": 0.5,
    "iota": 10
}

# Parameter bounds; presumably listed in the same order as the keys of
# params_new (c_s, alpha, gamma, beta, c_m, temp_w, ndvi_w, iota) -- confirm.
lowerBounds = [100, 1, 0, 0, 1, 0, 0, 1]
upperBounds = [1500, 15, 1, 1, 5, 1, 1, 30]

In [16]:
# Time one model run on the catchment-mean series. perf_counter is a
# monotonic, high-resolution clock intended for measuring short durations;
# time.time() is a wall clock whose resolution can be too coarse for a run
# this short.
# NOTE: the first call after (re)defining the jitted model function also
# includes its compilation time.
start_time = time.perf_counter()
wm_new = WaterModel(params=params_new, data= dataMean)
runoff_new = wm_new.run_simulation_whole_catchment()
duration = time.perf_counter() - start_time
print(f"calculations completed in {duration:.5f} seconds.")

calculations completed in 0.00100 seconds.


In [39]:
class Calibrator:
    """Calibrate the water model on catchment-mean data by iterative random search.

    Each iteration builds three candidate values per parameter around the
    current best set (the step shrinks with the iteration number), evaluates a
    random sample of their combinations on training data and keeps the
    combination with the highest Pearson correlation between simulated and
    observed runoff.

    Parameters
    ----------
    initParams : dict
        Starting parameter set (keys as required by ``WaterModel``).
    rasterData : xarray.Dataset
        Gridded data with dims ``time``, ``x``, ``y`` and the variables
        ``temperature``, ``precipitation``, ``radiation``, ``ndvi`` and
        ``observedRunoff``.
    areaSize : float
        Divisor used when normalising the observed runoff (presumably the
        catchment area in km**2 -- confirm).
    numCombi : int
        Number of parameter combinations evaluated per iteration.
    numIter : int
        Number of refinement iterations.
    """

    def __init__(self, initParams, rasterData, areaSize, numCombi = 3, numIter = 3):
        self.params = initParams
        # areaSize must be set before calculate_catchment_mean, which reads it.
        self.areaSize = areaSize
        self.rasterData = self.calculate_catchment_mean(rasterData)
        self.numCombi = numCombi
        self.numIter = numIter

    def calculate_catchment_mean(self, data):
        """Reduce the gridded dataset to one time series per variable.

        The observed runoff is expected to be one gauge series replicated on
        every grid cell, so it is reduced with the spatial mean: a sum would
        scale it by the number of cells, and xarray's NaN-skipping sum would
        turn missing observations into 0.0, hiding them from the NaN masks
        used when scoring.
        """
        dataMean = xr.Dataset()
        for name in ("temperature", "precipitation", "radiation", "ndvi"):
            dataMean[name] = data[name].mean(dim=("x", "y"))
        dataMean['observedRunoff'] = self.normalize_observedRunoff(
            data["observedRunoff"].mean(dim=("x", "y")), self.areaSize
        )
        return dataMean

    def normalize_observedRunoff(self, observedRunoff, areaSize):
        """Return ``observedRunoff * 86400 / (areaSize * 1000)``.

        Presumably converts m**3/s over an area in km**2 to mm/day -- confirm
        the units of the inputs.
        """
        return observedRunoff*86400/(areaSize*1000)

    def create_paramsChoice(self, params, i):
        """Return, per parameter, the candidates ``[v - v/2**i, v, v + v/2**i]``."""
        paramsChoice = {key: [
            value-value/(2**i),
            value,
            value+value/(2**i)]
                for key, value in params.items()}
        return paramsChoice

    def split_data(self, data, splitPerc):
        """Split ``data`` along ``time`` into two subsets of whole 365-step blocks.

        ``int(numYears * splitPerc)`` randomly chosen blocks (drawn without
        replacement) go to the first returned subset together with any steps
        beyond the last complete block; the remaining blocks form the second
        subset. With ``splitPerc=0.9`` the first subset therefore holds about
        90 % of the record, which is how every caller uses the return value.

        Returns
        -------
        (train, test) : the first and second subset, in that order.
        """
        leng = data.sizes["time"]
        numYears = leng // 365
        numFirst = int(numYears * splitPerc)
        # A permutation gives distinct blocks; the tail is the held-out part.
        heldOutYears = np.random.permutation(numYears)[numFirst:]
        mask = np.zeros(leng, dtype=bool)
        for s in heldOutYears:
            mask[s*365:(s+1)*365] = True
        train = data.isel(time=~mask)
        test = data.isel(time=mask)

        return train, test

    @staticmethod
    def _pearson_or_nan(simulated, observed):
        """Pearson r over steps where both series are finite; NaN with fewer than two such steps."""
        valid = ~np.isnan(observed) & ~np.isnan(simulated)
        if np.sum(valid) < 2:
            return np.nan
        r, _ = pearsonr(simulated[valid], observed[valid])
        return r

    def calibrate_pixel(self, paramsChoice, valTrain):
        """Evaluate a random sample of parameter combinations and score the best one.

        Returns
        -------
        (best_params, r_train, r_val)
            The combination with the highest training correlation, that
            correlation, and the correlation on the validation subset (NaN if
            fewer than two valid points are available).
        """
        allCombinations = list(product(*paramsChoice.values()))
        # random.sample raises if asked for more items than exist.
        numSamples = min(self.numCombi, len(allCombinations))
        randomCombinationsSample = random.sample(allCombinations, numSamples)
        param_dicts = [
            dict(zip(paramsChoice.keys(), combo)) for combo in randomCombinationsSample
        ]
        train, val = self.split_data(valTrain, 0.9)

        observed = train["observedRunoff"].values
        nan_mask_obs = ~np.isnan(observed)

        def evaluate(params):
            wm = WaterModel(params=params, data=train)
            runoff = wm.run_simulation_whole_catchment()
            mask = nan_mask_obs & ~np.isnan(runoff)

            if np.sum(mask) < 2:
                return params, -np.inf

            r, _ = pearsonr(runoff[mask], observed[mask])
            # A NaN score (e.g. constant simulated runoff) must never win max().
            if np.isnan(r):
                return params, -np.inf
            return params, r

        results = Parallel(n_jobs=-1)(delayed(evaluate)(params) for params in param_dicts)
        best_params, best_r = max(results, key=lambda x: x[1])

        wm = WaterModel(params=best_params, data=val)
        runoff = wm.run_simulation_whole_catchment()
        r_val = self._pearson_or_nan(runoff, val["observedRunoff"].values)
        r_train = best_r
        return best_params, r_train, r_val

    def calculate_params_whole_catchment(self):
        """Run all refinement iterations and return the final parameter set.

        ``self.params`` is updated to the best set of each iteration. The
        returned dict is a copy of the final set with an extra key ``"R2"``
        holding the Pearson correlation (r, not r squared) on the held-out
        test subset. Returning a copy keeps ``"R2"`` out of ``self.params``,
        where it would otherwise be perturbed like a model parameter on a
        later run.
        """
        valTrain, test = self.split_data(self.rasterData, 0.9)
        observedTest = test["observedRunoff"].values

        # Per-iteration history, kept for the optional diagnostic plot below.
        lParams = []
        lRVal = []
        lRTest = []
        lRTrain = []

        # Defaults so that numIter == 0 returns the initial parameters.
        newParams = self.params
        rTest = np.nan

        for i in range(1,self.numIter+1):
            paramsChoice = self.create_paramsChoice(self.params, i)
            newParams, r_train, r_val = self.calibrate_pixel(paramsChoice, valTrain)
            self.params = newParams

            wm = WaterModel(params=newParams, data=test)
            runoff = wm.run_simulation_whole_catchment()
            rTest = self._pearson_or_nan(runoff, observedTest)

            lParams.append(newParams)
            lRVal.append(r_val)
            lRTest.append(rTest)
            lRTrain.append(r_train)

        #plt.plot(lRVal, label="VAL")
        #plt.plot(lRTest, label="lRTest")
        #plt.plot(lRTrain, label="TRAIN")
        #plt.legend()

        result = dict(newParams)
        result["R2"] = rTest
        return result

In [None]:
class GridCellCalibrator:
    """Calibrate the water model separately for every grid cell of a raster.

    For each cell, the same iterative random search is run: three candidate
    values per parameter around the current best set, a random sample of
    their combinations scored by the Pearson correlation between simulated
    and observed runoff, and the best combination carried to the next
    iteration.

    Parameters
    ----------
    initParams : dict
        Starting parameter set (keys as required by ``WaterModel``).
    rasterData : xarray.Dataset
        Gridded data with dims ``time``, ``x``, ``y`` and the variables
        ``temperature``, ``precipitation``, ``radiation``, ``ndvi`` and
        ``observedRunoff``.
    areaSize : float
        Divisor used when normalising the observed runoff (presumably the
        catchment area in km**2 -- confirm).
    numCombi : int
        Number of parameter combinations evaluated per iteration.
    numIter : int
        Number of refinement iterations.
    """

    def __init__(self, initParams, rasterData, areaSize, numCombi=3, numIter=3):
        self.initParams = initParams
        self.areaSize = areaSize
        self.rasterData = rasterData
        self.numCombi = numCombi
        self.numIter = numIter
        # Look the sizes up by dimension name: cells are indexed with
        # isel(x=..., y=...), so relying on the positional order of the
        # array's shape would break for data stored as (time, y, x).
        temperature = rasterData["temperature"]
        self.grid_shape = (temperature.sizes["x"], temperature.sizes["y"])  # (x, y)

    def normalize_observedRunoff(self, runoff):
        """Return ``runoff * 86400 / (areaSize * 1000)``.

        Presumably converts m**3/s over an area in km**2 to mm/day -- confirm
        the units of the inputs.
        """
        return runoff * 86400 / (self.areaSize * 1000)

    def create_paramsChoice(self, params, i):
        """Return, per parameter, the candidates ``[v - v/2**i, v, v + v/2**i]``."""
        return {
            key: [value - value / (2 ** i), value, value + value / (2 ** i)]
            for key, value in params.items()
        }

    def split_data(self, data, splitPerc):
        """Split ``data`` along ``time`` into two subsets of whole 365-step blocks.

        ``int(numYears * splitPerc)`` randomly chosen blocks (drawn without
        replacement) go to the first returned subset together with any steps
        beyond the last complete block; the remaining blocks form the second
        subset. With ``splitPerc=0.9`` the first (training) subset therefore
        holds about 90 % of the record.

        Returns
        -------
        (train, test) : the first and second subset, in that order.
        """
        leng = data.sizes["time"]
        maxYear = leng // 365
        numFirst = int(maxYear * splitPerc)
        # A permutation gives distinct blocks; the tail is the held-out part.
        heldOutYears = np.random.permutation(maxYear)[numFirst:]
        mask = np.zeros(leng, dtype=bool)
        for s in heldOutYears:
            mask[s * 365:(s + 1) * 365] = True
        train = data.isel(time=~mask)
        test = data.isel(time=mask)
        return train, test

    def calibrate_gridcell(self, x, y):
        """Calibrate the cell at integer position ``(x, y)``.

        Returns
        -------
        (x, y, best_params)
            The cell position and the best parameter set of the last
            iteration (a copy of ``initParams`` when ``numIter`` is 0).
        """
        data = self.rasterData.isel(x=x, y=y)
        observed = self.normalize_observedRunoff(data["observedRunoff"])
        data = data.drop_vars("observedRunoff")
        data["observedRunoff"] = observed

        # Only the training subset is used; the held-out part is not scored here.
        train, _val = self.split_data(data, 0.9)
        # Extract once instead of on every candidate evaluation.
        observedTrain = train["observedRunoff"].values
        validObserved = ~np.isnan(observedTrain)

        params = self.initParams.copy()
        best_params = params

        for i in range(1, self.numIter + 1):
            paramsChoice = self.create_paramsChoice(params, i)
            allCombinations = list(product(*paramsChoice.values()))
            # random.sample raises if asked for more items than exist.
            numSamples = min(self.numCombi, len(allCombinations))
            sampledCombinations = random.sample(allCombinations, numSamples)
            param_dicts = [dict(zip(paramsChoice.keys(), combo)) for combo in sampledCombinations]

            def evaluate(p):
                # Best effort: a candidate that makes the model or the scoring
                # fail is ranked last instead of aborting the whole search.
                try:
                    wm = WaterModel(params=p, data=train)
                    runoff = wm.run_simulation_whole_catchment()
                    mask = ~np.isnan(runoff) & validObserved
                    if np.sum(mask) < 2:
                        return p, -np.inf
                    r, _ = pearsonr(runoff[mask], observedTrain[mask])
                    # A NaN score (e.g. constant simulated runoff) must never win max().
                    if np.isnan(r):
                        return p, -np.inf
                    return p, r
                except Exception:
                    return p, -np.inf

            results = Parallel(n_jobs=-1)(delayed(evaluate)(p) for p in param_dicts)
            best_params, best_r = max(results, key=lambda x: x[1])
            params = best_params

        return (x, y, best_params)

    def calibrate_all_gridcells(self):
        """Calibrate every grid cell in parallel and return ``{(x, y): best_params}``."""
        x_size, y_size = self.grid_shape
        grid_indices = [(x, y) for x in range(x_size) for y in range(y_size)]
        results = Parallel(n_jobs=-1)(delayed(self.calibrate_gridcell)(x, y) for x, y in grid_indices)
        return {(x, y): params for x, y, params in results}

In [47]:
# Rebuild the gridded dataset from the loaded inputs, adding the variables one
# at a time in a fixed order. The gauge series is broadcast onto the radiation
# grid so every grid cell carries the same observed runoff.
data = xr.Dataset()
for var_name, values in (
    ('temperature', temperature['t2m']),
    ('precipitation', precipitation['precipitation']),
    ('radiation', radiation['nr']),
    ('ndvi', ndvi['ndvi']),
    ('observedRunoff', runoffBentfeld.broadcast_like(radiation['nr'])),
):
    data[var_name] = values

# Keep only the full calendar years 2000 through 2021.
data = data.sel(time=slice('2000-01-01', '2021-12-31'))

# Starting parameter set for the calibration.
params_new = dict(
    c_s=840,
    alpha=8,
    gamma=0.5,
    beta=0.8,
    c_m=2.0,
    temp_w=0.75,
    ndvi_w=0.5,
    iota=10,
)

In [50]:
# Calibrate every grid cell and report how long the calibration itself took.
# The elapsed time is taken before printing the (large) result dict so the
# reported duration does not include the time spent printing it;
# perf_counter is the monotonic clock intended for measuring durations.
# areaSize=1018 is presumably the catchment area in km**2 -- confirm.
start_time = time.perf_counter()
cal = GridCellCalibrator(initParams=params_new, rasterData=data, areaSize=1018, numCombi=100, numIter=3)
resParams = cal.calibrate_all_gridcells()
duration = time.perf_counter() - start_time
print(resParams)
print(f"calculations completed in {duration:.5f} seconds.")

{(0, 0): {'c_s': 945.0, 'alpha': 9.0, 'gamma': 0.4921875, 'beta': 0.675, 'c_m': 2.625, 'temp_w': 0.984375, 'ndvi_w': 0.8203125, 'iota': 5.46875}, (0, 1): {'c_s': 708.75, 'alpha': 12.0, 'gamma': 0.65625, 'beta': 1.5000000000000002, 'c_m': 1.40625, 'temp_w': 0.4921875, 'ndvi_w': 0.65625, 'iota': 5.625}, (0, 2): {'c_s': 918.75, 'alpha': 5.0, 'gamma': 0.6328125, 'beta': 1.35, 'c_m': 2.0, 'temp_w': 0.984375, 'ndvi_w': 0.25, 'iota': 5.46875}, (0, 3): {'c_s': 708.75, 'alpha': 16.875, 'gamma': 0.375, 'beta': 0.8, 'c_m': 0.84375, 'temp_w': 1.265625, 'ndvi_w': 0.4375, 'iota': 18.75}, (0, 4): {'c_s': 420.0, 'alpha': 7.875, 'gamma': 0.1875, 'beta': 0.5, 'c_m': 1.125, 'temp_w': 0.41015625, 'ndvi_w': 0.375, 'iota': 12.65625}, (1, 0): {'c_s': 472.5, 'alpha': 16.875, 'gamma': 0.2109375, 'beta': 0.4, 'c_m': 1.6875, 'temp_w': 0.9375, 'ndvi_w': 0.2734375, 'iota': 11.25}, (1, 1): {'c_s': 1260.0, 'alpha': 15.0, 'gamma': 0.546875, 'beta': 0.6000000000000001, 'c_m': 2.5, 'temp_w': 1.58203125, 'ndvi_w': 0.218