# Data Assimilation with Newtonian Nudging 

In [None]:
import os
from pathlib import Path
# The imports and data paths below are relative ('functions/...', 'data/...'),
# so when the notebook is started from inside the 'data_assimilation' folder
# the working directory is moved one level up.
pad = Path(os.getcwd())
if pad.name == 'data_assimilation':
    pad_correct = pad.parent
    os.chdir(pad_correct)
from functions.PDM import PDM
from functions.performance_metrics import NSE
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
import hvplot 
import hvplot.pandas
from numba import jit
from datetime import datetime

%load_ext autoreload 
%autoreload 2 

## Load the necessary data

In [None]:
# Inputs needed to run the PDM: forcing series, calibrated parameters and catchment area
preprocess_output_folder = Path('data/Zwalm_data/preprocess_output')
p_zwalm = pd.read_pickle(preprocess_output_folder / 'zwalm_p_thiessen.pkl')
ep_zwalm = pd.read_pickle(preprocess_output_folder / 'zwalm_ep_thiessen.pkl')
param = pd.read_csv("data/Zwalm_PDM_parameters/NM_opt_param.csv")
zwalm_shape = gpd.read_file('data/Zwalm_shape/zwalm_shapefile_emma_31370.shp')
# Area of the first geometry, scaled by 1e-6
# (presumably m^2 -> km^2 for the EPSG:31370 shapefile -- TODO confirm)
area_zwalm_new = np.single(zwalm_shape.area[0] / 10**6)
# Time steps are defined once (the original cell assigned both of them twice)
deltat = np.array(1, dtype=np.float32)  # hour
deltat_out = np.array(24, dtype=np.float32)  # daily averaging

# Observational C* as predicted by the different observation operator models
ml_obs_op_pad = Path("data/ml_obs_op_data")
Cstar_obs_lin_reg = pd.read_pickle(ml_obs_op_pad/'lin_reg/full_data/y_hat_retimed.pickle')
Cstar_obs_lin_reg_nt = pd.read_pickle(ml_obs_op_pad/'lin_reg/full_data_no_time/y_hat_retimed.pickle')
Cstar_obs_ridge_w = pd.read_pickle(ml_obs_op_pad/'ridge/window/y_hat_retimed.pickle')
Cstar_obs_SVR_lin = pd.read_pickle(ml_obs_op_pad/'SVR/linear/y_hat_retimed.pickle')
Cstar_obs_GPR = pd.read_pickle(ml_obs_op_pad/'GPR/y_hat_retimed.pickle')

# Observed daily flow for comparison, indexed on its timestamp column 't'
Q_obs_daily = pd.read_pickle('data/Zwalm_data/pywaterinfo_output/Q_day.pkl')
Q_obs_daily = Q_obs_daily.rename(columns = {'Timestamp':'t'})
Q_obs_daily = Q_obs_daily.set_index('t')
Q_obs_daily.head(2)

Evaluate model performance starting on the first day of the month of the first observation. The evaluation is based on daily flow!

In [None]:
# Period 1 starts on the first day of the month that contains the first C* observation
first_obs_date = Cstar_obs_lin_reg.index[0]
start_p1 = pd.Timestamp(datetime(year = first_obs_date.year, month = first_obs_date.month, day = 1))
print(f'Start of evaluation: {start_p1}')
end_PDM_calibration = pd.Timestamp(datetime(year = 2019, month = 12, day = 31, hour = 23))
# Period 2 starts one hour after the end of the PDM calibration period
begin_ML_training_only = end_PDM_calibration + np.timedelta64(1,'h')
print(f'End of PDM calibration period: {end_PDM_calibration}')
end_ML_training = pd.Timestamp(datetime(year = 2020, month = 12, day =31))
print(f'End of ML training period: {end_ML_training}')
# Period 3 (test) runs from the day after the ML training period up to the last observation
begin_all_test = end_ML_training + np.timedelta64(1,'D')
end_all_test = Cstar_obs_lin_reg.index[-1]
# end_all_test closes the test period, so it is no longer labelled as training
print(f'Last date used for testing {end_all_test}')

## Define a general comparison function

Define a function to repeatedly compare different Newtonian Nudging parameters and observation operator models

In [None]:
def DA_OL_comparison(gamma, kappa, tau, Cstar_obs, plot_style = 'dynamic'):
    """Run the PDM with and without data assimilation (DA) and compare both runs.

    The model is run twice on the notebook-level forcing (``p_zwalm``,
    ``ep_zwalm``), parameters (``param``), area and time steps: once with
    ``DA = True`` using the supplied observations and once as open loop (OL)
    with ``DA = False``. The C* and flow of both runs are plotted from
    ``start_p1`` onwards, and the NSE of the modelled flow against
    ``Q_obs_daily['Value']`` is printed for four periods.

    Parameters
    ----------
    gamma, kappa
        Forwarded unchanged to ``PDM`` as ``gamma`` and ``kappa``.
    tau
        Forwarded to ``PDM`` as ``np.timedelta64(tau, 'h')``, i.e. interpreted
        in hours.
    Cstar_obs
        pandas object holding the observed C*; its flattened ``.values`` and
        its ``.index.values`` are passed to ``PDM`` as ``Cstar_obs`` and
        ``t_obs``.
    plot_style
        ``'dynamic'`` for hvplot figures, ``'static'`` for matplotlib figures.
        Any other value skips the plotting.

    Returns
    -------
    None
        Results are only plotted and printed.
    """
    # Forcing and settings shared by the DA and the OL run
    pdm_common = dict(
        P=p_zwalm['P_thiessen'].values,
        EP=ep_zwalm['EP_thiessen'].values,
        t=p_zwalm['Timestamp'].values,
        area=area_zwalm_new, deltat=deltat, deltatout=deltat_out,
        parameters=param, m=3,
    )

    #Calculate DA and non DA PDM
    pd_zwalm_out_DA = PDM(
        **pdm_common,
        DA = True,
        Cstar_obs = Cstar_obs.values.flatten(),
        t_obs = Cstar_obs.index.values,
        gamma = gamma, kappa = kappa,
        tau = np.timedelta64(tau,'h'),
    )
    pd_zwalm_out_DA = pd_zwalm_out_DA.set_index('Time')
    pd_zwalm_out = PDM(**pdm_common, DA = False)
    pd_zwalm_out = pd_zwalm_out.set_index('Time')
    Q_out_diff = pd_zwalm_out_DA['qmodm3s'] - pd_zwalm_out['qmodm3s']

    #Plotting
    if plot_style == 'dynamic':
        display(
            pd_zwalm_out_DA['Cstar'][start_p1:].hvplot(ylabel='[mm]', label = 'C* DA')
            * pd_zwalm_out['Cstar'][start_p1:].hvplot(label = 'C* OL')
        )

        display(
            Q_obs_daily['Value'][start_p1:].hvplot(label = 'Observed')
            * pd_zwalm_out_DA['qmodm3s'][start_p1:].hvplot(ylabel='[m^3/s]',label = 'DA')
            * pd_zwalm_out['qmodm3s'][start_p1:].hvplot(label = 'OL',line_dash = 'dotted', frame_width = 800, frame_height = 400)
        )

        display(Q_out_diff[start_p1:].hvplot(title = 'Q_out DA - Q_out OL', ylabel = '[m^3/s]', frame_width = 800))

    elif plot_style == 'static':
        fig, ax = plt.subplots()
        pd_zwalm_out_DA['Cstar'][start_p1:].plot(label = 'DA', ylabel = r'$C^*$ [mm]', ax = ax)
        pd_zwalm_out['Cstar'][start_p1:].plot(label = 'OL', ax = ax)
        ax.legend()

        fig, ax = plt.subplots(figsize = (8,5))
        Q_obs_daily['Value'][start_p1:].plot(label = 'Observed', ax = ax)
        pd_zwalm_out_DA['qmodm3s'][start_p1:].plot(ylabel= r'$Q_{out}$ [m$^3$/s]', label = 'DA', ax = ax)
        pd_zwalm_out['qmodm3s'][start_p1:].plot(label = 'OL',linestyle = 'dotted', ax = ax)
        ax.legend()

        fig, ax = plt.subplots(figsize = (8,5))
        # Pass the axes explicitly, like the other static plots, instead of
        # relying on matplotlib's implicit "current axes".
        Q_out_diff[start_p1:].plot(title = r'$Q_{out}$ DA - $Q_{out}$ OL', ylabel = r'[m$^3$/s]', ax = ax)

    #Metrics
    def metric_wrapper_DA_OL_comparison(function, metric_name, p_start,p_end):
        """Print ``function`` for the OL and DA flow over [p_start, p_end] and return both values."""
        Q_obs_period = Q_obs_daily['Value'][p_start:p_end]
        metric_OL = function(pd_zwalm_out['qmodm3s'][p_start:p_end], Q_obs_period)
        metric_DA = function(pd_zwalm_out_DA['qmodm3s'][p_start:p_end], Q_obs_period)
        print(f'OL {metric_name} from {p_start} till {p_end}: {metric_OL}')
        print(f'DA {metric_name} from {p_start} till {p_end}: {metric_DA}')
        delta_metric = metric_DA - metric_OL
        print(f'Delta {metric_name}: {delta_metric}')
        return metric_OL, metric_DA

    # PDM calibration period, ML-training-only period, test period, full period
    metric_wrapper_DA_OL_comparison(NSE,'NSE',start_p1,end_PDM_calibration)
    print('\n')
    metric_wrapper_DA_OL_comparison(NSE,'NSE',begin_ML_training_only,end_ML_training)
    print('\n')
    metric_wrapper_DA_OL_comparison(NSE,'NSE',begin_all_test,end_all_test)
    print('\n')
    metric_wrapper_DA_OL_comparison(NSE,'NSE',start_p1,end_all_test)
        

Start with $\tau = 0.5$ day and $\kappa \cdot \gamma = 0.5$.

# Linear regression: full feature set

In [None]:
kappa = 1
gamma = 0.5
tau = 12  # converted to hours inside DA_OL_comparison
# DA_OL_comparison is declared as (gamma, kappa, tau, ...); keyword arguments
# keep gamma and kappa from being swapped by a positional (kappa, gamma) call.
DA_OL_comparison(gamma=gamma, kappa=kappa, tau=tau, Cstar_obs=Cstar_obs_lin_reg, plot_style = 'dynamic')

# Linear regression: no time

In [None]:
# DA_OL_comparison is declared as (gamma, kappa, tau, ...); keyword arguments
# keep gamma and kappa from being swapped by a positional (kappa, gamma) call.
DA_OL_comparison(gamma=gamma, kappa=kappa, tau=tau, Cstar_obs=Cstar_obs_lin_reg_nt, plot_style = 'dynamic')

# Ridge window regression

In [None]:
# DA_OL_comparison is declared as (gamma, kappa, tau, ...); keyword arguments
# keep gamma and kappa from being swapped by a positional (kappa, gamma) call.
DA_OL_comparison(gamma=gamma, kappa=kappa, tau=tau, Cstar_obs=Cstar_obs_ridge_w['C*'], plot_style = 'dynamic')

## SVR

In [None]:
# DA_OL_comparison is declared as (gamma, kappa, tau, ...); keyword arguments
# keep gamma and kappa from being swapped by a positional (kappa, gamma) call.
DA_OL_comparison(gamma=gamma, kappa=kappa, tau=tau, Cstar_obs=Cstar_obs_SVR_lin['C*'], plot_style = 'dynamic')

# GPR

In [None]:
# DA_OL_comparison is declared as (gamma, kappa, tau, ...); keyword arguments
# keep gamma and kappa from being swapped by a positional (kappa, gamma) call.
DA_OL_comparison(gamma=gamma, kappa=kappa, tau=tau, Cstar_obs=Cstar_obs_GPR['C*'], plot_style = 'dynamic')

# Old experiments only below

In [None]:
# Show the model output from row 200 onwards.
# NOTE(review): at notebook level pd_zwalm_out is only assigned in a later cell
# (inside DA_OL_comparison it is a local variable), so that cell has to run first.
pd_zwalm_out[200:]

In [None]:
# Model and observation timestamps, both cast to hourly resolution
t_hour = p_zwalm['Timestamp'].values.astype('datetime64[h]')
t_obs = Cstar_obs_lin_reg.index.values.astype('datetime64[h]') 
# NOTE(review): pd_zwalm_out_hour is not defined anywhere in the visible notebook;
# presumably an hourly PDM output from an earlier version -- confirm before running.
C_star_mod = pd_zwalm_out_hour['Cstar']

In [None]:
def tau_weighing(delta_t_abs, tau):
    """Return the temporal weight of an observation for Newtonian nudging.

    The weight is 1 while the absolute time difference ``delta_t_abs`` is
    smaller than ``tau/2``, decreases linearly from 1 to 0 between ``tau/2``
    and ``tau``, and is 0 from ``tau`` onwards.

    Parameters
    ----------
    delta_t_abs
        Absolute time difference between model time and observation time.
    tau
        Width of the weighting window, in the same unit as ``delta_t_abs``.

    Returns
    -------
    The weight, a value between 0 and 1.
    """
    half_window = tau/2
    if delta_t_abs < half_window:
        # Plateau: close enough to the observation for full weight
        return 1
    if delta_t_abs < tau:
        # Linear ramp down from 1 (at tau/2) to 0 (at tau)
        return (tau - delta_t_abs)/half_window
    # Outside the window the observation has no influence
    return 0
# Visual check of the weighting function: weights for time offsets -20..19 with tau = 10
weights = [tau_weighing(abs(offset), 10) for offset in np.arange(-20, 20)]
plt.plot(weights)

In [None]:
# Scratch check: the integers from -30 up to (but not including) 30
np.arange(-30,30)

In [None]:
def NewtonianNudging(Cstar_min, Cstar_obs, gamma, Kappa, delta_t, tau):
    """Nudge the modelled C* towards an observation.

    The update is ``Cstar_min + gamma*Kappa*W_t*(Cstar_obs - Cstar_min)``,
    where ``W_t`` is the temporal weight returned by ``tau_weighing`` for the
    absolute value of ``delta_t`` and the window ``tau``.

    Parameters
    ----------
    Cstar_min
        Modelled C* before the update.
    Cstar_obs
        Observed C* to nudge towards.
    gamma, Kappa
        Factors that scale the correction together with the temporal weight.
    delta_t
        Time difference between the model time step and the observation.
    tau
        Window passed to ``tau_weighing``.

    Returns
    -------
    The updated C*.
    """
    W_t = tau_weighing(np.abs(delta_t), tau)
    # The original dropped into pdb whenever the state changed; that leftover
    # breakpoint halted every real update and has been removed.
    return Cstar_min + gamma*Kappa*W_t*(Cstar_obs - Cstar_min)


In [None]:
def NN_wrapper(i, t, t_obs, t_a,Cstar,C_star_obs):
    """Report which observations lie within ``t_a`` of model time ``t[i]``.

    Prints a message listing the observation times whose absolute distance to
    ``t[i]`` is smaller than ``t_a``; prints nothing when there are none.

    Parameters
    ----------
    i
        Index of the model time step in ``t``.
    t
        Model times.
    t_obs
        Array of observation times.
    t_a
        Maximum distance between model time and observation time.
    Cstar, C_star_obs
        Currently unused; kept so that existing calls remain valid.

    Returns
    -------
    None
    """
    # Build the mask once and reuse it for both the test and the selection
    within_window = np.abs(t[i] - t_obs) < t_a
    if np.any(within_window):
        t_assimilated = t_obs[within_window]
        # The message reports the threshold that is actually tested (t_a)
        print(
            f'{t[i]} should be assimilated since less than {t_a} removed from {t_assimilated}')

In [None]:
# Offline nudging experiment: for every model time step, nudge the modelled C*
# towards the observation that is closest in time.
# NOTE(review): depends on C_star_mod, t_hour and t_obs from an earlier cell.
C_star_updated = C_star_mod.copy()
for i in range(len(C_star_mod)):
    C_star_min = C_star_mod[i]
    t_mod_i = t_hour[i]
    # Position of the observation with the smallest absolute time difference
    t_assimilated_index = np.abs(t_mod_i - t_obs).argmin()#t_obs[np.abs((t_hour[i] - t_obs)) < t_a]
    t_assimilated = t_obs[t_assimilated_index]
    delta_t = t_mod_i - t_assimilated
    # gamma = 0.5, Kappa = 1 and a 24 hour weighting window
    C_star_updated[i] = NewtonianNudging(C_star_min, Cstar_obs_lin_reg.loc[t_assimilated,:].values[0],0.5,1,delta_t,np.timedelta64(24,'h'))

In [None]:
# Inspect the observation time left over from the last loop iteration and the
# first value of the matching row in Cstar_obs_lin_reg.
t_assimilated 
Cstar_obs_lin_reg.loc[t_assimilated,:].values[0]

In [None]:
# Check which container type the observation index values come in
type(Cstar_obs_lin_reg.index.values)

In [None]:
# DA run: PDM with Newtonian nudging towards the linear-regression C* observations.
# The result goes straight into pd_zwalm_out_DA instead of first being stored
# under the open-loop name pd_zwalm_out.
pd_zwalm_out_DA = PDM(P=p_zwalm['P_thiessen'].values,
                       EP=ep_zwalm['EP_thiessen'].values,
                       t=p_zwalm['Timestamp'].values,
                       area=area_zwalm_new, deltat=deltat, deltatout=deltat_out,
                       parameters=param, m=3, DA = True, Cstar_obs = Cstar_obs_lin_reg.values.flatten(),t_obs = Cstar_obs_lin_reg.index.values, gamma = 0.5, kappa = 1,  tau = np.timedelta64(12,'h'))
pd_zwalm_out_DA = pd_zwalm_out_DA.set_index('Time')
# Open loop (OL) run: same forcing and parameters, no assimilation
pd_zwalm_out = PDM(P=p_zwalm['P_thiessen'].values,
                       EP=ep_zwalm['EP_thiessen'].values,
                       t=p_zwalm['Timestamp'].values,
                       area=area_zwalm_new, deltat=deltat, deltatout=deltat_out,
                       parameters=param, m=3, DA = False)
pd_zwalm_out = pd_zwalm_out.set_index('Time')

# Static comparison of C* for both runs
fig, ax = plt.subplots()
pd_zwalm_out_DA['Cstar'].plot(ylabel='[mm]', ax = ax, label = 'C* DA')
pd_zwalm_out['Cstar'].plot(ax = ax, label = 'C* OL')
ax.legend()
# Only the last expression of a cell is rendered automatically, so the
# intermediate results are displayed explicitly.
display(pd_zwalm_out.tail())

hvplot.extension('bokeh')
display(pd_zwalm_out_DA['Cstar'].hvplot(ylabel='[mm]',
    label = 'C* DA')*pd_zwalm_out['Cstar'].hvplot(label = 'C* OL'))

display(Q_obs_daily['Value'].hvplot())

display(Q_obs_daily['Value'].hvplot(label = 'Observed')*pd_zwalm_out_DA[
    'qmodm3s'].hvplot(ylabel='[m^3/s]',label = 'DA')*pd_zwalm_out['qmodm3s'].hvplot(label = 'OL',line_dash = 'dotted', frame_width = 800, frame_height = 400))

# Difference in modelled flow between the DA and the OL run
Q_out_diff = pd_zwalm_out_DA['qmodm3s'] - pd_zwalm_out['qmodm3s']
Q_out_diff.hvplot()