# Zwalm: new processing with OpenEO

In [None]:
import xarray as xr
from pathlib import Path
import hvplot.xarray
import hvplot.pandas
from bokeh.models.formatters import DatetimeTickFormatter
import numpy as np
import pandas as pd
import numpy.ma as ma
from joblib import Parallel, delayed
import matplotlib.pyplot as plt
import datetime
import geopandas as gpd
from pathlib import Path
#own functions
from functions.PDM import PDM
from functions.performance_metrics import NSE
from holoviews import opts

%load_ext autoreload
%autoreload 2

## Visualisation of pre-processed data

### $\sigma_0$ backscatter

#### Raster

In [None]:
# Open the sigma0 NetCDF cube (it also carries the land-use layer plotted below)
# and chunk it in blocks of 20 along dimension `t`.
s0_cube_path = 'data/s0_OpenEO/S0_zwalm_landuse.nc'
s1_xr = xr.open_dataset(s0_cube_path, decode_coords='all').chunk({'t': 20})
s1_xr

In [None]:
# Two map panels side by side on an OSM basemap (both in EPSG code 32631):
# left the rasterized VV backscatter, right the land-use layer.
vv_map = s1_xr['VV_db'].hvplot.image(
    'x', 'y',
    geo=True, crs=32631, tiles='OSM',
    cmap='bwr', frame_width=320, rasterize=True,
)
landuse_map = s1_xr['landuse'].hvplot.image(
    'x', 'y',
    geo=True, crs=32631, tiles='OSM',
    cmap='viridis', frame_width=320,
)
vv_map + landuse_map

#### Timeseries

In [None]:
# Sentinel-1 timeseries table; column `t` becomes the (date-parsed) index
s1_pd = pd.read_csv(
    'data/s0_OpenEO/s1_timeseries.csv',
    parse_dates=True,
    index_col='t',
)
s1_pd.head()

In [None]:
# Split the timeseries into one table per orbit direction
orbit_direction = s1_pd['Orbitdirection']
s1_pd_ascending = s1_pd[orbit_direction == 'ascending']
s1_pd_descending = s1_pd[orbit_direction == 'descending']

# The first five columns are the ones plotted as VV series
cols = s1_pd.columns
VV_cols = cols[:5]

s1_pd_ascending[VV_cols].hvplot(title='Ascending')

In [None]:
# Line plot of the VV columns for the descending-orbit subset
descending_vv = s1_pd_descending[VV_cols]
descending_vv.hvplot.line(title='Descending')

### LAI

#### Raster

In [None]:
# Open the LAI cube and map its `LAI_pv` variable on an OSM basemap
lai_cube_path = 'data/LAI/LAI_cube_Zwalm_landuse.nc'
LAI_xr = xr.open_dataset(lai_cube_path)
lai_layer = LAI_xr['LAI_pv']
lai_layer.hvplot.image(
    'x', 'y',
    geo=True, tiles='OSM', cmap='cividis', frame_width=350,
)


#### Timeseries

In [None]:
# LAI timeseries table, indexed by the date-parsed column `t`
LAI_pd = pd.read_csv(
    'data/LAI/LAI_timeseries.csv',
    parse_dates=True,
    index_col='t',
)
display(LAI_pd.head())

# Keep the names of the first four columns for the plots that follow
LAI_cols = LAI_pd.columns[:4]
print(LAI_cols)

In [None]:
# Overlay two versions of the LAI columns on one matplotlib axis.
fig, ax = plt.subplots(figsize = (15,10))
# LAI timeseries from the LAI table: connected line with point markers
LAI_pd[LAI_cols].plot(ax = ax, marker = '.')
# Same column names taken from the Sentinel-1 table, drawn as markers only.
# NOTE(review): assumes s1_timeseries.csv also contains the LAI_cols columns
# (e.g. LAI values at the S1 acquisition dates) — confirm against the CSV schema.
s1_pd[LAI_cols].plot.line(marker = '.', ax = ax, linestyle = 'None')

## PDM

### Basic use

Model parameters for the Zwalm as provided by Pieter Cabus during the bachelor's thesis; see [link](data/Zwalm_data/342-PDMup_Zwalm.pdm). Note that fairly similar (but slightly different) data are used in the [2006 paper](data/Zwalm_data/parameters_cabus.pdf).

In [None]:
# PDM parameter set, stored as a single-row float32 DataFrame (row label 0)
# with one column per parameter.
pdm_parameter_values = {
    'cmax': 400.60999,
    'cmin': 87.67600,
    'b': 0.60000,
    'be': 3.00000,
    'k1': 8.00000,
    'k2': 0.70000,
    'kb': 5.04660,
    'kg': 9000.00000,
    'St': 0.43043,
    'bg': 1.00000,
    'tdly': 2.00000,
    'qconst': 0.00000,
    'rainfac': 0.00000,
}
parameters = pd.DataFrame(pdm_parameter_values, index=[0], dtype=np.float32)
parameters

The catchment area is also taken from Cabus (not from the area computed in `Zwalm.ipynb`).

In [None]:
# Catchment area stored as a single-precision (float32) scalar
area_zwalm = np.float32(109.2300034)

New timeseries data from pywaterinfo

In [None]:
# Load the pre-processed precipitation table and show a plot plus the first rows
preprocess_output_folder = Path('data/Zwalm_data/preprocess_output')
p_pickle = preprocess_output_folder / 'zwalm_p_thiessen.pkl'
p_zwalm = pd.read_pickle(p_pickle)

precipitation_plot = p_zwalm.hvplot(x='Timestamp', y=['Elst', 'P_thiessen'])
display(precipitation_plot)
display(p_zwalm.head(3))

In [None]:
# Load the pre-processed evapotranspiration table and plot its three series
ep_pickle = preprocess_output_folder / 'zwalm_ep_thiessen.pkl'
ep_zwalm = pd.read_pickle(ep_pickle)
ep_zwalm.hvplot(
    x='Timestamp',
    y=['Liedekerke', 'Waregem', 'EP_thiessen'],
)

In [None]:
# Number of rows whose EP_thiessen value is NaN
ep_missing_rows = ep_zwalm[np.isnan(ep_zwalm['EP_thiessen'])]
print(len(ep_missing_rows))

Temporary fix: replace by 0

In [None]:
# Overwrite NaN entries of EP_thiessen with 0, in place
ep_is_nan = np.isnan(ep_zwalm['EP_thiessen'])
ep_zwalm.loc[ep_is_nan, 'EP_thiessen'] = 0

In [None]:
# Load the hourly and daily discharge tables and preview two rows of each
pywaterinfo_output_folder = Path("data/Zwalm_data/pywaterinfo_output")
Q_hour, Q_day = (
    pd.read_pickle(pywaterinfo_output_folder / pickle_name)
    for pickle_name in ("Q_hour.pkl", "Q_day.pkl")
)
for discharge_table in (Q_hour, Q_day):
    display(discharge_table.head(2))

Running the model

In [None]:
# Time steps passed to the model, both as float32 values
deltat = np.array(1, dtype=np.float32)       # hour
deltat_out = np.array(24, dtype=np.float32)  # daily averaging

# Collect the model inputs by keyword, then run PDM once
pdm_inputs = dict(
    P=p_zwalm['P_thiessen'].values,
    EP=ep_zwalm['EP_thiessen'].values,
    t=p_zwalm['Timestamp'].values,
    area=area_zwalm,
    deltat=deltat,
    deltatout=deltat_out,
    parameters=parameters,
)
pd_zwalm_out_day = PDM(**pdm_inputs)

Time how long the function takes for this run (includes the new numba compiler)

In [None]:
# Silence RuntimeWarnings so they do not clutter the timing output
import warnings
warnings.filterwarnings('ignore', category=RuntimeWarning)
# Benchmark the same PDM call as the model run above with IPython's %timeit magic.
# NOTE(review): a name assigned inside %timeit is presumably not kept in the
# notebook namespace; later cells appear to rely on the earlier, un-timed run — confirm.
%timeit pd_zwalm_out_day = PDM(P = p_zwalm['P_thiessen'].values, EP = ep_zwalm['EP_thiessen'].values,t = p_zwalm['Timestamp'].values,area = area_zwalm, deltat = deltat, deltatout = deltat_out ,parameters = parameters)

In [None]:
# Preview the first rows of the daily model output
display(pd_zwalm_out_day.head())
# Add the observed daily discharge as a new column next to the model output.
# NOTE(review): assumes the rows of Q_day line up with the rows of the model
# output (same index labels and dates) — confirm, otherwise values may be misaligned.
pd_zwalm_out_day['q_obs'] = Q_day['Value']
# Modelled (qmodm3s) versus observed (q_obs) flow over time
pd_zwalm_out_day.hvplot(x = 'Time', y = ['qmodm3s','q_obs'], title = 'Flow on daily basis')

In [None]:
# Plot the S1 and Cstar columns of the model output against time
state_columns = ['S1', 'Cstar']
pd_zwalm_out_day.hvplot(x='Time', y=state_columns)

NSE calculation. Take first day of Sentinel data as start!

In [None]:
# First index entry of the Sentinel-1 timeseries, used as evaluation start
s1_timestamps = s1_pd.index
first_day_sentinel = s1_timestamps[0]
print(first_day_sentinel)

In [None]:
# Restrict the model output to the period starting at the first Sentinel-1 date
model_by_time = pd_zwalm_out_day.set_index('Time')
pd_zwalm_out_day_NSE = model_by_time[first_day_sentinel:]

# NSE is called with the modelled flow first, then the observed flow
q_modelled = pd_zwalm_out_day_NSE['qmodm3s']
q_observed = pd_zwalm_out_day_NSE['q_obs']
nse_daily_new = NSE(q_modelled, q_observed)
print('NSE of data interpolated with Thiessen polygons on a daily basis:' + str(nse_daily_new))

### Calibration for better performance

## C* compared to backscatter

In [None]:
# Re-key the model output on `t` so it can be joined to the Sentinel-1 table
merge_prep = pd_zwalm_out_day.rename(columns={'Time': 't'}).set_index('t')

# Left join: keep every Sentinel-1 row and attach the model output matching on `t`
pd_compare = s1_pd.merge(merge_prep, how='left', on='t')
pd_compare.head()

### Orbits and landuses combined

In [None]:
# Model output indexed by time (reused by the plots further down)
pd_zwalm_out_day_plot = pd_zwalm_out_day.set_index('Time')

# One panel with two y-axes: Cstar on the left axis, average VV on the twinned axis
fig, ax = plt.subplots(figsize=(17, 5))
ax2 = ax.twinx()

cstar_series = pd_zwalm_out_day_plot['Cstar']
vv_avg_series = s1_pd['VV_avg']
cstar_series.plot.line(ax=ax, color='blue')
vv_avg_series.plot.line(ax=ax2, color='red', alpha=0.8)

ax2.set_ylabel('Backscatter', color='red')
ax.set_ylabel('Cstar', color='blue')

In [None]:
# Correlation between average VV backscatter and Cstar, for both methods
vv_cstar = pd_compare[['VV_avg', 'Cstar']]
correlation_methods = (
    ('Pearson correlation', 'pearson'),
    ('Spearman correlation', 'spearman'),
)
for heading, method in correlation_methods:
    print(heading)
    display(vv_cstar.corr(method))

### Orbits separated, landuses combined

In [None]:
# Two stacked panels, one per orbit direction. In each panel Cstar is drawn on
# the primary (left) axis in blue and the average VV backscatter on the
# twinned axis in red.
fig, (ax1, ax2) = plt.subplots(2,1, figsize = (17,10))

#ascending
ax1_2 = ax1.twinx()
pd_zwalm_out_day_plot['Cstar'].plot.line(ax = ax1, color = 'blue')
s1_pd_ascending['VV_avg'].plot.line(ax = ax1_2, color = 'red', alpha = 0.8)

# Each label goes on the axis that carries that series: backscatter is on the
# twinned axis, Cstar on the primary axis (the two were previously swapped).
ax1_2.set_ylabel('Backscatter',color = 'red')
ax1.set_ylabel('Cstar', color = 'blue')
ax1.set_title('Ascending')

#descending
ax2_2 = ax2.twinx()
pd_zwalm_out_day_plot['Cstar'].plot.line(ax = ax2, color = 'blue')
s1_pd_descending['VV_avg'].plot.line(ax = ax2_2, color = 'red', alpha = 0.8)

ax2_2.set_ylabel('Backscatter',color = 'red')
ax2.set_ylabel('Cstar', color = 'blue')
ax2.set_title('Descending')


In [None]:
# Correlation between average VV backscatter and Cstar, computed per orbit direction
per_orbit_columns = ['VV_avg', 'Cstar', 'Orbitdirection']
for heading, method in (
    ('Pearson correlation', 'pearson'),
    ('Spearman correlation', 'spearman'),
):
    print(heading)
    display(pd_compare[per_orbit_columns].groupby('Orbitdirection').corr(method))

Apparently the ascending orbit has a better correlation than the descending one. This is analogous to what was obtained earlier on the gamma0 data.

### Orbits and landuses separated

In [None]:
# Names of the first five columns of the Sentinel-1 table
s1_column_names = s1_pd.columns
sel_col = s1_column_names[:5]
print(sel_col)

In [None]:
# Two stacked panels (ascending / descending): Cstar on the primary axis in
# fuchsia, the selected backscatter columns on the twinned axis.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(17, 10))
cstar_series = pd_zwalm_out_day_plot['Cstar']

# --- ascending ---
ax1_2 = ax1.twinx()
cstar_series.plot.line(ax=ax1, color='fuchsia', label='Cstar')
s1_pd_ascending[sel_col].plot.line(ax=ax1_2)

ax1.legend(loc='upper left')
ax1_2.legend(loc='upper right')
ax1.set_ylabel('Cstar', color='fuchsia')
ax1_2.set_ylabel('Backscatter')
ax1.set_title('Ascending')

# --- descending ---
ax2_2 = ax2.twinx()
cstar_series.plot.line(ax=ax2, color='fuchsia', label='Cstar')
s1_pd_descending[sel_col].plot.line(ax=ax2_2)

ax2.legend(loc='upper left')
ax2_2.legend(loc='upper right')
ax2.set_ylabel('Cstar', color='fuchsia')
ax2_2.set_ylabel('Backscatter')
ax2.set_title('Descending')

In [None]:
# Selected backscatter columns plus the grouping key, Cstar and the VV average
list_col = [*sel_col, 'Orbitdirection', 'Cstar', 'VV_avg']

# Correlation matrix per orbit direction; show only the column of
# correlations against Cstar
corr_pd = pd_compare[list_col].groupby('Orbitdirection').corr()
corr_pd['Cstar']

Conclusions:
- Pasture = best correlation
- Water: good correlation BUT only a very small number of grid cells have this land use => do not use
- Agriculture: no good correlation, but this can probably be explained by the vegetation on the fields!

Visualising the best correlation: pasture!

In [None]:
# Two stacked panels (ascending / descending) comparing Cstar with the pasture
# VV backscatter: Cstar on the primary axis in blue, backscatter on the
# twinned axis in red.
fig, (ax1, ax2) = plt.subplots(2,1, figsize = (17,10))

#ascending
ax1_2 = ax1.twinx()
pd_zwalm_out_day_plot['Cstar'].plot.line(ax = ax1, color = 'blue')
s1_pd_ascending['VVPasture'].plot.line(ax = ax1_2, color = 'red', alpha = 0.8)

# Raw string: the label contains a backslash for the math text (\sigma), which
# is an invalid escape sequence in a normal or f-string literal.
ax1_2.set_ylabel(r'Backscatter: $\sigma_0$ [dB]',color = 'red')
ax1.set_ylabel('$C*$ [mm]', color = 'blue')
ax1.set_title('Ascending: pasture')

#descending
ax2_2 = ax2.twinx()
pd_zwalm_out_day_plot['Cstar'].plot.line(ax = ax2, color = 'blue')
s1_pd_descending['VVPasture'].plot.line(ax = ax2_2, color = 'red', alpha = 0.8)

ax2_2.set_ylabel(r'Backscatter: $\sigma_0$ [dB]',color = 'red')
ax2.set_ylabel('$C*$ [mm]', color = 'blue')
ax2.set_title('Descending: pasture')