# Zwalm

In [None]:
import xarray as xr
from pathlib import Path
import hvplot.xarray
import hvplot.pandas
import numpy as np
import pandas as pd
import numpy.ma as ma
from joblib import Parallel, delayed
import matplotlib.pyplot as plt
import datetime
import geopandas as gpd
from pathlib import Path
#own functions
from functions.PDM import PDM
from functions.performance_metrics import NSE
from holoviews import opts

%load_ext autoreload
%autoreload 2

## Test band 161

In [None]:
# Open all NetCDF files of orbit 161 (matched by the glob pattern) as one dataset.
zwalm_161 = xr.open_mfdataset('data/g0_020m_Zwalm/*_161_Zwalm.nc') 
# Explicit combine options that were tried earlier, kept for reference:
#concat_dim = 'time', combine = 'nested', coords = 'minimal')
zwalm_161

In [None]:
# Interactive map of the 'g0vv' variable of orbit 161 on top of OpenStreetMap tiles.
zwalm_161['g0vv'].hvplot.image(geo = True,tiles = 'OSM')

## Make 1 Dataset of all orbits

In [None]:
# Combine the files of all orbits into a single dataset.
# NOTE(review): the `1**` part of the pattern presumably behaves like `1*`, i.e. it matches
# every orbit id starting with 1 — confirm that only orbits 110 and 161 are present.
zwalm_xr = xr.open_mfdataset('data/g0_020m_Zwalm/*_1**_Zwalm.nc')

In [None]:
# Inspect the combined dataset (dimensions, coordinates, variables).
zwalm_xr

In [None]:
# List the distinct values of the 'satellite' variable.
np.unique(zwalm_xr['satellite'].values)

In [None]:
# List the distinct orbit numbers present in the combined dataset.
np.unique(zwalm_xr['orbit'].values)

## Plotting

In [None]:
# Interactive map of 'g0vv' for all orbits, semi-transparent over OpenStreetMap tiles.
zwalm_xr['g0vv'].hvplot.image(geo = True, tiles = 'OSM', cmap = 'coolwarm', frame_width = 350, alpha = 0.8)

In [None]:
# Interactive map of the 'lia' variable (local incidence angle, cf. the axis labels used further down).
zwalm_xr['lia'].hvplot.image(geo = True, tiles = 'OSM', cmap = 'cividis', frame_width = 350)

## Average backscatter timeseries

In [None]:
# Convert g0vv to linear scale with 10**(x/10), the inverse of the 10*log10 applied later
# (presumably g0vv is stored in dB — TODO confirm).
zwalm_xr['g0vv_abs'] = 10**(zwalm_xr['g0vv']/10)
# Attach the 'crs' grid mapping so the new variable can be plotted with geo = True.
zwalm_xr['g0vv_abs'].attrs['grid_mapping'] = 'crs'
zwalm_xr

In [None]:
# Interactive map of the linear-scale backscatter.
zwalm_xr['g0vv_abs'].hvplot.image(geo = True, tiles = 'OSM', cmap = 'cividis', frame_width = 350)

Duidelijk dat er veel reflectie is in Zottegem => overwegen om deze waarden weg te laten? Want hier is er eerder infrastructuur...


In [None]:
# Spatial mean over lat/lon of the linear backscatter per time step, converted back with 10*log10.
# The averaging is done on the linear values (g0vv_abs), not on g0vv itself.
zwalm_xr['g0vv_av_timeseries'] = 10*np.log10(zwalm_xr['g0vv_abs'].mean(dim = ['lat','lon']))
zwalm_xr

In [None]:
# Static (matplotlib) plot of the catchment-averaged backscatter time series.
zwalm_xr['g0vv_av_timeseries'].plot()

In [None]:
# Interactive (hvplot) version of the same time series.
zwalm_xr['g0vv_av_timeseries'].hvplot()

Plot the time series for the two orbits separately

In [None]:
# Split the averaged time series per orbit using a boolean mask on the 'orbit' variable.
dr_110 = zwalm_xr['g0vv_av_timeseries'][zwalm_xr['orbit']==110]
dr_161 = zwalm_xr['g0vv_av_timeseries'][zwalm_xr['orbit']==161]

In [None]:
# Draw both orbit-specific time series on the same axes for visual comparison.
fig, ax = plt.subplots()
dr_110.plot(ax = ax, label = 'orbit 110')
dr_161.plot(ax = ax, label = 'orbit 161')
ax.legend()

### Write out all the preprocessed data

## PDM for Zwalm

In [None]:
# nonan = True

Modelparameters van de Zwalm zoals verschaft door Pieter Cabus tijdens de Bachelorproef! Zie [link](data/Zwalm_data/342-PDMup_Zwalm.pdm). Bemerk dat vrij gelijkaardige (maar toch licht andere) data wordt gebruikt in de [paper uit 2006](data/Zwalm_data/parameters_cabus.pdf).

In [None]:
# PDM parameter set for the Zwalm: a single-row DataFrame (index 0) with one
# float32 column per parameter.
parameters = pd.DataFrame(
    data = dict(
        cmax = 400.60999,
        cmin = 87.67600,
        b = 0.60000,
        be = 3.00000,
        k1 = 8.00000,
        k2 = 0.70000,
        kb = 5.04660,
        kg = 9000.00000,
        St = 0.43043,
        bg = 1.00000,
        tdly = 2.00000,
        qconst = 0.00000,
        rainfac = 0.00000,
    ),
    index = [0],
    dtype = np.float32,
)
parameters

Oppervlakte volgt opnieuw uit [link](data/Zwalm_data/342-PDMup_Zwalm.pdm). Bemerk het verschil met de oppervlakte zoals berekend uit de huidige shapefile.

In [None]:
# Catchment area passed to the PDM (presumably km^2 — TODO confirm), stored as a float32 array.
area_zwalm = np.array([109.2300034], dtype = np.float32)
# Catchment outline from the shapefile, used to compare its stored area with the value above.
shape_emma = gpd.read_file(Path('data/Zwalm_shape/zwalm_shapefile_emma.shp'))
display(shape_emma)
# NOTE(review): dividing by 10**6 assumes the 'Area' attribute is in m^2 — confirm.
print(area_zwalm, shape_emma['Area'].values/10**6)

Bemerk dat nog een andere oppervlakte op geopunt: 112.443 km^2 (cf VHA deelbekkens)

In [None]:
#check de berekening
lambert_shape = shape_emma['geometry'].to_crs(31370)
lambert_shape.area

In [None]:
#check de berekening
lambert_shape = shape_emma['geometry'].to_crs(31370)
lambert_shape.area

### Old timeseries data

In [None]:
def dateparse(x):
    """Parse a 'YYYY-MM-DD HH:MM:SS' string into a datetime object."""
    return datetime.datetime.strptime(x, "%Y-%m-%d %H:%M:%S")

# Folder holding the preprocessed forcing/flow files.
preprocess_output_folder = Path('data/Zwalm_data/preprocess_output')
# Old hourly forcings and flow; 'Time' is parsed as datetime, the other columns are read as float32.
# NOTE(review): `date_parser` is presumably deprecated in pandas >= 2.0 — confirm before upgrading.
pd_zwalm = pd.read_csv(
    preprocess_output_folder/'zwalm_forcings_flow_OLD.csv',
    parse_dates = ['Time'],
    date_parser = dateparse,
    dtype = np.float32,
)
pd_zwalm.head()

Ook inladen van dagelijkse debieten van de Zwalm

In [None]:
def dateparse_daily(x):
    """Parse a 'YYYY-MM-DD' string into a datetime object."""
    return datetime.datetime.strptime(x, "%Y-%m-%d")

# Old daily flow; 'Time' is parsed as datetime, the other columns are read as float32.
pd_zwalm_flow_daily = pd.read_csv(
    preprocess_output_folder/'zwalm_flow_daily_OLD.csv',
    parse_dates = ['Time'],
    date_parser = dateparse_daily,
    dtype = np.float32,
)
pd_zwalm_flow_daily.head()

In [None]:
# Timestamp of the first entry on the time axis of the satellite dataset.
first_day_sentinel = pd.Timestamp(zwalm_xr['time'][0].values)
print(first_day_sentinel)

In [None]:
# Keep only the calendar date: a plain datetime at midnight of the first Sentinel day.
first_day_sentinel = datetime.datetime.combine(first_day_sentinel.date(), datetime.time.min)
print(first_day_sentinel)

Satellite data starts on 2014-11-18 => take roughly 1 year as warm-up period for the model => start on 2014-01-01.

In [None]:
# Keep everything from 2014-01-01 onwards (warm-up period included) for both the
# hourly forcings and the daily flow; 'Time' is restored as a regular column afterwards.
pd_zwalm_short = pd_zwalm.set_index('Time').loc['2014-01-01':].reset_index()
pd_zwalm_flow_daily_short = pd_zwalm_flow_daily.set_index('Time').loc['2014-01-01':].reset_index()

In [None]:
#display(pd_zwalm_short)
# Number of days covered, given 24 hourly rows per day.
print(len(pd_zwalm_short)/24)

In [None]:
# Inspect the raw time stamps that are passed to the model.
pd_zwalm_short['Time'].values

In [None]:
#test with nan -> 0 when
#if nonan:
#    nan_bool_P = np.isnan(pd_zwalm_short['Neerslag'])
    #pd_zwalm_short['Neerslag'][nan_bool_P] = 0
    # pd_zwalm_short.loc[nan_bool_P,'Neerslag'] = 0
    # nan_bool_EP = np.isnan(pd_zwalm_short['Evaporatie'])
    # pd_zwalm_short.loc[nan_bool_EP, 'Evaporatie'] = 0

In [None]:
# Report whether the forcing columns P and EP contain any missing value.
for forcing_column in ('P', 'EP'):
    print(pd_zwalm_short[forcing_column].isna().any())

In [None]:
# Show P and EP as two side-by-side interactive plots.
pd_zwalm_short.hvplot(x = 'Time', y =  'P') + pd_zwalm_short.hvplot(x = 'Time',y = 'EP')

Model is run at 1 hour resolution, and then aggregated to 24 hour resolution (by taking the mean per 24 hours!)

In [None]:
# Run the PDM on the old hourly forcings and request output aggregated per 24 hours.
deltat = np.array(1,dtype = np.float32) #hour
deltat_out = np.array(24, dtype = np.float32) #daily averaging
pd_zwalm_out = PDM(P = pd_zwalm_short['P'].values, 
    EP = pd_zwalm_short['EP'].values,
    t = pd_zwalm_short['Time'].values,
    area = area_zwalm, deltat = deltat, deltatout = deltat_out ,
    parameters = parameters)

In [None]:
# Alternative view showing the individual model flow columns:
#pd_zwalm_out.hvplot(x = 'Time', y = ['qmodm3s','qbm3s','qsm3s'])
# Overlay the modelled flow (qmodm3s) with the old daily flow observations.
pd_zwalm_out.hvplot(x = 'Time', y = 'qmodm3s') * pd_zwalm_flow_daily_short.hvplot(x = 'Time', y = 'Flow')

NSE is used as performance metric. Here it is calculated on a daily basis. Only use values after the warmup => start from the first day of sentinel data!

In [None]:
# Drop the warm-up period: keep model output and observations from the first Sentinel day onwards.
pd_zwalm_out_NSE = pd_zwalm_out.set_index('Time').loc[first_day_sentinel:].reset_index()
pd_zwalm_flow_daily_NSE = pd_zwalm_flow_daily_short.set_index('Time').loc[first_day_sentinel:].reset_index()

In [None]:
# Daily NSE of the old data: modelled flow versus observed flow, passed as plain arrays.
# NOTE(review): presumably both arrays cover exactly the same days in the same order — confirm.
nse_daily_old = NSE(pd_zwalm_out_NSE['qmodm3s'].values, pd_zwalm_flow_daily_NSE['Flow'].values)
print(nse_daily_old)

Already quite a high value is obtained!

FYI: also calculated on an hourly basis

In [None]:
# Same model run, but with an output step of 1 hour so the NSE can be computed on hourly values.
deltat_out = np.array(1, dtype = np.float32) #output step of 1 hour: no daily averaging
pd_zwalm_out_hourly = PDM(P = pd_zwalm_short['P'].values, 
    EP = pd_zwalm_short['EP'].values,
    t = pd_zwalm_short['Time'].values,
    area = area_zwalm, deltat = deltat, deltatout = deltat_out ,
    parameters = parameters)
# Drop the warm-up period for both the model output and the hourly observations.
pd_zwalm_out_hourly_NSE = pd_zwalm_out_hourly.set_index('Time')[first_day_sentinel:].reset_index()
pd_zwalm_hourly = pd_zwalm.set_index('Time')[first_day_sentinel:].reset_index()
nse_hourly_old = NSE(pd_zwalm_out_hourly_NSE['qmodm3s'].values, pd_zwalm_hourly['Flow'].values)
print(nse_hourly_old)

Note the significant DROP in NSE: the performance is worse when compared on an hourly basis. This is of course to be expected!

In [None]:
# Plot the 'S1' and 'Cstar' output columns of the daily run over time.
pd_zwalm_out.hvplot(x = 'Time', y = ['S1','Cstar'])

In [None]:
# Check the numeric dtype of the model output.
pd_zwalm_out['Cstar'].values.dtype

### New timeseries data as read in from pywaterinfo

In [None]:
# Load the new precipitation data (includes the 'P_thiessen' column) and compare it
# visually with the old P series.
p_zwalm = pd.read_pickle(preprocess_output_folder/'zwalm_p_thiessen.pkl')
display(p_zwalm.hvplot(x = 'Timestamp', y =['Elst','P_thiessen']) + pd_zwalm_short.hvplot(x = 'Time', y = 'P'))
display(p_zwalm.head(3))

In [None]:
# Load the new evaporation data (includes the 'EP_thiessen' column) and compare it
# visually with the old EP series.
ep_zwalm = pd.read_pickle(preprocess_output_folder/'zwalm_ep_thiessen.pkl')
ep_zwalm.hvplot(x = 'Timestamp', y = ['Liedekerke','Waregem','EP_thiessen']) + pd_zwalm_short.hvplot(x = 'Time', y = ['EP'])

In [None]:
# Count the rows for which the Thiessen-interpolated EP is missing.
print(ep_zwalm['EP_thiessen'].isna().sum())

So even with 3 stations included, there are still 274 hours with no EP data => as of now set to zero

In [None]:
# Replace missing EP values by zero so the model receives a gap-free series.
ep_zwalm['EP_thiessen'] = ep_zwalm['EP_thiessen'].fillna(0)

In [None]:
# Verify that no missing EP values remain after the fill (expected result: False).
any(np.isnan(ep_zwalm['EP_thiessen']))

In [None]:
# Load the hourly and daily flow observations from the pywaterinfo output folder.
pywaterinfo_output_folder = Path("data/Zwalm_data/pywaterinfo_output")
Q_hour = pd.read_pickle(pywaterinfo_output_folder/"Q_hour.pkl")
Q_day = pd.read_pickle(pywaterinfo_output_folder/"Q_day.pkl")
display(Q_hour.head(2))
display(Q_day.head(2))

In [None]:
# Run the PDM on the new Thiessen-interpolated forcings, with output aggregated per 24 hours.
deltat = np.array(1,dtype = np.float32) #hour
deltat_out = np.array(24, dtype = np.float32) #daily averaging
pd_zwalm_out_day = PDM(P = p_zwalm['P_thiessen'].values, 
    EP = ep_zwalm['EP_thiessen'].values,
    t = p_zwalm['Timestamp'].values,
    area = area_zwalm, deltat = deltat, deltatout = deltat_out ,
    parameters = parameters)

In [None]:
#OWN: resample the Q_hour to Q_day
Q_day_own = Q_hour[['Timestamp','Value']].set_index('Timestamp').resample('1D').agg(pd.DataFrame.mean, skipna=False)
Q_day_own.head()

**Update 25/11/2022: with the new read-in corrected for timezones, the own sampled data and read in data is the same!**
code below is now skipped

In [None]:
#any(np.isnan(Q_day_own['Value']))

In [None]:
# print(len(Q_day))
# print(len(pd_zwalm_out_day))
# print(len(Q_day_own))

In [None]:
# Q_day['Value_temp'] = Q_day_own['Value'].values
# Q_day.hvplot(x = 'Timestamp', y =['Value','Value_temp'])

Weird: there seems to be a shift of exactly one day (with the own resampled one being one day later than the one from pywaterinfo directly!)

Compare with waterinfo!

In [None]:
#Q_day[0:7].hvplot.step(x = 'Timestamp', y =['Value','Value_temp'])

In [None]:
#<img src="data/waterinfo.png" width="700"/>

The above illustrates that Q_temp is the correct one to use! Therefore, the data will also be stored separately!

In [None]:
# Store the self-resampled daily flow next to the other pywaterinfo outputs,
# once as CSV and once as pickle.
Q_day_own.to_csv(pywaterinfo_output_folder/"Q_day_own.csv")
Q_day_own.to_pickle(pywaterinfo_output_folder/"Q_day_own.pkl")

In [None]:
# Add the observed daily flow to the model output for plotting and scoring.
# NOTE(review): this assignment aligns on the DataFrame index, not on the date — it presumably
# relies on Q_day and the PDM output having the same row order and start date; confirm.
pd_zwalm_out_day['q_obs'] = Q_day['Value']
pd_zwalm_out_day.hvplot(x = 'Time', y = ['qmodm3s','q_obs'], title = 'Flow on daily basis')

In [None]:
# Plot the 'S1' and 'Cstar' output columns of the run with the new forcings.
pd_zwalm_out_day.hvplot(x = 'Time', y = ['S1','Cstar'])

So only start calculating the NSE starting from the first day of Sentinel 1 imagery. 

In [None]:
# Reminder of the date from which the NSE is evaluated.
print(first_day_sentinel)

In [None]:
# Daily NSE for the new forcings, evaluated from the first Sentinel day onwards,
# printed next to the daily NSE of the old data.
pd_zwalm_out_day_NSE = pd_zwalm_out_day.set_index('Time')[first_day_sentinel:]
nse_daily_new = NSE(pd_zwalm_out_day_NSE['qmodm3s'],pd_zwalm_out_day_NSE['q_obs'])
print('NSE of data interpolated with Thiessen polygons on a daily basis:' + str(nse_daily_new))
print('NSE of the old data as obtained from Jarne on a daily basis: ' + str(nse_daily_old))

So despite the effort of interpolating the rainfall and EP data, the NSE is now lower than when using the previous data...

Also comparing for hourly data!

In [None]:
# Run the PDM on the new forcings with an output step of 1 hour (no aggregation).
deltat = np.array(1,dtype = np.float32) #hour
deltat_out = np.array(1, dtype = np.float32) #no daily averaging!
pd_zwalm_out_hour = PDM(P = p_zwalm['P_thiessen'].values, 
    EP = ep_zwalm['EP_thiessen'].values,
    t = p_zwalm['Timestamp'].values,
    area = area_zwalm, deltat = deltat, deltatout = deltat_out ,
    parameters = parameters)

In [None]:
# Add the observed hourly flow to the hourly model output.
# NOTE(review): the assignment aligns on the DataFrame index, not on the timestamp — confirm
# that Q_hour and the PDM output share the same row order and start time.
pd_zwalm_out_hour['q_obs'] = Q_hour['Value']
pd_zwalm_out_hour.hvplot(x = 'Time', y = ['qmodm3s','q_obs'], title = 'Flow on hourly basis')

In [None]:
# Hourly NSE for the new forcings, evaluated from the first Sentinel day onwards
# (i.e. after the warm-up period).
pd_zwalm_out_hour_NSE = pd_zwalm_out_hour.set_index('Time')[first_day_sentinel:]
nse_hourly_new = NSE(pd_zwalm_out_hour_NSE['qmodm3s'],pd_zwalm_out_hour_NSE['q_obs'])
# Both values are hourly scores, so the printed labels say "hourly".
print('NSE of data interpolated with Thiessen polygons on an hourly basis: ' + str(nse_hourly_new))
print('NSE of the old data as obtained from Jarne on an hourly basis: ' + str(nse_hourly_old))

Again significant drop... 

### Quest for a better NSE: Recalculate with a different area for the Zwalm!

Focus on the daily flow for this, not the hourly.

In [None]:
# Area of the reprojected shapefile geometry divided by 10**6, stored as float32.
area_new = (lambert_shape.area / 10**6).astype(np.float32)
print(area_new)

In [None]:
# Repeat the daily run on the new forcings, now with the area derived from the shapefile.
deltat = np.array(1,dtype = np.float32) #hour
deltat_out = np.array(24, dtype = np.float32) #daily averaging
pd_zwalm_out_day_area115 = PDM(P = p_zwalm['P_thiessen'].values, 
    EP = ep_zwalm['EP_thiessen'].values,
    t = p_zwalm['Timestamp'].values,
    area = area_new, deltat = deltat, deltatout = deltat_out ,
    parameters = parameters)

In [None]:
# Score the larger-area run against the observed daily flow and compare with the earlier run.
# NOTE(review): the q_obs assignment aligns on the DataFrame index, not on the date — confirm.
pd_zwalm_out_day_area115['q_obs'] = Q_day['Value']
#display(pd_zwalm_out_day_area115.hvplot(x = 'Time', y = ['qmodm3s','q_obs']))
pd_zwalm_out_day_NSE_area115 = pd_zwalm_out_day_area115.set_index('Time')[first_day_sentinel:]
nse_daily_new_area115 = NSE(pd_zwalm_out_day_NSE_area115['qmodm3s'],pd_zwalm_out_day_NSE_area115['q_obs'])
print('NSE of data interpolated with Thiessen polygons on a daily basis with larger area (115 km^2):' + str(nse_daily_new_area115))
print('NSE of data interpolated with Thiessen polygons on a daily basis:' + str(nse_daily_new))

This is again a drop: the larger area seems to make the performance worse (higher peaks?). What to do?

## Compare Cstar with average backscatter

25/11/2022: use the PDM output given by the new pywaterinfo data

In [None]:
# Export the averaged backscatter time series together with the orbit number to a DataFrame.
pd_average_backscatter = zwalm_xr[['g0vv_av_timeseries','orbit']].to_dataframe()

For merging, drop the time-of-day part of the satellite timestamps

In [None]:
# Turn the index into a regular column and name the time column 'Time',
# matching the column name used by the PDM output.
pd_average_backscatter = (
    pd_average_backscatter
    .reset_index()
    .rename(columns = {'time':'Time'})
)
display(pd_average_backscatter)

In [None]:
# Drop the time of day so the acquisition timestamps can be matched to the daily PDM output.
# 'D' is the canonical pandas alias for a calendar day; the lowercase 'd' spelling is
# deprecated in recent pandas releases.
pd_average_backscatter['Time'] = pd_average_backscatter['Time'].dt.floor('D')
display(pd_average_backscatter.head(2))
print(pd_average_backscatter['Time'][0])

In [None]:
# Left join: keep every satellite acquisition and attach the PDM output of the same day.
pd_zwalm_compare = pd_average_backscatter.merge(pd_zwalm_out_day, on = 'Time', how = 'left')
pd_zwalm_compare.head(2)

In [None]:
# Backscatter (left axis, red) and Cstar (right axis, blue) on a shared time axis.
fig, ax = plt.subplots(figsize = (8,7))
ax2 = ax.twinx() # second y-axis sharing the same x-axis
ax.plot(pd_zwalm_compare['Time'],pd_zwalm_compare['g0vv_av_timeseries'], color = 'red')
ax2.plot(pd_zwalm_compare['Time'], pd_zwalm_compare['Cstar'], color = 'blue')

ax.set_ylabel('Backscatter',color = 'red')
ax2.set_ylabel('Cstar', color = 'blue')

For Cstar, only values for which a backscatter value is observed, will be shown

In [None]:
# Pearson correlation between the averaged backscatter and Cstar (all orbits together).
pd_corr = pd_zwalm_compare[['g0vv_av_timeseries','Cstar']]
pd_corr.corr(method = 'pearson')

In [None]:
# Spearman (rank) correlation for the same two columns.
pd_corr.corr(method = 'spearman')

Above plot is with the backscatter of both 110 and 161 orbit combined. Below, this will be split up.

In [None]:
# One panel per orbit: backscatter of that orbit (red, left axis) against the Cstar
# values of all rows of pd_zwalm_compare (blue, right axis).
fig, (ax1, ax2) = plt.subplots(1, 2, figsize = (17,7)) # panels side by side

# Rows of orbit 110; every other row is treated as orbit 161.
bool_110 = pd_zwalm_compare['orbit'] == 110
backscatter_110 = pd_zwalm_compare.loc[bool_110, 'g0vv_av_timeseries']
backscatter_161 = pd_zwalm_compare.loc[~bool_110, 'g0vv_av_timeseries']
time_110 = pd_zwalm_compare.loc[bool_110, 'Time']
time_161 = pd_zwalm_compare.loc[~bool_110, 'Time']

ax1_twin, ax2_twin = ax1.twinx(), ax2.twinx()
panels = [
    (ax1, ax1_twin, time_110, backscatter_110, 'Orbit 110'),
    (ax2, ax2_twin, time_161, backscatter_161, 'Orbit 161'),
]
for ax_left, ax_right, orbit_times, orbit_backscatter, orbit_label in panels:
    ax_left.plot(orbit_times, orbit_backscatter, color = 'red', label = orbit_label)
    ax_right.plot(pd_zwalm_compare['Time'], pd_zwalm_compare['Cstar'], color = 'blue')
    ax_left.set_ylabel('Backscatter', color = 'red')
    ax_right.set_ylabel('Cstar', color = 'blue')
    ax_left.legend()

fig.suptitle('Cstar for all timesteps')

In [None]:
# Cstar restricted to the rows of each orbit (rows that are not orbit 110 are treated as 161).
Cstar_110 = pd_zwalm_compare['Cstar'].loc[bool_110]
Cstar_161 = pd_zwalm_compare['Cstar'].loc[~bool_110]

# Same two-panel layout as before, but Cstar is now only drawn at the time steps
# of the orbit shown in that panel.
fig, (ax1, ax2) = plt.subplots(1,2, figsize = (17,7)) #in lines
#orbit_110
ax1_twin = ax1.twinx()
ax1.plot(time_110, backscatter_110, color = 'red', label = 'Orbit 110')
ax1_twin.plot(time_110, Cstar_110, color = 'blue')
ax1.set_ylabel('Backscatter',color = 'red')
ax1.legend()
ax1_twin.set_ylabel('Cstar', color = 'blue')

#orbit_161
ax2_twin = ax2.twinx()
ax2.plot(time_161, backscatter_161, color = 'red', label = 'Orbit 161')
ax2_twin.plot(time_161, Cstar_161, color = 'blue')
ax2.set_ylabel('Backscatter',color = 'red')
ax2_twin.set_ylabel('Cstar', color = 'blue')
ax2.legend()

fig.suptitle('Cstar for timesteps of respsective orbit')

In [None]:
# Correlation between backscatter and Cstar, computed separately for every orbit.
pd_corr = pd_zwalm_compare[['g0vv_av_timeseries','Cstar','orbit']]
pd_corr.groupby('orbit').corr()

Apparently higher correlation for the 161 orbit. Is also visible on the plot!

## Incorporate Land use 

In [None]:
# Load the data cube that also holds the land-use map.
xr_zwalm_landuse = xr.open_dataset('data/xarray_zwalm_landuse_cube.nc')
# Cast the land-use codes to int32.
# NOTE(review): assumes 'landuse' contains no NaN, which cannot be represented as an integer — confirm.
xr_zwalm_landuse['landuse'].values = xr_zwalm_landuse['landuse'].values.astype(np.int32)

In [None]:
# Use the bokeh backend for the interactive maps below.
hvplot.extension('bokeh')
# Land-use map next to the g0vv backscatter map.
xr_zwalm_landuse['landuse'].hvplot.image(geo = True, tiles = 'OSM', frame_width = 350, cmap = 'viridis') + xr_zwalm_landuse['g0vv'].hvplot.image(geo = True, tiles = 'OSM', frame_width = 350)
# Unfinished attempt to relabel the colorbar with class names:
#.opts(opts.image:colorbar_opts={"major_label_overrides": label_dictionary})

Goal is now to categorize by land use class

In [None]:
# Land-use class names and the numeric codes they are paired with (same order in both lists).
landuseclasses = ['Urban','Forest','Pasture','Agriculture','Water']
landusenumbers = [1,2,3,4,5]
# Filled in the next cells with the names of the per-class time series variables.
name_list = []
landuseclasses

In [None]:
# Build one averaged backscatter time series per land-use class.
# The list is rebuilt from scratch so re-running this cell does not append duplicate names.
name_list = []
for landuse_number, landuse_class in zip(landusenumbers, landuseclasses):
    # Keep only the pixels of this land-use class; all other pixels become NaN.
    g0vv_abs_temp = xr_zwalm_landuse['g0vv_abs'].where(xr_zwalm_landuse['landuse'] == landuse_number)
    name = 'g0vv_timeseries_' + landuse_class
    name_list.append(name)
    # Spatial mean of the linear values (NaN is skipped by default for floats),
    # converted back with 10*log10.
    xr_zwalm_landuse[name] = 10*np.log10(g0vv_abs_temp.mean(dim = ['lat','lon']))

In [None]:
# Also export the orbit number; add it only once so the cell can be re-run
# without creating a duplicate entry.
if 'orbit' not in name_list:
    name_list.append('orbit')

In [None]:
# Inspect the variable names that will be exported to pandas.
name_list

In [None]:
# Inspect the dataset, now including the per-class time series.
xr_zwalm_landuse

In [None]:
#xr_zwalm_landuse.hvplot(x= 'Time', y = name_list)
#name_list.append('orbit') #handy to have orbit in dataframe!
# Export the per-class time series (plus orbit) to a pandas object.
pd_timeseries = xr_zwalm_landuse[name_list].to_pandas()
pd_timeseries.head()

In [None]:
#fig, ax = plt.subplots(figsize = (8,8))
#pd_timeseries.plot(ax = ax)

In [None]:
# Plot every per-class series; name_list[0:-1] leaves out the last entry ('orbit').
pd_timeseries[name_list[0:-1]].hvplot(width = 1000, height = 500, title = 'No orbit differentiation')

In [None]:
#only_urban = xr_zwalm_landuse['g0vv'].where(xr_zwalm_landuse['landuse']==1)
#only_urban.hvplot.image(geo = True, tiles = 'OSM')

Now also split according to orbit!

In [None]:
# Split the per-class time series by orbit number.
pd_timeseries_110 = pd_timeseries[pd_timeseries['orbit'] == 110]
pd_timeseries_161 = pd_timeseries[pd_timeseries['orbit'] == 161]

In [None]:
# Per-class backscatter series for orbit 110 only (the 'orbit' column is left out).
pd_timeseries_110[name_list[0:-1]].hvplot(width = 1000, height = 500, title = 'orbit 110')

In [None]:
# Per-class backscatter series for orbit 161 only (the 'orbit' column is left out).
pd_timeseries_161[name_list[0:-1]].hvplot(width = 1000, height = 500, title = 'orbit 161')

### Compare Cstar with average backscatter per landuse class

Important is to calculate the correlations between the different g0vv and the Cstar

In [None]:
# Inspect the time column of the PDM output that is used as merge key below.
pd_zwalm_out_day['Time']

In [None]:
# Prepare a day-resolution copy of the per-class series for merging with the daily PDM output.
# A new name is used instead of overwriting pd_timeseries, so this cell (and the cells
# above that still expect the time-indexed frame) can be re-run safely.
pd_timeseries_daily = (
    pd_timeseries
    .reset_index()
    # drop the time of day; 'D' is the canonical pandas alias for a calendar day
    .assign(time = lambda frame: frame['time'].dt.floor('D'))
    .rename(columns = {'time':'Time'})
)
# Left join: keep every satellite acquisition and attach the PDM output of the same day.
pd_zwalm_compare_landuse = pd_timeseries_daily.merge(pd_zwalm_out_day, on = 'Time', how = 'left')
pd_zwalm_compare_landuse.head(3)

Calculate the correlations

In [None]:
# Select every backscatter column (name starts with 'g0vv') plus 'orbit' and 'Cstar'.
merged_columns = pd_zwalm_compare_landuse.columns
g0vv_list = [*merged_columns[merged_columns.str.startswith('g0vv')], 'orbit', 'Cstar']
pd_corr = pd_zwalm_compare_landuse[g0vv_list]
# Correlation matrix over all rows, and one correlation matrix per orbit.
corr_out = pd_corr.corr()
corr_out_orbit_split = pd_corr.groupby('orbit').corr()

In [None]:
# Show the 'Cstar' column of the correlation matrices: overall first, then split per orbit.
print('Classic Pearson Correlation')
print('')
display(corr_out['Cstar'])
display(corr_out_orbit_split['Cstar'])

In [None]:
# Same comparison with the Spearman (rank) correlation: per orbit and overall.
corr_out_orbit_split_sp = pd_corr.groupby('orbit').corr('spearman')
corr_out_sp = pd_corr.corr('spearman')
print('Spearman (rank) Correlation')
print('')
display(corr_out_sp['Cstar'])
display(corr_out_orbit_split_sp['Cstar'])

Plot the correlation

In [None]:
# One panel per orbit: catchment-averaged backscatter of that orbit (red, left axis)
# against the Cstar values of all rows of pd_zwalm_compare (blue, right axis).
# NOTE(review): this cell plots the catchment average from pd_zwalm_compare, not the
# per-land-use series — confirm that this is the intended figure for this section.
fig, (ax1, ax2) = plt.subplots(1,2, figsize = (17,7)) #in lines

# Build the orbit mask from the same frame that is indexed below, so the mask and the
# data are guaranteed to share the same index.
bool_110 = pd_zwalm_compare['orbit'] == 110
#df.loc[:, df.columns.str.startswith('alp')]
backscatter_110 = pd_zwalm_compare['g0vv_av_timeseries'].loc[bool_110]
backscatter_161 = pd_zwalm_compare['g0vv_av_timeseries'].loc[~bool_110]
time_110 = pd_zwalm_compare['Time'].loc[bool_110]
time_161 = pd_zwalm_compare['Time'].loc[~bool_110]

#orbit_110
ax1_twin = ax1.twinx()
ax1.plot(time_110, backscatter_110, color = 'red', label = 'Orbit 110')
ax1_twin.plot(pd_zwalm_compare['Time'], pd_zwalm_compare['Cstar'], color = 'blue')
ax1.set_ylabel('Backscatter',color = 'red')
ax1.legend()
ax1_twin.set_ylabel('Cstar', color = 'blue')

#orbit_161
ax2_twin = ax2.twinx()
ax2.plot(time_161, backscatter_161, color = 'red', label = 'Orbit 161')
ax2_twin.plot(pd_zwalm_compare['Time'], pd_zwalm_compare['Cstar'], color = 'blue')
ax2.set_ylabel('Backscatter',color = 'red')
ax2_twin.set_ylabel('Cstar', color = 'blue')
ax2.legend()

fig.suptitle('Cstar for all timesteps')

## Correlation between angle of incidence and g0vv backscatter

In [None]:
# Rasterized scatter of g0vv against the local incidence angle, rendered with bokeh.
hvplot.extension('bokeh')
zwalm_xr.hvplot.scatter('lia','g0vv', groupby = [], rasterize = True, dynspread = True, cmap = 'plasma', xlabel = 'local incidence angle', ylabel = 'g0vv') 

Over the entire dataset

In [None]:
#import hvplot.dask #allows parallel computing BUT crashes my compter! not enough ram!! 
# Same rasterized scatter, now rendered with the matplotlib backend and with a colorbar.
hvplot.extension('matplotlib')
#zwalm_xr.hvplot.scatter('lia','g0vv', groupby = [], cmap = 'plasma', datashade = True, dynspread = True, colorbar = True, xlabel = 'local incidence angle', ylabel = 'g0vv')
zwalm_xr.hvplot.scatter('lia','g0vv', groupby = [], rasterize = True, cmap = 'plasma', colorbar = True, xlabel = 'local incidence angle', ylabel = 'g0vv') 

In [None]:
# Correlation between backscatter and local incidence angle over all pixels and time steps;
# invalid (NaN/inf) entries are masked before the coefficient is computed.
ma.corrcoef(
    ma.masked_invalid(zwalm_xr['g0vv'].values.ravel()),
    ma.masked_invalid(zwalm_xr['lia'].values.ravel()),
)

So very low correlation of only -0.002! Seems to indicate negligible correlation, which is wanted.

In [None]:
# Collapse the two spatial dimensions into a single 'latlon' dimension.
stacked = zwalm_xr.stack(latlon = ['lat','lon'])
stacked

In [None]:
# Correlation along time between g0vv and lia for every pixel, unstacked back to lat/lon.
corr = xr.corr(stacked['g0vv'],stacked['lia'], dim = "time").unstack()

In [None]:
# Store the per-pixel correlation in the dataset and attach the 'crs' grid mapping
# so it can be plotted with geo = True.
zwalm_xr['corr_g0vv_lia'] = corr
zwalm_xr['corr_g0vv_lia'].attrs['grid_mapping'] = 'crs'

In [None]:
# Normalised histogram (density = True) of the per-pixel correlation values.
fig, ax = plt.subplots()
bins = corr.plot.hist(bins = 'auto', density = True, ax = ax)
ax.set_title('Distribution of correlation between backscatter and angle of incidence for every pixel')

Conclusion: as the correlations seem to be normally distributed around zero, no correction seems necessary for this correlation?

In [None]:
# Map of the per-pixel correlation, rendered with bokeh over OpenStreetMap tiles.
hvplot.extension('bokeh')
corr.hvplot(geo = True, cmap = 'RdBu', frame_width = 350, alpha = 0.8, tiles = 'OSM')

In urban areas, correlation between backscatter and angle of incidence

The goal is to also make a scatter/density plot of gamma_0 in function of local incidence angle. The result should be a more or less flat line!

In [None]:
# Flatten all g0vv and lia values into 1-D arrays for the density plot below.
g0vv_values = stacked['g0vv'].values.flatten()
lia_values = stacked['lia'].values.flatten()

In [None]:
import pandas as pd
import seaborn as sns
# Two-column frame with one row per (time step, pixel) pair.
data = {'g0vv':g0vv_values,
        'lia':lia_values}
df_plotting = pd.DataFrame(data)
# Bivariate kernel density estimate of g0vv against lia. The frame is passed by keyword:
# on seaborn releases before 0.12 the first positional argument of kdeplot is `x`,
# not `data`, so a positional frame would clash with x = 'g0vv'.
sns.kdeplot(data = df_plotting, x = 'g0vv', y = 'lia')