In [1]:
import pandas as pd 
import numpy as np
import scipy
from scipy import signal
from scipy.signal import argrelextrema
import plotly.express as px
import datetime 
pd.options.mode.chained_assignment = None  # default='warn'

df_leij = pd.read_csv('Leijgraaf_waterway.csv')

def filter_data(df: pd.DataFrame, window_length: int = 101, 
                polyorder: int = 3, derivative: int = 0, 
                default: bool = False):    
    '''takes a dataframe (from the "waterway_complete" function) with at least a
    "Diff(Verschil)" column and a "Weir compartment" column. It then applies a
    "Savitzky–Golay" filter to the dataframe. It puts the smoothed out data
    in a new column named "filtered diff". Window_length must be uneven. 
    A derivative column of the line can be extracted by setting the derivative to an int above 0
    Set default to True to not have to give input every time.
    This returns the old dataframe + the new column.
    '''
    if not default:
        print(df_leij['Weir compartment'].unique())
        weir = input('Copy one of the weirs and paste it in input')
        weir = weir.replace("'", "")
    else:
        weir = '211M_211N'
        

    df_oneweir = df_leij.loc[df['Weir compartment'] == weir]
    filtered = scipy.signal.savgol_filter(df_oneweir['Diff(Verschil)'],
                                          window_length = window_length, polyorder = polyorder, 
                                          deriv=0, delta=1.0, axis=- 1, mode='interp', cval=0.0)
    
    filtered_deriv = scipy.signal.savgol_filter(df_oneweir['Diff(Verschil)'],
                                          window_length = window_length, polyorder = polyorder, 
                                          deriv=derivative, delta=1.0, axis=- 1, mode='interp', cval=0.0)
    
    df_oneweir['filtered diff'] = filtered
    
    if derivative != 0:
        df_oneweir['derivative order ' + str(derivative)] = filtered_deriv

    df_oneweir = df_oneweir.set_index('Unnamed: 0', drop = True)
    df_oneweir.index.name = None
    
    return df_oneweir



In [103]:
filter_data(df_leij, 201, 4, 1, True)

Unnamed: 0,Time,Weir compartment,Discharge(Q),Diff(Verschil),filtered diff,derivative order 1
0,2018-01-01T00:00:00Z,211M_211N,0.646808,-0.021646,-0.074400,0.006979
1,2018-01-02T00:00:00Z,211M_211N,0.725779,-0.018823,-0.067595,0.006632
2,2018-01-03T00:00:00Z,211M_211N,1.014376,-0.007400,-0.061132,0.006295
3,2018-01-04T00:00:00Z,211M_211N,0.836132,-0.000322,-0.055001,0.005967
4,2018-01-05T00:00:00Z,211M_211N,0.711780,-0.020821,-0.049195,0.005648
...,...,...,...,...,...,...
1279,2021-07-05T00:00:00Z,211M_211N,0.494004,0.224000,0.238020,0.000783
1280,2021-07-06T00:00:00Z,211M_211N,0.449074,0.252667,0.238777,0.000729
1281,2021-07-07T00:00:00Z,211M_211N,0.485662,0.277416,0.239478,0.000674
1282,2021-07-08T00:00:00Z,211M_211N,0.456213,0.280677,0.240125,0.000618


In [51]:
data = filter_data(df_leij, 201, 4)
data['filtered diff'] = data['filtered diff'].clip(lower = 0)

n = 150  # number of points to be checked before and after

# Find local peaks

data['min'] = data.iloc[argrelextrema(data['Diff(Verschil)'].values, np.less_equal,
                    order=n)[0]]['Diff(Verschil)']
data['max'] = data.iloc[argrelextrema(data['Diff(Verschil)'].values, np.greater_equal,
                    order=n)[0]]['Diff(Verschil)']

fig = px.line(data, 'Time', ['Diff(Verschil)', 'filtered diff'])

fig.add_scatter(x = data['Time'], y = data['max'], mode = 'markers', marker = {'color' : 'red'}, name = 'local max')
fig.add_scatter(x = data['Time'], y = data['min'], mode = 'markers', marker = {'color' : 'green'}, name = 'local min')
# fig.add_trace()
fig.show()

['211VEL_211N' '211M_211N' '211L_211M' '211L_211K' '211K_211J' '211J_211I'
 '211I_211H' '211H_211G' '211G_211F' '211F_211E' '211E_211D' '211D_211C'
 '211C_211B' '211B_211A']
Copy one of the weirs and paste it in input211L_211K


In [52]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

data2 = filter_data(df_leij, 201, 4, 1, default = False)
# data2_deriv = filter_data(df_leij, 201, 4, 1, default = True)


fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(go.Scatter(x = data2['Time'], y = data2['Diff(Verschil)'], name = 'initial data'), secondary_y = False)
fig.add_trace(go.Scatter(x = data2['Time'], y = data2['filtered diff'], name = 'filtered data'), secondary_y = False)
fig.add_trace(go.Scatter(x = data2['Time'], y = data2['derivative order 1'], name = 'derivative'), secondary_y = True)
fig.update_layout(title = 'Weir ' + data2['Weir compartment'].iloc[0])
fig.show()

['211VEL_211N' '211M_211N' '211L_211M' '211L_211K' '211K_211J' '211J_211I'
 '211I_211H' '211H_211G' '211G_211F' '211F_211E' '211E_211D' '211D_211C'
 '211C_211B' '211B_211A']
Copy one of the weirs and paste it in input


ValueError: If mode is 'interp', window_length must be less than or equal to the size of x.

In [51]:
def plot_filtered(df):
    fig = make_subplots(specs=[[{"secondary_y": True}]])
    
    for c in df.columns:
        if 'Diff' in c:
            fig.add_trace(go.Scatter(x = df['Time'], y = df[c], name = 'initial data'), secondary_y = False)
        elif 'filtered' in c:
            fig.add_trace(go.Scatter(x = df['Time'], y = df[c], name = 'filtered data'), secondary_y = False)
        elif 'derivative' in c:
            fig.add_trace(go.Scatter(x = df['Time'], y = df[c], name = 'derivative'), secondary_y = True)
    fig.update_layout(title = 'Weir ' + df['Weir compartment'].iloc[0])
    fig.show()
        

In [90]:
plot_filtered(filter_data(df_leij, 201, 4, 1, default = True))

# new stuff

##### sorta

In [10]:

data = filter_data(df_leij, 55, 5, 1, False)
data['Diff(Verschil)'] = data['Diff(Verschil)'].clip(lower = 0)
data['Time'] = pd.to_datetime(data['Time'])
data = data.set_index('Time', drop = True)

n = 20  # number of points to be checked before and after

# Find local peaks

data['min'] = data.iloc[argrelextrema(data['derivative order 1'].values, np.less_equal,
                    order=n)[0]]['derivative order 1']
data['max'] = data.iloc[argrelextrema(data['derivative order 1'].values, np.greater_equal,
                    order=n)[0]]['derivative order 1']

fig = px.line(data, data.index, ['Diff(Verschil)', 'filtered diff', 'derivative order 1'])

# fig.add_scatter(x = data['Time'], y = data['max'], mode = 'markers', marker = {'color' : 'red'}, name = 'local max')
fig.add_scatter(x = data.index, y = data['min'], mode = 'markers', marker = {'color' : 'green'}, name = 'local min')

mins = data[data['min'].notnull()]
min_dates = []



for i, d in mins.iterrows():
    if d['Diff(Verschil)'] / data[i + datetime.timedelta(weeks=-2) : i]\
                                        ['Diff(Verschil)'].describe()['mean'] < 0.8:
        print('oi')
        min_dates.append(i)
#         if d['Diff(Verschil)'] < data['Diff(Verschil)'].describe(percentiles = [.8])['80%']:
        
#             min_dates.append(i)
#             print(d['Diff(Verschil)'], i)
        
for m in min_dates:
    fig.add_vline(x = m)

fig.show()

['211VEL_211N' '211M_211N' '211L_211M' '211L_211K' '211K_211J' '211J_211I'
 '211I_211H' '211H_211G' '211G_211F' '211F_211E' '211E_211D' '211D_211C'
 '211C_211B' '211B_211A']
Copy one of the weirs and paste it in input211L_211K
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi
oi


In [105]:
data = filter_data(df_leij, 101, 4, 1, True)

data['Time'] = pd.to_datetime(data['Time'])
data = data.set_index('Time', drop = True)
data

Unnamed: 0_level_0,Weir compartment,Discharge(Q),Diff(Verschil),filtered diff,derivative order 1
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-01-01 00:00:00+00:00,211M_211N,0.646808,-0.021646,-0.006689,-0.001139
2018-01-02 00:00:00+00:00,211M_211N,0.725779,-0.018823,-0.007868,-0.001215
2018-01-03 00:00:00+00:00,211M_211N,1.014376,-0.007400,-0.009115,-0.001276
2018-01-04 00:00:00+00:00,211M_211N,0.836132,-0.000322,-0.010416,-0.001323
2018-01-05 00:00:00+00:00,211M_211N,0.711780,-0.020821,-0.011756,-0.001355
...,...,...,...,...,...
2021-07-05 00:00:00+00:00,211M_211N,0.494004,0.224000,0.245366,0.001011
2021-07-06 00:00:00+00:00,211M_211N,0.449074,0.252667,0.246303,0.000861
2021-07-07 00:00:00+00:00,211M_211N,0.485662,0.277416,0.247084,0.000700
2021-07-08 00:00:00+00:00,211M_211N,0.456213,0.280677,0.247700,0.000530


In [147]:

if mins.iloc[0]['Diff(Verschil)'] / data[mins.index[0] + datetime.timedelta(weeks=-5) : \
                                      mins.index[0] + datetime.timedelta(weeks=-1)]\
                                        ['Diff(Verschil)'].describe()['mean'] < 0.4:
    print('oi')



oi


In [124]:
data[str(mins.index[0]) : str(mins.index[0] +datetime.timedelta(weeks=+4))]

Unnamed: 0_level_0,Weir compartment,Discharge(Q),Diff(Verschil),filtered diff,derivative order 1,min,max
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2018-03-24 00:00:00+00:00,211M_211N,0.377254,-0.017163,0.029274,-0.002692,-0.002692,
2018-03-25 00:00:00+00:00,211M_211N,0.321404,-0.025177,0.024024,-0.002685,,
2018-03-26 00:00:00+00:00,211M_211N,0.283504,-0.029589,0.017648,-0.00253,,
2018-03-27 00:00:00+00:00,211M_211N,0.309358,-0.08659,0.010901,-0.002333,,
2018-03-28 00:00:00+00:00,211M_211N,0.292828,-0.048323,0.004582,-0.002149,,
2018-03-29 00:00:00+00:00,211M_211N,0.244055,-0.022315,-0.000959,-0.001948,,
2018-03-30 00:00:00+00:00,211M_211N,0.306883,-0.021146,-0.006131,-0.001758,,
2018-03-31 00:00:00+00:00,211M_211N,0.416181,-0.014531,-0.010991,-0.001577,,
2018-04-01 00:00:00+00:00,211M_211N,0.434054,-0.013052,-0.015463,-0.0014,,
2018-04-02 00:00:00+00:00,211M_211N,0.435618,-0.01235,-0.019582,-0.001225,,


In [54]:
plot_filtered(filter_data(df_leij, 101, 4, 1, default = True))