In [1]:
import pandas as pd 
import numpy as np
import scipy
from scipy import signal
from scipy.signal import argrelextrema
import plotly.express as px
import datetime 
import plotly.graph_objects as go
from plotly.subplots import make_subplots
pd.options.mode.chained_assignment = None  # default='warn'

df_leij = pd.read_csv('Leijgraaf_waterway.csv')

def filter_data(df: pd.DataFrame, window_length: int = 101, 
                polyorder: int = 3, derivative: int = 0, 
                default: bool = False):    
    '''takes a dataframe (from the "waterway_complete" function) with at least a
    "Diff(Verschil)" column and a "Weir compartment" column. It then applies a
    "Savitzky–Golay" filter to the dataframe. It puts the smoothed out data
    in a new column named "filtered diff". Window_length must be uneven. 
    A derivative column of the line can be extracted by setting the derivative to an int above 0
    Set default to True to not have to give input every time.
    This returns the old dataframe + the new column.
    '''
    if not default:
        print(df_leij['Weir compartment'].unique())
        weir = input('Copy one of the weirs and paste it in input')
        weir = weir.replace("'", "")
    else:
        weir = '211L_211M'
        

    df_oneweir = df_leij.loc[df['Weir compartment'] == weir]
    filtered = scipy.signal.savgol_filter(df_oneweir['Diff(Verschil)'],
                                          window_length = window_length, polyorder = polyorder, 
                                          deriv=0, delta=1.0, axis=- 1, mode='interp', cval=0.0)
    
    filtered_deriv = scipy.signal.savgol_filter(df_oneweir['Diff(Verschil)'],
                                          window_length = window_length, polyorder = polyorder, 
                                          deriv=derivative, delta=1.0, axis=- 1, mode='interp', cval=0.0)
    
    df_oneweir['filtered diff'] = filtered
    
    if derivative != 0:
        df_oneweir['derivative order ' + str(derivative)] = filtered_deriv

    df_oneweir = df_oneweir.set_index('Unnamed: 0', drop = True)
    df_oneweir.index.name = None
    df_oneweir['Time'] = pd.to_datetime(df_oneweir['Time'])
    df_oneweir = df_oneweir.set_index('Time', drop = False) # I did this so previous functions still work
    
    return df_oneweir



In [55]:
filter_data(df_leij, 201, 4, 1, True)

Unnamed: 0_level_0,Time,Weir compartment,Discharge(Q),Diff(Verschil),filtered diff,derivative order 1
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-04-24 00:00:00+00:00,2018-04-24 00:00:00+00:00,211L_211M,3.533706,-0.429777,-0.475258,0.033402
2018-04-25 00:00:00+00:00,2018-04-25 00:00:00+00:00,211L_211M,3.556817,-0.389919,-0.442321,0.032476
2018-04-26 00:00:00+00:00,2018-04-26 00:00:00+00:00,211L_211M,3.625251,-0.379706,-0.410301,0.031566
2018-04-27 00:00:00+00:00,2018-04-27 00:00:00+00:00,211L_211M,3.694125,-0.369497,-0.379184,0.030672
2018-04-28 00:00:00+00:00,2018-04-28 00:00:00+00:00,211L_211M,3.763409,-0.359965,-0.348952,0.029794
...,...,...,...,...,...,...
2021-07-05 00:00:00+00:00,2021-07-05 00:00:00+00:00,211L_211M,8.935084,0.190948,0.154192,-0.014748
2021-07-06 00:00:00+00:00,2021-07-06 00:00:00+00:00,211L_211M,8.387046,0.193094,0.139111,-0.015417
2021-07-07 00:00:00+00:00,2021-07-07 00:00:00+00:00,211L_211M,8.889134,0.208667,0.123354,-0.016099
2021-07-08 00:00:00+00:00,2021-07-08 00:00:00+00:00,211L_211M,8.884773,0.212635,0.106908,-0.016794


In [None]:
import numpy as np
import pandas as pd
import lmfit
import datetime
from datetime import date
from sklearn import linear_model
import datetime
import argparse
pd.options.mode.chained_assignment = None  # default='warn'

#---------------Please adjust variables here or in the command line------------------------------------------------------------
data_path='/Users/20182463/Desktop/Data challenge 3/data/feature_tables/' #(--data_path)
# weir='211VEL_211N' #(--weir)
# risk_date='2021-06-17' # (--risk_date)
prediction=True # True for prediction (--prediction)
last_days=7 # (--last_days) For prediction: Defines how many days the linear model takes into account to predict the next 21 days
avg_temp=22 # (--avg_temp) For prediction: Average Temperature adjusts the prediction +/- 20%
#---------------End of adjust variables-------------------------------------------------------------------------------------------

# From now on PLEASE DO NOT CHANGE------------------------------------------------------------------------------------------------

def get_data(weir,data_path,date_format=False):
    ''' Get the feature data of the individual weir
    Keyword arguments:
    weir -- the weir name as string
    date_format -- date_format boolean 
    data_path -- the local path of the weir feature data csv's
    Returns: data as dataframe'''
    datapath=data_path+weir+'_feature_table.csv'
    data=pd.read_csv(datapath,index_col="TIME",parse_dates=True)
    if date_format:
        data.index=data.index.strftime('%Y-%m-%d')
    return data
    
def get_model(weir:str,year:int,data_path):
    '''Get the model Aa-en-Maas uses to define the backwater caused in winter
    Keyword arguments:
    weir -- the weir name as string
    year -- year as int
    data_path -- the local path of the weir feature data csv's
    Returns: model'''
    weir_data=get_data(weir,data_path,date_format=True)
    selected_data=weir_data[['VERSCHIL', 'Q']]
    # Set values of Verschil lower than 0 to 0 as backwater cannot be negative
    selected_data['VERSCHIL']=negative_backwater_to_zero(selected_data['VERSCHIL'])
    # Set the Q value also to 0 where backwater is now 0
    selected_data.loc[(selected_data.VERSCHIL == 0),'Q']=0
    # Winter season where plants are "not" growing from 1st October to end of February
    winter_data=selected_data.loc[str(year-1)+'-10-01':str(year)+'-02-31']
    # The polynomial function Aa-en-Maas currently uses
    def eqn_poly(x, a, b):
        ''' simple polynomial function'''
        return  a*(x**b)     
    mod = lmfit.Model(eqn_poly)
    lm_result = mod.fit(np.array(winter_data['VERSCHIL']), x=np.array(winter_data['Q']), a=1.0, b=1.0)
    return lm_result
    
def negative_backwater_to_zero(vegetation_data):
    '''Set values lower than 0 to 0 as backwater cannot be negative'''
    return np.clip(vegetation_data, 0, None)
    
def calc_vegetation(weir,weir_data,risk_date,data_path):
    '''Calculate the back water caused by plants of a single data point
    Keyword arguments:
    weir -- the weir name as string
    weir_data -- the feature data of the weir
    risk_date -- the date where the vegetation risk should be evaluated on
    data_path -- the local path of the weir feature data csv's
    Returns: current vegetation '''
    try:
        # Take the necessary features(VERSCHIL and Q) of the data at the given date
        risk_date_data=weir_data[['VERSCHIL', 'Q']].loc[risk_date]
    except:
        print('This date is not in the database')
    risk_date=datetime.datetime.strptime(risk_date, "%Y-%m-%d")
    current_year=risk_date.year
    try:
        #Get the winter baseline model of the current year
        model=get_model(weir,year=current_year,data_path=data_path)
    except:
        print('Error, model cannot be created for year {}'.format(current_year))
        return 0
    # If the date is in the winter period, 
    if ((risk_date.month <= 2) | (risk_date.month >= 10)):
        # the back water is assumed to be 0 as the plants do "not" grow in winter
        current_vegetation = 0
    elif (risk_date_data.empty):
        print("No flow data for "+ weir+ " on date ", risk_date)
        current_vegetation = None
    elif (len(risk_date_data) == 0):
        print("No data for ", weir, " on date ", risk_date)
        current_vegetation = None
    else:
        # Predict the vegetation for every summer data point based on the winter baseline
        winter_pred = model.eval(x=risk_date_data['Q'])
        winter_pred=negative_backwater_to_zero(winter_pred)
        # Calculate the vegetation by plants: Current back water - predicted back water based on winter
        current_vegetation= risk_date_data.loc["VERSCHIL"] - winter_pred
        current_vegetation=negative_backwater_to_zero(current_vegetation)
    return current_vegetation   
    
    
####################################################################################################    
####################################################################################################
def calc_vegetation_risk(weir,risk_date, data_path):
    ''' Calculate the vegetation risk between 1 and 4
    Keyword arguments:
    weir -- the weir name as string
    risk_date -- the date where the vegetation risk should be evaluated on
    data_path -- the local path of the weir feature data csv's
    Returns: Risk category, Quantiles of vegetation 25,50,75'''
    weir_data=get_data(weir,date_format=True,data_path=data_path)
    # Calculate the back water by plants on given date
    current_vegetation=calc_vegetation(weir,weir_data,risk_date,data_path)
    risk_date=datetime.datetime.strptime(risk_date, "%Y-%m-%d")
    current_year=risk_date.year
    min_year=current_year - 2
    total_vegetation=[]
    # Get the vegetation for the last three years
    for year in range(min_year,current_year):
        try: 
            # load model of given year
            model_year =get_model(weir, year=year,data_path=data_path)
        except:
            print('Error,year '+str(year)+' model cannot be created')  
            continue
        # Select summer season data from March till end of September
        summer_data = weir_data.loc[str(year)+'-03-01':str(year)+'-09-31']
        # Predict the vegetation for every summer data point based on the winter baseline
        winter_pred = model_year.eval(x=summer_data['Q']) 
        winter_pred=negative_backwater_to_zero(winter_pred)
        # Calculate the vegetation by plants: Current back water - predicted back water based on winter
        vegetation_year=summer_data.loc[:,"VERSCHIL"]-winter_pred
        vegetation_year=negative_backwater_to_zero(vegetation_year)
        total_vegetation.append(vegetation_year)
    total_vegetation = pd.concat(total_vegetation)
    # Get Quantiles of the total vegetation
    vegetation_q25=np.quantile(total_vegetation,0.25)
    vegetation_q50=np.quantile(total_vegetation,0.50)
    vegetation_q75=np.quantile(total_vegetation,0.75)
    # Get Risk category based on quantile segment
    risk_cat=np.where(current_vegetation > vegetation_q75,4,np.where(current_vegetation > vegetation_q50,3,np.where(current_vegetation > vegetation_q25,2,1))).tolist()
    # Return risk category and the quantiles
    print("The risk category for the date: {} is {}. The following quantiles were used for the risk calculation 0.25:{}, 0.5:{}, 0.75:{}".format(risk_date,risk_cat,vegetation_q25, vegetation_q50, vegetation_q75))
    return [risk_cat,vegetation_q25, vegetation_q50, vegetation_q75], winter_pred
    
####################################################################################################
####################################################################################################

def predict_vegetation(weir,last_days,avg_temp,data_path):
    '''Predict the vegetation of the next 21 days based on the last 7 days with linear model
    Keyword arguments:
    weir -- the weir name as string
    last_days -- the number of days the linear model should base the prediction on
    avg_temp -- the average temperature adjusting the predictions by +/- 20%
    data_path -- the local path of the weir feature data csv's
    Returns: Dataframe of the backwater predictions of the next 21 days'''
    data=get_data(weir,data_path,date_format=True)
    data.reset_index(inplace=True)
    # Get the last data points depending on number of last_days
    last_data=data.tail(last_days)
    # Get last day to calculate 
    last_day = datetime.datetime.strptime(last_data.iloc[-1]['TIME'], "%Y-%m-%d")
    # Get dates of the next 21 days
    new_dates=[last_day+datetime.timedelta(days=i) for i in range(1,22)]
    # Calculate back water by vegetation for the last days
    last_data['vegetation']=last_data['TIME'].apply(lambda row:calc_vegetation(weir,get_data(weir,data_path,date_format=True),row,data_path))
    last_data.reset_index(inplace=True)
    # Define linear model
    reg = linear_model.LinearRegression()
    # Take index and the back water by vegetation as training data
    x_train=last_data.index.to_numpy().reshape(-1, 1)
    y_train=last_data['vegetation'].to_numpy().reshape(-1, 1)
    # Fit the linear model on the last days
    reg.fit(x_train,y_train)
    # Get index for the next 21 days
    x_test=[x_train[-1]+i for i in range(1,22)]
    # Predict the vegetation for the next 21 days
    predictions=reg.predict(x_test)
    # Format
    predictions= [item for elem in predictions.tolist() for item in elem]
    # Depending on the temperature add multplication value to adjust values
    try:
        if (avg_temp > 25):
            predictions=[pred*1.2 for pred in predictions]
        elif (avg_temp<20):
            predictions=[pred*0.8 for pred in predictions]
    except:
        print("The Temperature was not available")
    data = {'TIME':  new_dates,'Predicted backwater by vegetation': predictions}
    df = pd.DataFrame (data, columns = ['TIME','Predicted backwater by vegetation'])
    print(df)
    return df
    
def main():
    parser = argparse.ArgumentParser(description='Arguments get parsed via --commands')
    parser.add_argument('--weir', type=str,default=weir)
    parser.add_argument('--risk_date', type=str, default=risk_date)
    parser.add_argument('--data_path', type=str,default=data_path)
    parser.add_argument('--prediction', type=bool,default=prediction)
    parser.add_argument('--last_days', type=int,default=last_days)
    parser.add_argument('--avg_temp', type=int,default=avg_temp)
    args = parser.parse_args()
    if args.prediction:
        predict_vegetation(weir=args.weir,last_days=args.last_days,avg_temp=args.avg_temp,data_path=args.data_path)
    else:
        calc_vegetation_risk(weir=args.weir,risk_date=args.risk_date, data_path=args.data_path)
    
# if __name__ == '__main__':
#     main()

In [51]:
data = filter_data(df_leij, 201, 4)
data['filtered diff'] = data['filtered diff'].clip(lower = 0)

n = 150  # number of points to be checked before and after

# Find local peaks

data['min'] = data.iloc[argrelextrema(data['Diff(Verschil)'].values, np.less_equal,
                    order=n)[0]]['Diff(Verschil)']
data['max'] = data.iloc[argrelextrema(data['Diff(Verschil)'].values, np.greater_equal,
                    order=n)[0]]['Diff(Verschil)']

fig = px.line(data, 'Time', ['Diff(Verschil)', 'filtered diff'])

fig.add_scatter(x = data['Time'], y = data['max'], mode = 'markers', marker = {'color' : 'red'}, name = 'local max')
fig.add_scatter(x = data['Time'], y = data['min'], mode = 'markers', marker = {'color' : 'green'}, name = 'local min')
# fig.add_trace()
fig.show()

['211VEL_211N' '211M_211N' '211L_211M' '211L_211K' '211K_211J' '211J_211I'
 '211I_211H' '211H_211G' '211G_211F' '211F_211E' '211E_211D' '211D_211C'
 '211C_211B' '211B_211A']
Copy one of the weirs and paste it in input211L_211K


In [96]:
data2 = filter_data(df_leij, 201, 4, 1, default = False)
# data2_deriv = filter_data(df_leij, 201, 4, 1, default = True)


fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(go.Scatter(x = data2['Time'], y = data2['Diff(Verschil)'], name = 'initial data'), secondary_y = False)
fig.add_trace(go.Scatter(x = data2['Time'], y = data2['filtered diff'], name = 'filtered data'), secondary_y = False)
fig.add_trace(go.Scatter(x = data2['Time'], y = data2['derivative order 1'], name = 'derivative'), secondary_y = True)
fig.update_layout(title = 'Weir ' + data2['Weir compartment'].iloc[0])
fig.show()

['211VEL_211N' '211M_211N' '211L_211M' '211L_211K' '211K_211J' '211J_211I'
 '211I_211H' '211H_211G' '211G_211F' '211F_211E' '211E_211D' '211D_211C'
 '211C_211B' '211B_211A']
Copy one of the weirs and paste it in input211L_211K


In [4]:
def plot_filtered(df):
    fig = make_subplots(specs=[[{"secondary_y": True}]])
    
    for c in df.columns:
        if 'Diff' in c:
            fig.add_trace(go.Scatter(x = df['Time'], y = df[c], name = 'initial data'), secondary_y = False)
        elif 'filtered' in c:
            fig.add_trace(go.Scatter(x = df['Time'], y = df[c], name = 'filtered data'), secondary_y = False)
        elif 'derivative' in c:
            fig.add_trace(go.Scatter(x = df['Time'], y = df[c], name = 'derivative'), secondary_y = True)
    fig.update_layout(title = 'Weir ' + df['Weir compartment'].iloc[0])
    
    fig.show()
        

In [114]:
plot_filtered(filter_data(df_leij, 201, 4, 1, default = True))

In [115]:
plot_filtered(test)

# Trying the 75% thing and the filter below 0.05

In [80]:
data = filter_data(df_leij, 201, 4, 1, default = True)

In [60]:
data['Diff(Verschil)'].describe()['75%']

0.2960937916666666

In [74]:
# setting all verschil below 0.05 to 0.

mask = data['Diff(Verschil)'] > 0.05
test = data
test['Diff(Verschil)'] = data['Diff(Verschil)'].where(mask, 0)

In [81]:
data['Diff(Verschil)'].describe()

count    1173.000000
mean        0.204599
std         0.129364
min        -0.429777
25%         0.130781
50%         0.211719
75%         0.296094
max         0.511136
Name: Diff(Verschil), dtype: float64

In [82]:
test['Diff(Verschil)'].describe()

count    1173.000000
mean        0.207447
std         0.120150
min         0.000000
25%         0.130781
50%         0.211719
75%         0.296094
max         0.511136
Name: Diff(Verschil), dtype: float64

In [94]:
weir = '211L_211K'
date = '2021-02-10'
path = '../data/feature_tables/'

In [100]:
# finding all dates that would cross the treshold within 21 days

data = filter_data(df_leij, 201, 4, 1, default = False)
# data = data.loc['2020'] #uncomment for specific year

##### uncomment below for the 0.05 filter
# mask = data['Diff(Verschil)'] > 0.05
# data['Diff(Verschil)'] = data['Diff(Verschil)'].where(mask, 0)

lst, pred = calc_vegetation_risk(weir, date, path)
pred.index = pd.to_datetime(pred.index)
new_tst = data.loc[pred.index[0] : pred.index[-1]]
new_tst['Time'] = new_tst['Time'].dt.date
new_tst = new_tst.set_index('Time', drop = True)

new_tst['new_verschil'] = new_tst['Diff(Verschil)'] - pred
data = new_tst

th = .75
# threshold = data['Diff(Verschil)'].describe(percentiles = [th])[str(int(th*100))+'%']
# threshold = lst[-1]
threshold = 0.30

count = 0
tot_count = 0

for d in data.index:
    tot_count += 1
    if data.loc[d]['new_verschil'] + data.loc[d]['derivative order 1']*21 > threshold:
        for n in range(22):
            if data.loc[d]['new_verschil'] + data.loc[d]['derivative order 1']*n > threshold:
                print(str(d)[:10], 'will reach', str(int(th*100))+'%', 'in', n, 'days')
                break
        count += 1
        
print('\ntotal days:',tot_count)
print('days with risky slopes:', count)

['211VEL_211N' '211M_211N' '211L_211M' '211L_211K' '211K_211J' '211J_211I'
 '211I_211H' '211H_211G' '211G_211F' '211F_211E' '211E_211D' '211D_211C'
 '211C_211B' '211B_211A']
Copy one of the weirs and paste it in input211L_211K
Error, model cannot be created for year 2021
Error,year 2019 model cannot be created
The risk category for the date: 2021-02-10 00:00:00 is 1. The following quantiles were used for the risk calculation 0.25:0.15688790624999996, 0.5:0.18652605208333334, 0.75:0.24398690624999997
2020-03-01 will reach 75% in 0 days
2020-03-06 will reach 75% in 0 days
2020-03-10 will reach 75% in 0 days
2020-03-11 will reach 75% in 0 days
2020-03-12 will reach 75% in 0 days
2020-03-13 will reach 75% in 11 days
2020-05-19 will reach 75% in 17 days
2020-05-20 will reach 75% in 14 days
2020-05-21 will reach 75% in 4 days
2020-05-22 will reach 75% in 0 days
2020-05-23 will reach 75% in 0 days
2020-05-24 will reach 75% in 0 days
2020-05-25 will reach 75% in 0 days
2020-05-26 will reach 75


Indexing a timezone-aware DatetimeIndex with a timezone-naive datetime is deprecated and will raise KeyError in a future version.  Use a timezone-aware object instead.



In [97]:
lst[-1]

0.24398690624999997

The risk category for the date: 2021-02-10 00:00:00 is 1. The following quantiles were used for the risk calculation 0.25:0.0, 0.5:0.02946626617547643, 0.75:0.06668933249698686


In [87]:
new_tst

Unnamed: 0_level_0,Weir compartment,Discharge(Q),Diff(Verschil),filtered diff,derivative order 1,new_verschil
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-03-01,211VEL_211N,0.178334,0.006198,0.002473,0.000701,-0.027419
2020-03-02,211VEL_211N,0.177821,0.008531,0.002996,0.000750,-0.025039
2020-03-03,211VEL_211N,0.176747,0.008771,0.003591,0.000797,-0.024702
2020-03-04,211VEL_211N,0.147431,0.008198,0.004268,0.000843,-0.022496
2020-03-05,211VEL_211N,0.164661,0.009594,0.005017,0.000887,-0.022765
...,...,...,...,...,...,...
2020-09-25,211VEL_211N,0.978691,0.138375,0.134730,-0.001202,0.062530
2020-09-26,211VEL_211N,1.025152,0.133688,0.133287,-0.001209,0.056143
2020-09-27,211VEL_211N,1.060153,0.135656,0.132244,-0.001198,0.056857
2020-09-28,211VEL_211N,1.047373,0.128625,0.130955,-0.001198,0.050281


In [19]:
print(str(int(0.8*100))+'%')

80%


# new stuff

### Okay, you can try different numbers in the cell below. The plot isn't the best without the second y-axis but you only need the vertical lines for now. The lines need to be on all the drops that look like mowing but not at the other ones so it's tough. I left the minimum filter on right now but the filters set to 1 because it gives decent results with it on. I tried (20, 55, 4, 1, True, 1, 1, 1).

In [101]:
n = 20  # number of points to be checked before and after for the min and max points
window_len = 55 # for the filter, smaller number results in a line that fits the data better
poly_num = 4 # degree polynomial. higher number result in less smoothing of the original line
derivative_degree = 1 # degree derivative :D

# the next parameters are there to filter the local minimums 
filter_mins = True  # set to True if you want to filter the minimums
treshold_mean = 1   # This checks if there was a drop, if the new value is below a certain treshold of the mean of all points.
                    # This one is for cases where the graph spikes, and returns to the original curve, not sure if this is
                    # the way the go though. 
treshold_prev = 1   # this parameter checks if there is a significant difference in means of the previous 
                    # weeks (number of weeks adjustable as well) and the current value (where it dropped to)
nr_prev_weeks = 1   # this is for changing the number of weeks the function looks back to attain the mean

# get the data
data = filter_data(df_leij, window_len, poly_num, derivative_degree, default = True)
data['Diff(Verschil)'] = data['Diff(Verschil)'].clip(lower = 0)
data['Time'] = pd.to_datetime(data['Time'])
data = data.set_index('Time', drop = True)

# Find local peaks
data['min'] = data.iloc[argrelextrema(data['derivative order 1'].values, np.less_equal,
                    order=n)[0]]['derivative order 1']

# start plot
fig = px.line(data, data.index, ['Diff(Verschil)', 'filtered diff', 'derivative order 1'])

fig.add_scatter(x = data.index, y = data['min'], mode = 'markers', marker = {'color' : 'green'}, name = 'local min')

mins = data[data['min'].notnull()]
min_dates = []


# local minimums filter
for i, d in mins.iterrows():
    if filter_mins == True:
        if d['Diff(Verschil)'] / data[i + datetime.timedelta(weeks=-nr_prev_weeks) : i]\
                                            ['Diff(Verschil)'].describe()['mean'] < treshold_prev:

            if d['Diff(Verschil)'] < data['Diff(Verschil)'].describe(
                percentiles = [treshold_mean])[str(int(treshold_mean*100))+'%']:
                min_dates.append(i)
    else:
        min_dates.append(i)

# add minimums lines to plot
for m in min_dates:
    fig.add_vline(x = m)
    
fig.update_yaxes(range=[-0.1, data['Diff(Verschil)'].describe()['max'] + 0.1])
fig.show()

In [105]:
data = filter_data(df_leij, 101, 4, 1, True)


Unnamed: 0_level_0,Weir compartment,Discharge(Q),Diff(Verschil),filtered diff,derivative order 1
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-01-01 00:00:00+00:00,211M_211N,0.646808,-0.021646,-0.006689,-0.001139
2018-01-02 00:00:00+00:00,211M_211N,0.725779,-0.018823,-0.007868,-0.001215
2018-01-03 00:00:00+00:00,211M_211N,1.014376,-0.007400,-0.009115,-0.001276
2018-01-04 00:00:00+00:00,211M_211N,0.836132,-0.000322,-0.010416,-0.001323
2018-01-05 00:00:00+00:00,211M_211N,0.711780,-0.020821,-0.011756,-0.001355
...,...,...,...,...,...
2021-07-05 00:00:00+00:00,211M_211N,0.494004,0.224000,0.245366,0.001011
2021-07-06 00:00:00+00:00,211M_211N,0.449074,0.252667,0.246303,0.000861
2021-07-07 00:00:00+00:00,211M_211N,0.485662,0.277416,0.247084,0.000700
2021-07-08 00:00:00+00:00,211M_211N,0.456213,0.280677,0.247700,0.000530


In [147]:

if mins.iloc[0]['Diff(Verschil)'] / data[mins.index[0] + datetime.timedelta(weeks=-5) : \
                                      mins.index[0] + datetime.timedelta(weeks=-1)]\
                                        ['Diff(Verschil)'].describe()['mean'] < 0.4:
    print('oi')



oi


In [124]:
data[str(mins.index[0]) : str(mins.index[0] +datetime.timedelta(weeks=+4))]

Unnamed: 0_level_0,Weir compartment,Discharge(Q),Diff(Verschil),filtered diff,derivative order 1,min,max
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2018-03-24 00:00:00+00:00,211M_211N,0.377254,-0.017163,0.029274,-0.002692,-0.002692,
2018-03-25 00:00:00+00:00,211M_211N,0.321404,-0.025177,0.024024,-0.002685,,
2018-03-26 00:00:00+00:00,211M_211N,0.283504,-0.029589,0.017648,-0.00253,,
2018-03-27 00:00:00+00:00,211M_211N,0.309358,-0.08659,0.010901,-0.002333,,
2018-03-28 00:00:00+00:00,211M_211N,0.292828,-0.048323,0.004582,-0.002149,,
2018-03-29 00:00:00+00:00,211M_211N,0.244055,-0.022315,-0.000959,-0.001948,,
2018-03-30 00:00:00+00:00,211M_211N,0.306883,-0.021146,-0.006131,-0.001758,,
2018-03-31 00:00:00+00:00,211M_211N,0.416181,-0.014531,-0.010991,-0.001577,,
2018-04-01 00:00:00+00:00,211M_211N,0.434054,-0.013052,-0.015463,-0.0014,,
2018-04-02 00:00:00+00:00,211M_211N,0.435618,-0.01235,-0.019582,-0.001225,,


In [54]:
plot_filtered(filter_data(df_leij, 101, 4, 1, default = True))

In [5]:
df_leij[df_leij.index == 0]

Unnamed: 0.1,Unnamed: 0,Time,Weir compartment,Discharge(Q),Diff(Verschil)
0,0,2018-01-02T00:00:00Z,211VEL_211N,0.101608,-0.026198
