In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import numpy as np
from scipy.signal import argrelextrema
from scipy import signal

plt.rcParams['figure.figsize'] = (10, 8)

# Choose Time Interval

In [None]:
# simulation window; the date filters further down select
# `>= start_date` and `< end_date`, so end_date itself is excluded
start_date = datetime.date(2017,9,1)
end_date = datetime.date(2018,3,1)

# switch for the optional check plots (cells test `if plot == True`)
plot = False

# True: find high/low water with the self-made extrema algorithm
# False: load the high/low water times from the tide table csv instead
extrema_algorithm = True

# resolution of the data: the water levels are resampled to this frequency
round_to = '30 min'

# create/use pickled data set
create_pkl = False # pkl has to be created only once
# True: read the water levels from the pickle, False: read the original csv
use_pickle_waterlevels = True

# Load Water level data

In [None]:
# Load the Vlieland water levels, restrict them to [start_date, end_date) and
# put them on a regular grid with a step of `round_to`.

# all tide-gauge files live in one directory; the pickle is a cached copy of the long csv
tide_data_dir = 'C:/Users/Marleen/Documents/thesis project/Data zaken/Data/Getij data/'
file_name = "pkl_water_level_vlieland_1990_2018"
pickle_path = tide_data_dir + file_name + ".pkl"

# create pkl file if needed (saves loading time in future runs)
if create_pkl:
    path = tide_data_dir + "waterhoogte_Vlieland_1990_2018.csv"
    df_all_water_levels = pd.read_csv(path, delimiter=",")

    # set date time to pandas timestamp
    df_all_water_levels['date_time'] = pd.to_datetime(df_all_water_levels['date_time'], dayfirst=True)

    # keep only the timestamp and the water height
    df_all_water_levels = df_all_water_levels[['date_time', 'waterheight']]
    df_all_water_levels.to_pickle(pickle_path)

if use_pickle_waterlevels:
    # NOTE: unpickling can execute code stored in the file, only load pickles written by this notebook
    df_all_water_levels = pd.read_pickle(pickle_path)
else:
    # read the original 10-minute data set
    path = tide_data_dir + "Waterhoogte Vlieland gemeten per 10 minuten.csv"
    df_all_water_levels = pd.read_csv(path, delimiter=";")

    # NOTE(review): the pickle branch parses with dayfirst=True, this branch does not;
    # confirm the date layout of this csv
    df_all_water_levels['date_time'] = pd.to_datetime(df_all_water_levels['date_time'])

    # keep only the timestamp and the water height
    df_all_water_levels = df_all_water_levels[['date_time', 'waterheight']]

# select dates: start_date inclusive, end_date exclusive
measurement_dates = df_all_water_levels['date_time'].dt.date
in_window = (measurement_dates >= start_date) & (measurement_dates < end_date)

# index by timestamp; set_index returns a new frame, so the selection is never modified in place
df_water_levels = df_all_water_levels.loc[in_window].set_index('date_time')
if df_water_levels.empty:
    raise ValueError("no water level measurements between {} and {}".format(start_date, end_date))

# add missing data with linear interpolation
idx = pd.date_range(df_water_levels.index.min(), df_water_levels.index.max(), freq='10min') # this should be 10 minutes always
df_water_levels = df_water_levels.reindex(index=idx, fill_value=np.nan)
df_water_levels = df_water_levels.interpolate(method='linear')

# round the water levels to integers
df_water_levels['waterheight'] = df_water_levels['waterheight'].round()

# only get necessary intervals (first measurement of every `round_to` bin)
df_water_levels = df_water_levels.resample(round_to).first()

# move the timestamps back into a column and use a plain integer index
df_water_levels['date_time'] = df_water_levels.index
df_water_levels = df_water_levels.reset_index(drop=True)
df_water_levels = df_water_levels[['date_time', 'waterheight']]

df_water_levels.head()

# Find high/low tide time stamps

In [None]:
def remove_double_indices(indices, minimum_dist=20):
    """Drop extrema indices that lie too close to an earlier index.

    When argrelextrema cannot single out one local/global maximum (for
    example on a flat extremum) it returns several neighbouring indices
    for the same high or low water. This keeps the first index of such a
    group and removes the ones that follow too closely.

    Parameters
    ----------
    indices : sequence of int
        Candidate positions, in the order in which they should be compared.
    minimum_dist : int, optional
        An index is removed when it is smaller than some earlier index plus
        this distance. Defaults to 20, the value that used to be hard-coded.

    Returns
    -------
    list
        The remaining indices, in their original order.

    Notes
    -----
    Every index is compared with all earlier indices, including earlier
    ones that are removed themselves, so [0, 15, 30] collapses to [0].
    If a removed value occurs more than once, only its first occurrence is
    taken out of the list.
    """
    remaining = list(indices)

    # "smaller than some earlier index + minimum_dist" is the same as
    # "smaller than the largest earlier index + minimum_dist", so one pass
    # with a running maximum replaces the comparison of all pairs
    values_to_remove = set()
    largest_so_far = None
    for value in remaining:
        if largest_so_far is not None and value < largest_so_far + minimum_dist:
            values_to_remove.add(value)
        if largest_so_far is None or value > largest_so_far:
            largest_so_far = value

    # remove values from indices list
    for value in values_to_remove:
        remaining.remove(value)
    return remaining

# Label every time step that is a high water ('HW') or a low water ('LW')
# in a new column 'extreem', then trim the data so it starts and ends at high water.
if extrema_algorithm:

    # number of neighbours to compare, note: this is dependent on time step size
    # also note: the outcome of the algorithm is very dependent on this value, so always check
    # the high/low tide intervals
    n = 10

    # find indices of max/min water levels in interval of n neighbours
    water_heights = df_water_levels.waterheight.values
    indices_high = argrelextrema(water_heights, np.greater_equal, order=n)[0]
    indices_low = argrelextrema(water_heights, np.less_equal, order=n)[0]

    # remove neighbouring indices due to flat extrema
    final_indices_high = remove_double_indices(indices_high)
    final_indices_low = remove_double_indices(indices_low)

    # add column with extrema; work on a copy so df_water_levels itself stays untouched
    df_water_levels_tides = df_water_levels.copy()
    # object dtype, because the column holds the strings 'HW'/'LW' next to NaN
    df_water_levels_tides['extreem'] = pd.Series(np.nan, index=df_water_levels_tides.index, dtype=object)
    # one positional indexer for rows and column; chained `df[col].iloc[rows] = ...`
    # may write to a temporary object instead of the frame
    extreem_pos = df_water_levels_tides.columns.get_loc('extreem')
    df_water_levels_tides.iloc[final_indices_high, extreem_pos] = 'HW'
    df_water_levels_tides.iloc[final_indices_low, extreem_pos] = 'LW'

else:

    # load the tide data
    path = "C:/Users/Marleen/Documents/thesis project/Data zaken/Data/Getij data/Tijden Hoogwater en Laagwater Vlieland vanaf 2016.csv"
    df_tide_times = pd.read_csv(path, delimiter=";")

    # remove unnecessary columns
    df_tide_times = df_tide_times[['date_time', 'waterhoogte', 'extreem']]

    # set date time to pandas timestamp
    df_tide_times['date_time'] = pd.to_datetime(df_tide_times['date_time'])

    # select dates (copy, because the columns of the selection are overwritten below)
    tide_dates = df_tide_times['date_time'].dt.date
    df_tide_times = df_tide_times.loc[(tide_dates >= start_date) & (tide_dates < end_date)].copy()

    # transform data from local time to UTC
    # NOTE(review): the time stamps are interpreted as 'Europe/London'; confirm this is the
    # time zone the tide table was published in
    df_tide_times['date_time'] = df_tide_times['date_time'].dt.tz_localize('Europe/London').dt.tz_convert('UTC')

    # round timestamps to the resolution of the water level data (`round_to`)
    df_tide_times['date_time'] = df_tide_times['date_time'].dt.round(round_to)

    # convert to same date time format as water levels data
    df_tide_times['date_time'] = df_tide_times['date_time'].values.astype('datetime64[ns]')

    # perform left join on date time
    df_water_levels_tides = pd.merge(df_water_levels, df_tide_times[['date_time','extreem']], how='left', on='date_time')

# make sure the data set starts and ends at high water
is_high_water = (df_water_levels_tides['extreem'] == 'HW').values
df_water_levels_tides = df_water_levels_tides.iloc[is_high_water.argmax():]
last_high_water = df_water_levels_tides[df_water_levels_tides['extreem'] == 'HW'].last_valid_index()
# copy, so later cells can add columns without writing into a slice
df_water_levels_tides = df_water_levels_tides.loc[:last_high_water].copy()

# create df with high/low water points
df_high_water = df_water_levels_tides[df_water_levels_tides['extreem'] == 'HW']
df_low_water = df_water_levels_tides[df_water_levels_tides['extreem'] == 'LW']

# create a plot with water levels and high/low tide points
plt.plot(df_water_levels['date_time'], df_water_levels['waterheight'])
plt.plot(df_high_water.date_time, df_high_water.waterheight, 'ro')
plt.plot(df_low_water.date_time, df_low_water.waterheight, 'go')

df_water_levels_tides.head()

# Calculate the length of the tidal cycle and the length until low tide

For every tidal cycle we want to know how long the cycle lasts (from high tide to the next high tide) and also the time it takes before low tide is reached (from high tide to low tide).

The time between tidal cycles is calculated as the number of steps per tidal cycle.

In [None]:
# For every high water: store the number of time steps until the next high water
# (length of the tidal cycle) and until the low water inside that cycle.

# work on an own copy so the new columns are never written into a slice of another frame
df_water_levels_tides = df_water_levels_tides.copy()

# create extra column in df
df_water_levels_tides['time_steps_in_cycle'] = np.nan
df_water_levels_tides['time_steps_to_low_tide'] = np.nan

# get all index labels with high water / low water
high_water_indices = df_water_levels_tides.index[(df_water_levels_tides['extreem'] == 'HW').values]
low_water_indices = df_water_levels_tides.index[(df_water_levels_tides['extreem'] == 'LW').values]

# number of time steps between high tides
num_time_steps_high_tides = np.diff(high_water_indices)

# calculate time steps till low tide
print(len(low_water_indices), len(high_water_indices))
# the data starts and ends at high water, so every cycle needs exactly one low water
if len(low_water_indices) != len(high_water_indices) - 1:
    raise ValueError(
        "expected one low water per tidal cycle, found {} low waters for {} high waters; "
        "check the extrema detection".format(len(low_water_indices), len(high_water_indices)))
time_steps_to_low_tide = low_water_indices.values - high_water_indices[:-1].values

# the last high water is the end of the simulation, so it does not start a cycle
cycle_starts = high_water_indices[:-1]

# couple number of time steps to high water time points
# (single .loc call with row and column; chained `df.col.loc[rows] = ...` may not reach the frame)
df_water_levels_tides.loc[cycle_starts, 'time_steps_in_cycle'] = num_time_steps_high_tides

# also add number of time steps to low tide for every high tide
df_water_levels_tides.loc[cycle_starts, 'time_steps_to_low_tide'] = time_steps_to_low_tide

# check if intervals make sense (Series.max/min skip the NaN rows between the high waters)
print(df_water_levels_tides['time_steps_in_cycle'].max(), df_water_levels_tides['time_steps_in_cycle'].min())
print(df_water_levels_tides['time_steps_to_low_tide'].max(), df_water_levels_tides['time_steps_to_low_tide'].min())

In [None]:
# look up the rows with specific cycle lengths (presumably the extreme values printed
# by the previous cell); only the last expression is rendered as cell output
df_water_levels_tides.loc[df_water_levels_tides['time_steps_in_cycle'] == 29]
df_water_levels_tides.loc[df_water_levels_tides['time_steps_in_cycle'] == 28]
df_water_levels_tides.loc[df_water_levels_tides['time_steps_to_low_tide'] == 9]

In [None]:
# quick plot to check if max and min tidal cycle lengths are correct
# The day is taken from the data (the day on which the longest tidal cycle starts),
# so the plot always falls inside the window chosen with start_date/end_date.
longest_cycle_start = df_water_levels_tides['time_steps_in_cycle'].idxmax()
date = df_water_levels_tides.loc[longest_cycle_start, 'date_time'].date()

plt.plot(df_water_levels_tides.waterheight[df_water_levels_tides.date_time.dt.date == date])
plt.plot(df_high_water.waterheight[df_high_water.date_time.dt.date == date], 'ro')
plt.plot(df_low_water.waterheight[df_low_water.date_time.dt.date == date], 'go')
plt.title('Water levels on {}'.format(date))

# Insert the reference weight data

The data has no date time yet, only the day within a year is mentioned. This day should be converted to a date time object and the data set should then be merged with the existing data set (and only on the high tide moments). 

Data comes from Zwart & Hulscher et al. (1996): Seasonal and Annual variation in body weight,...

(Note: this part may need to be adjusted if other years are simulated or if the simulation includes a leap year.)

In [None]:
# Attach the reference weight (given per day of the year) to the high water rows.

# simulation years
start_year = start_date.year
end_year = end_date.year

# days of the new year that belong to the simulation: january + february
# (this should be 31 + 29 in a leap year)
days_in_new_y = 31 + 28

# load the data
path = "C:/Users/Marleen/Documents/thesis project/Data zaken/Streefgewicht Scholekster.csv"
df_ref_weight = pd.read_csv(path, delimiter=";")

# add all day numbers, two more than a normal year so the interpolation can run past day 365
all_days = pd.DataFrame({'day': range(1, 368)})
df_ref_weight = pd.merge(all_days, df_ref_weight, how='left')

# interpolate to days
df_ref_weight = df_ref_weight.interpolate(method='linear') # todo: interpolate to 12 hour or half hour intervals?

# the last two rows (days 366 and 367) were only needed for the interpolation
df_ref_weight = df_ref_weight.iloc[:-2].copy()

# add year to the data: the first days of the year belong to end_year, the rest to start_year
df_ref_weight['year'] = np.where(df_ref_weight['day'] < days_in_new_y + 1, end_year, start_year)

# add date time to dataframe (year followed by day of the year)
df_ref_weight['date_time'] = pd.to_datetime(df_ref_weight['year'] * 1000 + df_ref_weight['day'], format='%Y%j')

# merge with rest of the data set on calendar day; this is an inner merge, so time steps
# on days without a reference weight are dropped
tide_times = df_water_levels_tides['date_time']
weight_times = df_ref_weight['date_time']
df_water_levels_tides_weight = pd.merge(
    df_water_levels_tides, df_ref_weight['weight'],
    left_on=[tide_times.dt.year, tide_times.dt.month, tide_times.dt.day],
    right_on=[weight_times.dt.year, weight_times.dt.month, weight_times.dt.day])

# plot
if plot:
    plt.plot(df_water_levels_tides_weight.date_time, df_water_levels_tides_weight.weight)
    plt.title('Reference Weight')

# the weight is only needed at high water, set all other rows to np.nan
# (single .loc call with row mask and column; chained `df.weight.loc[mask] = ...` may not reach the frame)
not_high_water = df_water_levels_tides_weight['extreem'] != "HW"
df_water_levels_tides_weight.loc[not_high_water, 'weight'] = np.nan

# remove the key columns that the merge added
df_water_levels_tides_weight = df_water_levels_tides_weight[['date_time', 'waterheight', 'extreem', 'time_steps_in_cycle', 'time_steps_to_low_tide', 'weight']]

# change name for convenience
df_final = df_water_levels_tides_weight

In [None]:
# show high water rows 230 up to (not including) 240 as a spot check
df_final.loc[df_final['extreem'] == "HW"].iloc[230:240]

# Insert the temperature data

The temperature data should be coupled to the tidal cycles. Ideally we would use the average temperature of the day in which most of the cycle falls. A simpler alternative is to take the temperature at the start of the cycle (this is easier to implement).

For now: just take the temperature at the start of the cycle, if time is left we can change this. Note that this makes the data look a bit more "coarse".

In [None]:
# Attach the mean daily temperature to the high water rows.

# load the data
path = "C:/Users/Marleen/Documents/thesis project/Data zaken/Data/KNMI data/Weergegevens KNMI Vlieland.csv"
df_temperature = pd.read_csv(path, delimiter=",")

# only keep temperature and day/time columns (copy, because the columns are renamed below)
df_temperature = df_temperature[['YYYYMMDD', '    T']].copy()

# change column names
df_temperature.columns = ['date', 'temperature']

# set date time to pandas timestamp
df_temperature['date_time'] = pd.to_datetime(df_temperature['date'], format='%Y%m%d')
df_temperature = df_temperature[['date_time', 'temperature']]

# get part of the data we want (copy, because the temperature column is overwritten below)
temperature_dates = df_temperature['date_time'].dt.date
df_temperature = df_temperature[(temperature_dates >= start_date) & (temperature_dates < end_date)].copy()

# convert to floats
df_temperature['temperature'] = df_temperature['temperature'].astype(float)

# get mean temperature per day
df_temperature_means = df_temperature.groupby('date_time').mean()
df_temperature_means['date_time'] = df_temperature_means.index
df_temperature_means = df_temperature_means.reset_index(drop=True)

# change temperature to degrees celsius (instead of 0.1 degrees celsius)
df_temperature_means['temperature'] = df_temperature_means['temperature'] / 10

# couple with final df on calendar day; this is an inner merge, so time steps on days
# without a temperature are dropped
final_times = df_final['date_time']
mean_times = df_temperature_means['date_time']
df_final = pd.merge(df_final, df_temperature_means,
                    left_on=[final_times.dt.year, final_times.dt.month, final_times.dt.day],
                    right_on=[mean_times.dt.year, mean_times.dt.month, mean_times.dt.day])

# grab final columns (the merge added key columns and suffixed both date_time columns)
df_final = df_final[['date_time_x', 'waterheight', 'extreem', 'time_steps_in_cycle',
                     'time_steps_to_low_tide', 'weight', 'temperature']]
df_final = df_final.rename(columns={'date_time_x': 'date_time'})

# plot to check if data is logical
if plot:
    plt.plot(df_final.date_time, df_final.temperature)

# set temperature of non HW rows to np.nan
# (single .loc call with row mask and column; chained `df.temperature.loc[mask] = ...` may not reach the frame)
df_final.loc[df_final['extreem'] != "HW", 'temperature'] = np.nan
df_final.head()

In [None]:
# average number of time steps between a high water and the following low water
df_final['time_steps_to_low_tide'].mean()

# Add Cockle Fresh Weight Change

In [None]:
# Attach the relative fresh weight growth of 1 and 2 year old cockles to the high water rows.

# load the data
path = "C:/Users/Marleen/Documents/thesis project/Data zaken/Data/Voedsel data/Cockle_Fresh_Weight_Change.csv"
df_cockle_change = pd.read_csv(path, delimiter=";")

# add day of year, rounded to whole days
df_cockle_change['day_in_year'] = ((df_cockle_change['Year_Time'] * 365 + 1) % 365).round()

# divide fresh weight change in 1 and 2 year cockle groups
# (copies, because columns are added to the groups below)
df_cockle_change_1y = df_cockle_change[(df_cockle_change.Year_Time > 0.5) & (df_cockle_change.Year_Time < 1.5)].copy()
df_cockle_change_2y = df_cockle_change[(df_cockle_change.Year_Time > 1.5) & (df_cockle_change.Year_Time < 2.5)].copy()

# add correct years to dataframes
# NOTE(review): the wet weight cell uses 184 as the year boundary, this cell uses 200;
# confirm that days 184-199 really belong to end_date.year here
df_cockle_change_1y['year'] = np.where(df_cockle_change_1y['day_in_year'] < 200, end_date.year, start_date.year)
df_cockle_change_2y['year'] = np.where(df_cockle_change_2y['day_in_year'] < 200, end_date.year, start_date.year)

# create date time from year and day_in_year
# The key is cast to int first: a float is turned into text like '2017213.0',
# which does not match the '%Y%j' format.
# NOTE(review): a day_in_year that rounds to 0 is not a valid '%j' value and raises here.
date_key_1y = (df_cockle_change_1y['year'] * 1000 + df_cockle_change_1y['day_in_year']).astype(int)
date_key_2y = (df_cockle_change_2y['year'] * 1000 + df_cockle_change_2y['day_in_year']).astype(int)
df_cockle_change_1y['date_time'] = pd.to_datetime(date_key_1y, format='%Y%j')
df_cockle_change_2y['date_time'] = pd.to_datetime(date_key_2y, format='%Y%j')

# rename growth columns
df_cockle_change_1y = df_cockle_change_1y.rename(columns={'Growth': '1y_fw_cockle_growth'})
df_cockle_change_2y = df_cockle_change_2y.rename(columns={'Growth': '2y_fw_cockle_growth'})

# create a grid with a step of one minute between the first and last 1y data point
new_df = pd.DataFrame()
new_df['date_time'] = pd.date_range(start=df_cockle_change_1y.date_time.iloc[0], end=df_cockle_change_1y.date_time.iloc[-1],
                                   freq='min')
df_cockles = pd.merge(new_df, df_cockle_change_1y[['date_time','1y_fw_cockle_growth']], how='left')
df_cockles = pd.merge(df_cockles, df_cockle_change_2y[['date_time','2y_fw_cockle_growth']], how='left')

# interpolate growth rates linearly between the data points
df_cockles['1y_fw_cockle_growth'] = df_cockles['1y_fw_cockle_growth'].interpolate(method='linear')
df_cockles['2y_fw_cockle_growth'] = df_cockles['2y_fw_cockle_growth'].interpolate(method='linear')

# merge with data
df_final = pd.merge(df_final, df_cockles[['date_time','1y_fw_cockle_growth', '2y_fw_cockle_growth']],
                    how='left', on='date_time')

# set growth on non HW points to nan
# (single .loc call with row mask and columns; chained `df[col].loc[mask] = ...` may not reach the frame)
not_high_water = df_final['extreem'] != 'HW'
df_final.loc[not_high_water, ['1y_fw_cockle_growth', '2y_fw_cockle_growth']] = np.nan

# convert cockle growth/year to cockle growth per cycle
# time steps in a year of 365 days; for round_to == '30 min' this is 365 * 48
steps_per_year = 365 * (pd.Timedelta(days=1) / pd.Timedelta(round_to))
df_final['1y_fw_cockle_growth'] = (df_final['1y_fw_cockle_growth'] / steps_per_year) * df_final.time_steps_in_cycle
df_final['2y_fw_cockle_growth'] = (df_final['2y_fw_cockle_growth'] / steps_per_year) * df_final.time_steps_in_cycle

# formula in model then is start_weight += (start_weight * (growth))

# Add wet weight change for cockles

As for the fresh weight, we again have a relative growth rate. Note that the mj cockles follow the same change in wtw as the 2y cockles. 

In [None]:
# relative growth and afdw as a function of cockle age, from webtics & klepper1989
def relative_growth_wtw_cockles(t):
    """Return (afdw, rel_g_r) for a cockle of age t.

    t is the time since birth; the notebook passes it in years
    (np.arange(0, 2.5, 1/365)). rel_g_r is the derivative with respect to t
    of the exponent used for afdw, i.e. the relative growth rate per unit of t.
    """
    seasonal_phase = 3.05164 + 2 * np.pi * t
    shifted_age = 1.28867 + t

    exponent = ((5.13146 * t) / shifted_age) + 0.59952 * np.sin(seasonal_phase)
    afdw = 0.0136285 * np.exp(exponent)

    rel_g_r = (6.61274 / (shifted_age ** 2)) + 3.76689 * np.cos(seasonal_phase)
    return afdw, rel_g_r

# Attach the relative wet weight growth of 1 and 2 year old cockles to the high water rows.

# create data with relative growth for every day since birth
time_since_birth = np.arange(0, 2.5, 1/365)
rel_gr = relative_growth_wtw_cockles(time_since_birth)[1]
growth_by_age = pd.DataFrame({'Year_Time': time_since_birth, 'Growth_Rate_Year': rel_gr})

# Whole days since birth. Year_Time * 365 is only approximately a whole number in floating
# point, so it is rounded before the day in the year is derived from it; the day then
# stays an integer, which the '%Y%j' parsing below needs.
days_since_birth = np.round(time_since_birth * 365).astype(int)
growth_by_age['day_in_year'] = (days_since_birth + 1) % 365

# divide wet weight change in 1 and 2 year cockle groups
# (copies, because columns are added to the groups below)
df_cockle_wtw_change_1y = growth_by_age[(growth_by_age.Year_Time > 0.5) & (growth_by_age.Year_Time < 1.5)].copy()
df_cockle_wtw_change_2y = growth_by_age[(growth_by_age.Year_Time > 1.5) & (growth_by_age.Year_Time < 2.5)].copy()

# add correct years to dataframes
df_cockle_wtw_change_1y['year'] = np.where(df_cockle_wtw_change_1y['day_in_year'] < 184, end_date.year, start_date.year)
df_cockle_wtw_change_2y['year'] = np.where(df_cockle_wtw_change_2y['day_in_year'] < 184, end_date.year, start_date.year)

# create date time from year and day_in_year; day_in_year runs from 0 to 364 here,
# the + 1 turns it into a valid day of the year (1 to 365)
df_cockle_wtw_change_1y['date_time'] = pd.to_datetime(df_cockle_wtw_change_1y['year'] * 1000 +
                                                      df_cockle_wtw_change_1y['day_in_year'] + 1, format='%Y%j',
                                                     errors='coerce')
df_cockle_wtw_change_2y['date_time'] = pd.to_datetime(df_cockle_wtw_change_2y['year'] * 1000 +
                                                      df_cockle_wtw_change_2y['day_in_year'] + 1, format='%Y%j',
                                                     errors='coerce')

# rename growth columns
df_cockle_wtw_change_1y = df_cockle_wtw_change_1y.rename(columns={'Growth_Rate_Year': '1y_wtw_cockle_growth'})
df_cockle_wtw_change_2y = df_cockle_wtw_change_2y.rename(columns={'Growth_Rate_Year': '2y_wtw_cockle_growth'})

# create a grid with a step of one minute between the first and last 1y data point
new_df = pd.DataFrame()
new_df['date_time'] = pd.date_range(start=df_cockle_wtw_change_1y.date_time.iloc[0],
                                    end=df_cockle_wtw_change_1y.date_time.iloc[-1], freq='min')
df_wtw_cockles = pd.merge(new_df, df_cockle_wtw_change_1y[['date_time','1y_wtw_cockle_growth']], how='left')
df_wtw_cockles = pd.merge(df_wtw_cockles, df_cockle_wtw_change_2y[['date_time','2y_wtw_cockle_growth']], how='left')

# interpolate growth rates linearly between the daily values
df_wtw_cockles['1y_wtw_cockle_growth'] = df_wtw_cockles['1y_wtw_cockle_growth'].interpolate(method='linear')
df_wtw_cockles['2y_wtw_cockle_growth'] = df_wtw_cockles['2y_wtw_cockle_growth'].interpolate(method='linear')

# merge with data
df_final = pd.merge(df_final, df_wtw_cockles[['date_time','1y_wtw_cockle_growth', '2y_wtw_cockle_growth']],
                    how='left', on='date_time')

# set growth on non HW points to nan
# (single .loc call with row mask and columns; chained `df[col].loc[mask] = ...` may not reach the frame)
not_high_water = df_final['extreem'] != 'HW'
df_final.loc[not_high_water, ['1y_wtw_cockle_growth', '2y_wtw_cockle_growth']] = np.nan

# convert wtw cockle growth/year to cockle growth per cycle
# time steps in a year of 365 days; for round_to == '30 min' this is 365 * 48
steps_per_year = 365 * (pd.Timedelta(days=1) / pd.Timedelta(round_to))
df_final['1y_wtw_cockle_growth'] = (df_final['1y_wtw_cockle_growth'] / steps_per_year) * df_final.time_steps_in_cycle
df_final['2y_wtw_cockle_growth'] = (df_final['2y_wtw_cockle_growth'] / steps_per_year) * df_final.time_steps_in_cycle

# Add change in density Macoma Balthica

# Save the dataframe 

The dataframe is pickled using the `to_pickle` method of pandas. Pickling saves the dataframe in its current state, so both the data and its format are preserved (this is not the case if we save the df to .csv format).

In [None]:
# the file name shows the time interval of the data: <start y_m_d>_to_<end y_m_d>
start_part = "_".join(str(part) for part in (start_date.year, start_date.month, start_date.day))
end_part = "_".join(str(part) for part in (end_date.year, end_date.month, end_date.day))
file_name = start_part + "_to_" + end_part

# directory with the input data of the model
path = 'C:/Users/Marleen/Documents/thesis project/oystercatcher-model/Input data/'

# write the final dataframe as a pickle
df_final.to_pickle(path + file_name + ".pkl")

In [None]:
# # for 2018, in February and January some points are missing #### THIS PART IS REPLACED BY LINEAR INTERPOLATION ### 
# if (start_date.year == 2017) & (end_date.year == 2018):

#     date_time_str = '2018-02-08 14:00:00.00000'  
#     date_time_obj = datetime.datetime.strptime(date_time_str, '%Y-%m-%d %H:%M:%S.%f')
#     df_water_levels = df_water_levels.append({'date_time' : date_time_obj, 'waterheight' : 43}, ignore_index=True)
#     df_water_levels = df_water_levels.sort_values(by='date_time')
    
#     date_time_str = '2018-01-16 08:30:00.00000'  
#     date_time_obj = datetime.datetime.strptime(date_time_str, '%Y-%m-%d %H:%M:%S.%f')
#     df_water_levels = df_water_levels.append({'date_time' : date_time_obj, 'waterheight' : 160}, ignore_index=True)
#     df_water_levels = df_water_levels.sort_values(by='date_time')
    
#     date_time_str = '2018-02-06 19:00:00.00000'  
#     date_time_obj = datetime.datetime.strptime(date_time_str, '%Y-%m-%d %H:%M:%S.%f')
#     df_water_levels = df_water_levels.append({'date_time' : date_time_obj, 'waterheight' : -123}, ignore_index=True)
#     df_water_levels = df_water_levels.sort_values(by='date_time')

# # convert to same date time format as water levels data
# df_tide_times['date_time'] = df_tide_times['date_time'].values.astype('datetime64[ns]')

# # perform left join on date time
# df_water_levels_tides = pd.merge(df_water_levels, df_tide_times[['date_time','extreem']], how='left', on='date_time')

# # make sure the data set starts and ends at high water
# df_water_levels_tides = df_water_levels_tides.iloc[(df_water_levels_tides['extreem'] == 'HW').values.argmax():]
# df_water_levels_tides = df_water_levels_tides.loc[:(df_water_levels_tides[df_water_levels_tides['extreem'] == 'HW']).last_valid_index()]

# # create df with high/low water points 
# df_high_water = df_water_levels_tides[df_water_levels_tides['extreem'] == 'HW']
# df_low_water = df_water_levels_tides[df_water_levels_tides['extreem'] == 'LW']
  

# # create a plot with water levels and high/low tide points
# plt.plot(df_water_levels['date_time'], df_water_levels['waterheight'])
# plt.plot(df_high_water.date_time, df_high_water.waterheight, 'ro')
# plt.plot(df_low_water.date_time, df_low_water.waterheight, 'go')