# In [22]:  (Jupyter notebook cell prompt)
import pandas as pd
import scipy.io as sio
import numpy as np
import datetime
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import re

def Array_To_DataFrame(matDF, varName) :
    """Flatten every entry of ``matDF[varName]`` into a single 1-D array.

    Despite the name, the result is a NumPy array, not a DataFrame.

    Parameters
    ----------
    matDF : mapping or pandas.DataFrame
        Container indexable by ``varName``. The selected column must expose
        ``.shape`` and support ``column[i]`` for every ``i`` in
        ``range(column.shape[0])``; each entry must provide ``.flatten()``.
    varName : hashable
        Key of the column to flatten.

    Returns
    -------
    numpy.ndarray
        The flattened entries joined end to end, in order. An empty float64
        array is returned when the column has no entries.
    """
    column = matDF[varName]
    # Seed with an empty float array so the result dtype (and the result for
    # an empty column) is the same as when growing from ``np.array([])``.
    pieces = [np.array([])]
    pieces.extend(column[i].flatten() for i in range(column.shape[0]))
    # Concatenate once: joining inside the loop would re-copy everything
    # accumulated so far on every iteration.
    return np.concatenate(pieces)
def matFile_To_DF(matFile) :
    """Load a MATLAB ``.mat`` file and return its variables as a DataFrame.

    Parameters
    ----------
    matFile : str or file-like
        Passed straight to ``scipy.io.loadmat``.

    Returns
    -------
    pandas.DataFrame
        One column per loaded variable whose name does not start with an
        underscore; each variable is flattened to 1-D first.
    """
    loaded = sio.loadmat(matFile)
    columns = {}
    for name, value in loaded.items():
        # Underscore-prefixed keys (e.g. loadmat's ``__header__``) are not
        # data variables and are left out.
        if name[0] == '_':
            continue
        columns[name] = np.array(value).flatten()
    return pd.DataFrame(columns)

# Import the mid-estuary logger data.  A forward slash is used as the path
# separator because it is accepted on Windows as well as on POSIX systems
# (a backslash only works on Windows).
matDataMid = sio.loadmat('2012-13/hobo_PrivateDock_01.mat')

# Reference date for converting MATLAB serial date numbers to timestamps:
# one day before 0000-01-01, so that adding a datenum (in days) to it gives
# the calendar date.
origin = np.datetime64('0000-01-01', 'D') - np.timedelta64(1, 'D')

# Drop the entries whose key starts with '_' (loadmat metadata) and flatten
# every remaining variable to 1-D so they can share one DataFrame.
matDataMidFiltered = {dictKey:dictValue for dictKey, dictValue in matDataMid.items() if dictKey[0] != '_'}
matMidDF = pd.DataFrame({dictKey: np.array(dictValue).flatten() for dictKey, dictValue in matDataMidFiltered.items()})

# Flatten each variable into one continuous 1-D array: time ('DN'), surface
# and bottom salinity ('SS', 'SB'), bottom pressure ('BP') and bottom and
# surface temperature ('TB', 'TS').
matMidTimeArray = Array_To_DataFrame(matMidDF, 'DN')
matMidSurfaceSalinityArray = Array_To_DataFrame(matMidDF, 'SS')
matMidBottomSalinityArray = Array_To_DataFrame(matMidDF, 'SB')
matMidBottomPressureArray = Array_To_DataFrame(matMidDF, 'BP')
matMidBottomTemperatureArray = Array_To_DataFrame(matMidDF, 'TB')
matMidSurfaceTemperatureArray = Array_To_DataFrame(matMidDF, 'TS')

# Array_To_DataFrame already returns 1-D arrays, so they are used directly.
matMidFlatDF = pd.DataFrame({
    'DN' : matMidTimeArray,
    'SS' : matMidSurfaceSalinityArray,
    'SB' : matMidBottomSalinityArray,
    'BP' : matMidBottomPressureArray,
    'TB' : matMidBottomTemperatureArray,
    'TS' : matMidSurfaceTemperatureArray,
})

# Expected sampling interval: five minutes, expressed in days because 'DN'
# holds day-based serial date numbers.  Written as an exact fraction rather
# than a truncated decimal so inserted timestamps do not drift.
timeStep = 5 / (24 * 60)

# Sort chronologically so that consecutive rows are neighbours in time.
matMidFlatDF = matMidFlatDF.sort_values(['DN'], ascending = [True])

# Walk the sorted timestamps; wherever two neighbours are more than 1.5 steps
# apart, record the missing five-minute timestamps between them.  The fillers
# are only collected here and appended in one go below, because concatenating
# inside the loop would copy the whole frame once per inserted row.
preTimestamp = None
missingTimestamps = []
for currentTimestamp in matMidFlatDF['DN'].to_numpy():
    if preTimestamp is not None:
        while (currentTimestamp - preTimestamp) > (timeStep * 1.5):
            preTimestamp += timeStep
            missingTimestamps.append(preTimestamp)
    preTimestamp = currentTimestamp

if missingTimestamps:
    # Filler rows carry only a timestamp; the measurements are NaN so that a
    # later interpolation step can fill them.  Every filler row gets index
    # label 0, the same label a freshly built single-row frame would have.
    dataMidDF = pd.DataFrame(
        {'DN' : missingTimestamps, 'SS' : np.nan, 'SB' : np.nan, 'BP' : np.nan, 'TB' : np.nan, 'TS' : np.nan},
        index = [0] * len(missingTimestamps),
    )
    matMidFlatDF = pd.concat([matMidFlatDF, dataMidDF])

# Re-sort so the filler rows land in their chronological positions.
matMidFlatDF = matMidFlatDF.sort_values(['DN'], ascending = [True])




# Fill the NaN values in matMidFlatDF by linear interpolation, modifying the
# frame in place.  With method='linear' pandas treats the rows as equally
# spaced and ignores the index, so the rows must already be in time order.
matMidFlatDF.interpolate(method = 'linear', inplace = True)

# Convert the 'DN' values (days, as floats) to timestamps by scaling a
# one-day timedelta and adding `origin`.
# NOTE(review): how float * timedelta64 is rounded, and whether it can
# overflow, appears to depend on the installed pandas version -- confirm the
# resulting timestamps are not truncated or NaT.
tmsMidriver = matMidFlatDF['DN'] * np.timedelta64(1, 'D') + origin

# Read the flow record.
flowDF = pd.read_csv('2012_2013alsea.csv')

# Scale 'max_va' by 0.0283168 (cubic feet -> cubic metres); the result is
# plotted as cubic metres per second.
# NOTE(review): assumes 'max_va' is in cubic feet per second -- confirm
# against the data source.
flowDF['fRate'] = flowDF['max_va'] * 0.0283168

# Build the date column for all rows at once from the year / month / day
# columns.  The components are cast to int first, as a per-row
# pd.Timestamp(year=int(...), ...) construction would do.
flowDF['DN'] = pd.to_datetime({
    'year' : flowDF['begin_yr'].astype(int),
    'month' : flowDF['month_nu'].astype(int),
    'day' : flowDF['day_nu'].astype(int),
})

# Assemble the processed table: the converted timestamps followed by the
# sensor columns under their export names, then write it to CSV without the
# index.
exportColumns = {
    'SurfaceSalCorrected' : 'SS',
    'BottomSalCorrected' : 'SB',
    'SurfaceTemp' : 'TS',
    'BottomTemp' : 'TB',
    'BottomPressure' : 'BP',
}
processedData = {'DN' : tmsMidriver}
for exportName, sourceName in exportColumns.items():
    processedData[exportName] = matMidFlatDF[sourceName]
matMidDF = pd.DataFrame(processedData)
matMidDF.to_csv('midestuary_2012_2013_processed.csv', index=False)

# Dates of the flow record, used as the x-axis of the flow-rate plot.
tmsFlow = flowDF['DN']

# Build one line figure per quantity (temperature, salinity, pressure, flow).

def _labelTraces(fig, labels):
    """Set legend name, legend group and hover text of fig's traces from labels."""
    for trace, label in zip(fig.data, labels):
        trace.name = label
        trace.hovertemplate = 'variable=' + label + '<br>x=%{x}<br>value=%{y}<extra></extra>'
        trace.legendgroup = label


def _styleFigure(fig, titleText, yAxisTitle):
    """Apply the title, date-axis settings and axis/legend titles shared by all figures."""
    fig.update_layout(title=dict(text= titleText, font=dict(size=25)))
    fig.update_xaxes(
        tickangle=30,
        rangeslider_visible=True,
        range = [pd.Timestamp('2013-01-20'),pd.Timestamp('2013-05-26')],
    )
    fig.update_layout(xaxis_title="Date", yaxis_title=yAxisTitle, legend_title="Locations")


tempFigLabels = ['Surface', 'Bottom']
# The x-axis uses the converted timestamps (tmsMidriver), like the salinity
# and pressure figures: the axis range below is given as Timestamps, which
# does not match the raw numeric 'DN' values.
tempfig = px.line(x = tmsMidriver, y = [matMidFlatDF['TS'], matMidFlatDF['TB']], color_discrete_sequence= ["cyan", "black"], title = "Temperature")
_labelTraces(tempfig, tempFigLabels)

# Salinity has the same surface/bottom pair of traces as temperature.
salfig = px.line(x = tmsMidriver, y = [matMidFlatDF['SS'], matMidFlatDF['SB']], color_discrete_sequence= ["cyan", "black"], title = "Salinity")
_labelTraces(salfig, tempFigLabels)

pressFigLabels = ['Bottom']
rofig = px.line(x = tmsMidriver, y = [matMidFlatDF['BP']], color_discrete_sequence= ["black"], title = "Pressure")
_labelTraces(rofig, pressFigLabels)

flowFigLabel = ['Flow Rate']
flowfig = px.line(x = tmsFlow, y = [flowDF['fRate']], color_discrete_sequence= ["green"], title = "Flow Rate")
_labelTraces(flowfig, flowFigLabel)

# Common presentation: large title, slanted ticks, range slider and the same
# initial date window on every figure.
_styleFigure(tempfig, "Temperature for 2012-2013", "Temperature (°C)")
_styleFigure(salfig, "Salinity for 2012-2013", "Salinity")
_styleFigure(rofig, "Pressure for 2012-2013", "Pressure")
_styleFigure(flowfig, "Flow Rate for 2012-2013", "Cubic Meters per Second")

tempfig.show()
salfig.show()
rofig.show()
flowfig.show()
