# In [23]:
import pandas as pd
import scipy.io as sio
import numpy as np
import datetime
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import re

def Array_To_DataFrame(matDF, varName) :
    """Flatten every row of ``matDF[varName]`` and join them into one 1-D array.

    Despite its name, this returns a NumPy array, not a DataFrame.

    Parameters
    ----------
    matDF : mapping
        Object indexed by ``varName``. The value must expose ``.shape`` and
        integer indexing whose items provide ``.flatten()`` (for example a
        2-D ``numpy.ndarray``).
    varName : hashable
        Key of the variable to flatten.

    Returns
    -------
    numpy.ndarray
        All elements in row order. The join is seeded with an empty float64
        array, so an empty variable gives an empty float array and integer
        input comes back as float64.
    """
    rows = matDF[varName]
    # Keep the empty float seed of the original accumulator so the result
    # dtype and the empty-input case are unchanged, but concatenate once
    # instead of re-copying the growing result on every iteration.
    pieces = [np.array([])]
    pieces.extend(rows[i].flatten() for i in range(rows.shape[0]))
    return np.concatenate(pieces)

def matFile_To_DF(matFile) :
    """Load a MATLAB ``.mat`` file and return its variables as DataFrame columns.

    Entries whose key starts with an underscore are skipped; every remaining
    variable is flattened to 1-D and becomes one column named after its key.

    Parameters
    ----------
    matFile : path-like or file-like
        Passed unchanged to ``scipy.io.loadmat``.

    Returns
    -------
    pandas.DataFrame
        One column per retained variable. The flattened variables must all
        have the same length for the frame to be built.
    """
    columns = {}
    for name, value in sio.loadmat(matFile).items():
        # Underscore-prefixed keys are not data variables; leave them out.
        if name[0] == '_':
            continue
        columns[name] = np.array(value).flatten()
    return pd.DataFrame(columns)

# Finding percentiles
def intervalPercentile(tmsArray, valArray, timeInstanceIdx) :
    """Return the 99th percentile (NaNs ignored) of ``valArray`` around an index.

    The window is the slice ``valArray[start:stop]`` where ``start`` is
    ``timeInstanceIdx - 288`` clamped at 0 and ``stop`` is
    ``timeInstanceIdx + 288`` clamped at ``len(tmsArray) - 1``. Being a slice,
    the element at ``stop`` itself is not part of the window.

    Parameters
    ----------
    tmsArray : sized
        Only its length is used, to clamp the upper end of the window.
    valArray : sliceable array of floats
        Values the percentile is taken from.
    timeInstanceIdx : int
        Index the window is centred on.

    Returns
    -------
    float
        ``np.nanpercentile(window, 99)``, or ``1.0`` when the window is empty
        or contains only NaN.
    """
    # NOTE(review): 288 samples presumably means 24 h at 5-minute spacing, and
    # an earlier comment here spoke of the 97.5th percentile while the code
    # uses 99 -- confirm which one is intended.
    windowStart = max(0, timeInstanceIdx - 288)
    windowStop = min(len(tmsArray) - 1, timeInstanceIdx + 288)
    window = valArray[windowStart:windowStop]
    if not np.isnan(window).all():
        return np.nanpercentile(window, 99)
    # Neutral fallback so a caller dividing or multiplying by it is unaffected.
    return 1.0

# Finding scaling factor
def scalingFactor(tmsArray, timeInstance, surfaceArray, bottomArray):
    """Return the bottom-to-surface ratio of the windowed percentiles.

    Both series are reduced with ``intervalPercentile`` at the same index and
    the ``bottomArray`` result is divided by the ``surfaceArray`` result.

    Parameters
    ----------
    tmsArray : sized
        Time axis; forwarded to ``intervalPercentile``.
    timeInstance : int
        Index the percentile window is centred on.
    surfaceArray : sliceable array of floats
        Series whose percentile is the denominator.
    bottomArray : sliceable array of floats
        Series whose percentile is the numerator.

    Returns
    -------
    float
        ``percentile(bottomArray) / percentile(surfaceArray)``. No guard is
        applied against a zero denominator.
    """
    denominator = intervalPercentile(tmsArray, surfaceArray, timeInstance)
    numerator = intervalPercentile(tmsArray, bottomArray, timeInstance)
    return numerator / denominator


# Import the two minilander deployments (Dep2 and Dep3).
# NOTE(review): the paths use Windows separators and will not resolve on
# other platforms -- confirm where this cell is expected to run.
matDataMini2 = sio.loadmat('2012-13\\hobo_minilander_3826_Dep2.mat')
matDataMini3 = sio.loadmat('2012-13\\hobo_minilander_3829_Dep3.mat')

# MATLAB date origin: one day before 0000-01-01. Adding a DN value (in days)
# to it yields a calendar date.
origin = np.datetime64('0000-01-01', 'D') - np.timedelta64(1, 'D')

# Drop the underscore-prefixed entries and flatten every variable to a column.
matDataMini2Filtered = {dictKey:dictValue for dictKey, dictValue in matDataMini2.items() if dictKey[0] != '_'}
matMini2DF = pd.DataFrame({dictKey: np.array(dictValue).flatten() for dictKey, dictValue in matDataMini2Filtered.items()})
matDataMini3Filtered = {dictKey:dictValue for dictKey, dictValue in matDataMini3.items() if dictKey[0] != '_'}
matMini3DF = pd.DataFrame({dictKey: np.array(dictValue).flatten() for dictKey, dictValue in matDataMini3Filtered.items()})

# Stack both deployments and order them by time (np.interp needs increasing x).
matMiniDF = pd.concat([matMini2DF, matMini3DF]).reset_index(drop = True)
matMiniDF = matMiniDF.sort_values(['DN'], ascending = [True])

# sort_values keeps the pre-sort index labels, so the earliest and latest
# samples have to be taken by position, not by label 0 / label len-1.
# NOTE(review): 33386 is the number of points on the resampled grid;
# presumably chosen to match the downriver record -- confirm.
masterTimeDF = np.linspace(matMiniDF['DN'].iloc[0], matMiniDF['DN'].iloc[-1], 33386)
minilanderSalinity = np.interp(masterTimeDF, matMiniDF['DN'], matMiniDF['S'], left = np.nan, right = np.nan)
minilanderTemperature = np.interp(masterTimeDF, matMiniDF['DN'], matMiniDF['T'], left = np.nan, right = np.nan)

# Convert the DN grid (days) to timestamps; 500 ms is added before the cast to
# whole seconds.
tmsMiniriver = (masterTimeDF * np.timedelta64(24*3600000, 'ms') + origin + np.timedelta64(500, 'ms')).astype('datetime64[s]')
# From here on matMiniDF holds the resampled series on a fresh 0..n-1 index.
matMiniDF = pd.DataFrame({'DN' : tmsMiniriver, 'S' : minilanderSalinity, 'T' : minilanderTemperature})

# Downriver record, ordered by time. Note that sort_values keeps the original
# index labels, so rows must be addressed by position afterwards.
downRiverDF = pd.read_csv('downriver_2012_2013_processed.csv', parse_dates = ['DN'])
downRiverDF = downRiverDF.sort_values(['DN'], ascending = [True])

#reading in flow data
flowDF = pd.read_csv('2012_2013alsea.csv')
# NOTE(review): 0.0283168 is the cubic-feet -> cubic-metres factor, so max_va
# is presumably in cubic feet per second -- confirm against the data source.
flowDF['fRate'] = flowDF['max_va'] * 0.0283168
#create a new column for flow dates
# Assemble the dates for the whole column at once instead of row by row; the
# integer cast mirrors the int() conversion the per-row version applied.
flowDF['DN'] = pd.to_datetime(pd.DataFrame({
    'year': flowDF['begin_yr'].astype(int),
    'month': flowDF['month_nu'].astype(int),
    'day': flowDF['day_nu'].astype(int),
}))

# Rescale the minilander salinity against the downriver record.
# Every interval test is strict on both ends, so a sample that falls exactly
# on a boundary instant belongs to no interval and ends up as NaN.

def _strictlyBetween(times, start, end):
    # Positional boolean mask of the samples with start < t < end.
    stamps = np.asarray(times)
    return (stamps > np.datetime64(start)) & (stamps < np.datetime64(end))

# Snapshots of the unscaled series, addressed by position. downRiverDF was
# sorted without resetting its index, so label lookups would not line up with
# these positional arrays.
miniSalRaw = np.array(matMiniDF['S'].to_numpy(), copy = True)
downBottomSalRaw = np.array(downRiverDF['BottomSalCorrected'].to_numpy(), copy = True)

# First interval : Jan 25 to Feb 18
avgMiniSal = np.where(_strictlyBetween(tmsMiniriver, '2013-01-25', '2013-02-18'), miniSalRaw, np.nan)
avgDownBottomSal = np.where(_strictlyBetween(downRiverDF['DN'], '2013-01-25', '2013-02-18'), downBottomSalRaw, np.nan)
# Ratio of the two in-interval maxima (NaNs ignored).
scaleFactor1 = np.nanmax(avgDownBottomSal)/np.nanmax(avgMiniSal)

# Third interval - March 21 to March 28
avgMiniSal = np.where(_strictlyBetween(tmsMiniriver, '2013-03-21', '2013-03-28'), miniSalRaw, np.nan)
avgDownBottomSal = np.where(_strictlyBetween(downRiverDF['DN'], '2013-03-21', '2013-03-28'), downBottomSalRaw, np.nan)
scaleFactor3 = np.nanmax(avgDownBottomSal)/np.nanmax(avgMiniSal)

# Which rescaling rule applies to each minilander sample.
inFirstInterval = _strictlyBetween(tmsMiniriver, '2013-01-25', '2013-02-18')
# NOTE(review): scaleFactor3 is derived from 21-28 March but applied through
# 21 May -- confirm this is intended.
inThirdInterval = _strictlyBetween(tmsMiniriver, '2013-03-21', '2013-05-21')
inRovingInterval = _strictlyBetween(tmsMiniriver, '2013-02-18', '2013-03-21')

# The loop is kept sequential on purpose: the roving factor reads
# matMiniDF['S'] while it is being rewritten, so the result depends on the
# order of the updates.
# NOTE(review): the roving window therefore mixes already-rescaled samples
# (behind idx) with raw ones (ahead of idx) -- confirm this is intended.
# NOTE(review): the roving factor uses 'SurfaceSalCorrected' where the fixed
# factors use 'BottomSalCorrected', and indexes the downriver series with the
# minilander index, which assumes both share one time grid -- confirm.
for idx in range(len(tmsMiniriver)):
    if inFirstInterval[idx]:
        matMiniDF.loc[idx, 'S'] = matMiniDF['S'][idx] * scaleFactor1
    elif inThirdInterval[idx]:
        matMiniDF.loc[idx, 'S'] = matMiniDF['S'][idx] * scaleFactor3
    elif inRovingInterval[idx]:
        # Roving scaling
        scaleFactor = scalingFactor(tmsMiniriver, idx, matMiniDF['S'], downRiverDF['SurfaceSalCorrected'])
        matMiniDF.loc[idx, 'S'] = matMiniDF['S'][idx] * scaleFactor
    else:
        # Outside every interval: discard the sample.
        matMiniDF.loc[idx, 'S'] = np.nan

# Export the rescaled minilander series (timestamp, salinity, temperature)
# as a CSV file without the index column.
exportColumns = {
    'DN' : tmsMiniriver,
    'Salinity' : matMiniDF['S'],
    'Temperature' : matMiniDF['T'],
}
timedMiniDF = pd.DataFrame(exportColumns)
timedMiniDF.to_csv('minilander_2012_2013_processed.csv', index=False)

# Build, label and show the temperature, salinity and flow-rate figures.

def _labelTraces(fig, labels):
    # Give each trace a readable legend name, hover text and legend group.
    for trace, label in zip(fig.data, labels):
        trace.name = label
        trace.hovertemplate = 'variable=' + label + '<br>x=%{x}<br>value=%{y}<extra></extra>'
        trace.legendgroup = label

tmsFlow = flowDF['DN']
tempFigLabels = ['Miniriver']
flowFigLabel = ['Flow Rate']

tempfig = px.line(x = tmsMiniriver, y = [matMiniDF['T']], color_discrete_sequence= ["purple"], title = "Mini")
salfig = px.line(x= tmsMiniriver, y = [matMiniDF['S']], color_discrete_sequence = ["purple"], title = "Mini")
flowfig = px.line(x = tmsFlow, y = [flowDF['fRate']], color_discrete_sequence= ["green"], title = "Flow Rate")

# The salinity figure reuses the temperature figure's label list.
_labelTraces(tempfig, tempFigLabels)
_labelTraces(salfig, tempFigLabels)
_labelTraces(flowfig, flowFigLabel)

# NOTE(review): preTimeStamp is not defined anywhere in this cell; presumably
# an earlier cell sets it to the DN value (in days) where the plots should
# start -- confirm. When it is missing, start at the first resampled sample so
# the cell still runs on a fresh kernel.
try:
    plotStartDN = preTimeStamp
except NameError:
    plotStartDN = masterTimeDF[0]
plotRange = [pd.Timestamp(plotStartDN * np.timedelta64(1, 'D') + origin), pd.Timestamp('2013-05-25')]

# Same layout for all three figures; only the title and y-axis label differ.
for fig, titleText, yAxisLabel in (
    (tempfig, "Bottom Temperature for 2012-2013", "Temperature (°C)"),
    (salfig, "Bottom Salinity for 2012-2013", "Salinity"),
    (flowfig, "Flow Rate for 2012-2013", "Cubic Meters per Second"),
):
    fig.update_layout(title=dict(text= titleText, font=dict(size=25)))
    fig.update_xaxes(tickangle=30)
    fig.update_xaxes(rangeslider_visible=True)
    fig.update_xaxes(range = list(plotRange))
    fig.update_layout(xaxis_title="Date", yaxis_title=yAxisLabel, legend_title="Locations")

tempfig.show()
salfig.show()
flowfig.show()

