In [12]:
import tensorflow as tf
import numpy as np
import os
import pandas as pd

current_directory = os.listdir()
match = [match for match in current_directory if 'for_modeling' in match]
match = match[0]


df = pd.read_csv(match, index_col=0)

In [13]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 99178 entries, 0 to 99177
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Date               99178 non-null  object 
 1   LittleFallsGage    99174 non-null  float64
 2   01646500-00060     99174 non-null  float64
 3   SenecaGage         99178 non-null  float64
 4   01645000-00060     99178 non-null  float64
 5   PointofRocksGage   99178 non-null  float64
 6   01638500-00060     99178 non-null  int64  
 7   EdwardsGage        99178 non-null  float64
 8   ShhepardstownGage  99177 non-null  float64
 9   01618000-00060     99177 non-null  float64
 10  HancockGage        99178 non-null  float64
 11  01613000-00060     99178 non-null  int64  
 12  SpringfieldGage    99178 non-null  float64
 13  01608500-00060     99178 non-null  int64  
dtypes: float64(10), int64(3), object(1)
memory usage: 11.4+ MB


In [14]:
# lag variables
# positive lags shift obs down, so they push past, old obs down so they are next to current obs
df['PointofRocksGage'] = df['PointofRocksGage'].shift(16)
df['EdwardsGage'] = df['EdwardsGage'].shift(12)
df['ShhepardstownGage'] = df['ShhepardstownGage'].shift(32)
df['HancockGage'] = df['HancockGage'].shift(60)
df['SpringfieldGage'] = df['SpringfieldGage'].shift(68)

# to add more lagged fetures for 17hr prediction
df['SpringfieldGage_extrashift1'] = df['SpringfieldGage'].shift(1)
df['SpringfieldGage_extrashift2'] = df['SpringfieldGage'].shift(2)
df['SpringfieldGage_extrashift3'] = df['SpringfieldGage'].shift(3)

df.drop(df.columns[[2,4,6,9,11,13]], axis=1, inplace=True)

In [15]:
df = df.dropna()

In [16]:
df.head(20)

Unnamed: 0,Date,LittleFallsGage,SenecaGage,PointofRocksGage,EdwardsGage,ShhepardstownGage,HancockGage,SpringfieldGage,SpringfieldGage_extrashift1,SpringfieldGage_extrashift2,SpringfieldGage_extrashift3
71,2022-04-13 07:00:00,4.99,2.35,5.26,7.88,5.85,7.45,3.96,3.97,3.97,3.98
72,2022-04-13 07:15:00,4.98,2.35,5.26,7.87,5.83,7.45,3.96,3.96,3.97,3.97
73,2022-04-13 07:30:00,4.97,2.35,5.25,7.87,5.82,7.44,3.96,3.96,3.96,3.97
74,2022-04-13 07:45:00,4.96,2.34,5.26,7.86,5.81,7.43,3.95,3.96,3.96,3.96
75,2022-04-13 08:00:00,4.97,2.34,5.25,7.85,5.81,7.43,3.95,3.95,3.96,3.96
76,2022-04-13 08:15:00,4.96,2.34,5.25,7.84,5.81,7.42,3.95,3.95,3.95,3.96
77,2022-04-13 08:30:00,4.96,2.34,5.24,7.84,5.79,7.41,3.94,3.95,3.95,3.95
78,2022-04-13 08:45:00,4.95,2.34,5.24,7.83,5.8,7.4,3.94,3.94,3.95,3.95
79,2022-04-13 09:00:00,4.95,2.34,5.24,7.82,5.78,7.4,3.93,3.94,3.94,3.95
80,2022-04-13 09:15:00,4.95,2.33,5.23,7.82,5.78,7.4,3.93,3.93,3.94,3.94


In [17]:
df_train = df.sample(frac=0.8,random_state=200)
df_test = df.drop(df_train.index)

In [18]:
Y_train = df_train['LittleFallsGage']
X_train = df_train.drop(columns=['Date', 'LittleFallsGage'])

In [19]:
Y_test = df_test['LittleFallsGage']
X_test = df_test.drop(columns=['Date', 'LittleFallsGage'])

In [20]:
# need to break them out by seperate hourly predictions
#X_train_17hrs = X_train['SpringfieldGage']

In [87]:
X_train_17hrs = X_train[['SpringfieldGage','SpringfieldGage_extrashift1','SpringfieldGage_extrashift2','SpringfieldGage_extrashift3']].copy()

X_test_17hrs = X_test[['SpringfieldGage','SpringfieldGage_extrashift1','SpringfieldGage_extrashift2','SpringfieldGage_extrashift3']].copy()

In [100]:
#del(model_17hrs)
model_17hrs = tf.keras.Sequential([
    tf.keras.layers.Dense(8, activation = 'tanh', kernel_regularizer='l2', input_shape = (4,)),  # input shape required
    tf.keras.layers.Dense(10, activation = 'tanh', kernel_regularizer='l2'),
    tf.keras.layers.Dense(3, activation = 'tanh', kernel_regularizer='l2'),
    tf.keras.layers.Dense(1, kernel_regularizer='l2')
])

model_17hrs.compile(optimizer = 'adam', loss = 'mean_squared_error', metrics = 'mean_squared_error')

In [111]:
# Train
model_17hrs.fit(X_train_17hrs , Y_train, epochs=50, verbose=0)

<keras.callbacks.History at 0x7fe126e99c70>

In [103]:
pred = model_17hrs.predict(X_test_17hrs)

In [104]:
compare_df = pd.DataFrame(pred, columns=['Prediction'])
compare_df['TrueValue'] = Y_test.values
# used values bcuz...
# the index from the Y_test index is used to map it onto the new compare_df
# but the compare_df got its index form the prediction so it starts at 0 unlike Y_test
# so it only matches up rows correctly when the indexes match, which is few

In [110]:
compare_df.iloc[500:700, :]

Unnamed: 0,Prediction,TrueValue
500,6.829505,9.73
501,6.824359,9.59
502,6.823647,9.54
503,6.821461,9.49
504,6.818334,9.36
505,6.815613,9.32
506,6.81269,9.25
507,6.807369,9.14
508,6.805057,9.06
509,6.801297,8.97


In [None]:
# not good enough
# need to add more features, like past 3 obs. 

In [None]:
# Save the weights
model.save_weights('./checkpoints/my_checkpoint')

# Create a new model instance
model = create_model()

# Restore the weights
model.load_weights('./checkpoints/my_checkpoint')

# Evaluate the model
loss, acc = model.evaluate(test_images, test_labels, verbose=2)
print("Restored model, accuracy: {:5.2f}%".format(100 * acc))

In [109]:
pd.set_option('display.max_rows', 500)

In [86]:
X_train_17hrs

Unnamed: 0,SpringfieldGage,SpringfieldGage_extrashift1,SpringfieldGage_extrashift2,SpringfieldGage_extrashift3
90402,2.39,2.39,2.40,2.40
90952,2.07,2.07,2.07,2.07
96498,2.57,2.57,2.57,2.57
97242,2.33,2.33,2.33,2.33
76043,1.24,1.24,1.24,1.24
...,...,...,...,...
33857,1.24,1.24,1.25,1.25
82906,1.24,1.24,1.24,1.24
96780,2.25,2.25,2.25,2.25
94208,2.38,2.37,2.37,2.36
