In [None]:
import math
import matplotlib.pyplot as plt
import keras
import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.layers import *
from keras.callbacks import ReduceLROnPlateau
from math import sqrt
from sklearn.metrics import mean_squared_error 
from matplotlib import pyplot
from google.colab import drive
# Mount Google Drive so the dataset and the output folder below are reachable.
# This only works inside Google Colab (see the google.colab import above).
drive.mount('/content/drive')
# Load the LMP data set. The helper functions in this notebook read the column
# at position 1 (df.iloc[:, 1]) as the price series.
# NOTE(review): confirm column 1 is the price column in this CSV.
# Alternative data set (disabled):
#df = pd.read_csv('/content/drive/MyDrive/CS229_Project/pacifica_oct_lmp.csv')
df = pd.read_csv('/content/drive/MyDrive/CS229_Project/bellehaven_2019.csv')
# Folder in which figures are saved (read as a global by plotLMP).
images_dir = '/content/drive/MyDrive/CS229_Project'

In [None]:
def train_test_split(df, split=0.8):
  """Split column 1 of ``df`` chronologically into train and test arrays.

  Rows are not shuffled: the first ``split`` fraction of rows becomes the
  training set and the remaining rows become the test set.

  NOTE: this definition shadows ``sklearn.model_selection.train_test_split``,
  which is imported at the top of the notebook.

  Args:
    df: DataFrame whose column at position 1 holds the series to split.
    split: Fraction of rows (between 0 and 1) assigned to the training set.

  Returns:
    (train, test, num_train): the two value arrays and the number of
    training rows.

  Raises:
    ValueError: if ``split`` is not within [0, 1].
  """
  if not 0 <= split <= 1:
    raise ValueError(f"split must be a fraction between 0 and 1, got {split!r}")
  # Previously a hard-coded local 0.8 overrode the `split` argument.
  num_train = round(split * df.shape[0])
  train = df.iloc[:num_train, 1].values
  test = df.iloc[num_train:, 1].values
  return train, test, num_train

def resize_scale(train):
  """Scale a 1-D training series into the range [0, 1].

  The input is viewed as a single-feature column of shape (n, 1) because the
  scaler operates on 2-D input. The caller's array is left untouched; the
  previous in-place ``ndarray.resize`` changed its shape as a side effect.

  Args:
    train: 1-D array-like of training values.

  Returns:
    (scale_train, minmaxscale): the scaled (n, 1) array and the fitted
    ``MinMaxScaler``, which is needed later to transform test data and to
    invert predictions.
  """
  column = np.asarray(train).reshape(-1, 1)
  minmaxscale = MinMaxScaler(feature_range = (0, 1))
  scale_train = minmaxscale.fit_transform(column)
  return scale_train, minmaxscale

def data_shift(scale_train, num_train, sequence_length=40, shift=1):
  """Build sliding-window inputs and their targets from a scaled series.

  For every index ``i`` in ``[sequence_length, num_train)`` the input window is
  the ``sequence_length - shift`` values of column 0 that immediately precede
  ``i``, and the target is the value at ``i``.

  Args:
    scale_train: 2-D array; only column 0 is read.
    num_train: Exclusive upper bound of the target indices.
    sequence_length: Index of the first target.
    shift: Amount by which the window is shorter than ``sequence_length``.

  Returns:
    (X_train, y_train): windows with shape
    (n_samples, sequence_length - shift, 1) and the matching 1-D targets.
  """
  window = sequence_length - shift
  target_indices = range(sequence_length, num_train)
  windows = np.array([scale_train[end - window:end, 0] for end in target_indices])
  # Add a trailing feature axis: (samples, timesteps) -> (samples, timesteps, 1).
  X_train = windows.reshape(windows.shape[0], windows.shape[1], 1)
  targets = scale_train[sequence_length:num_train, 0]
  return X_train, targets

def LSTM_model(X_train, units=50, dropout=0.2, lr_factor=0.1, lr_patience=100):
  """Build and compile a three-layer stacked LSTM regressor.

  The defaults reproduce the previously hard-coded architecture; the extra
  keyword arguments make the hyperparameters tunable without editing the body.

  Args:
    X_train: Training inputs; only ``X_train.shape[1]`` (timesteps per sample)
      is read, to set the input shape ``(timesteps, 1)``.
    units: Number of units in each LSTM layer.
    dropout: Rate passed to the Dropout layer that follows each LSTM layer.
    lr_factor: ``factor`` passed to ``ReduceLROnPlateau``.
    lr_patience: ``patience`` passed to ``ReduceLROnPlateau``. With the default
      of 100 the callback can only act on runs longer than 100 epochs.

  Returns:
    (model, learning_rate): the compiled model and the ``ReduceLROnPlateau``
    callback. The callback monitors ``val_loss``, so ``fit`` must be given
    validation data for it to have an effect.
  """
  model = Sequential()
  # 3 layer LSTM with dropout regularization. The first two LSTM layers return
  # the full sequence so the following LSTM layer receives sequence input.
  model.add(LSTM(units = units, return_sequences = True, input_shape = (X_train.shape[1], 1)))
  model.add(Dropout(dropout))
  model.add(LSTM(units = units, return_sequences = True))
  model.add(Dropout(dropout))
  model.add(LSTM(units = units))
  model.add(Dropout(dropout))
  # Single-unit output layer: one predicted value per sample.
  model.add(Dense(units = 1))
  model.compile(optimizer = 'adam', loss = 'mean_squared_error')
  learning_rate = ReduceLROnPlateau(monitor='val_loss', factor=lr_factor, patience=lr_patience)
  return model, learning_rate

def shift_test(df, num_train, minmaxscale, sequence_length=40, shift=1):
  """Build sliding-window test inputs from column 1 of ``df``.

  The last ``sequence_length`` training values are prepended to the test rows
  so that the first test target has a full window of history. Windows are
  built the same way as in ``data_shift``: each holds the
  ``sequence_length - shift`` values immediately preceding its target.

  Args:
    df: DataFrame whose column at position 1 holds the full series.
    num_train: Number of leading rows that belong to the training set.
    minmaxscale: Fitted scaler exposing ``transform``.
    sequence_length: Number of history rows prepended to the test rows.
    shift: Amount by which each window is shorter than ``sequence_length``.

  Returns:
    (X_test, y_test): scaled windows with shape
    (len(y_test), sequence_length - shift, 1) and the unscaled test targets
    as a pandas Series.

  Raises:
    ValueError: if fewer than ``sequence_length`` rows precede the test set.
      Previously the slice start went negative and silently took rows from
      the end of the series instead.
  """
  series = df.iloc[:, 1]
  y_test = series.iloc[num_train:]
  start = len(series) - len(y_test) - sequence_length
  if start < 0:
    raise ValueError(
        f"need at least sequence_length={sequence_length} rows before the "
        f"test set, got {len(series) - len(y_test)}")
  test = series.iloc[start:].values.reshape(-1, 1)
  test = minmaxscale.transform(test)
  window = sequence_length - shift
  X_test = np.array([test[end - window:end, 0]
                     for end in range(sequence_length, len(y_test) + sequence_length)])
  # Add a trailing feature axis: (samples, timesteps) -> (samples, timesteps, 1).
  X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
  return X_test, y_test

def inverse_predictions(X_test, model=None, minmaxscale=None):
  """Predict on ``X_test`` and map the outputs back through the scaler.

  Args:
    X_test: Model input, passed unchanged to ``model.predict``.
    model: Object exposing ``predict``. When omitted, the notebook-level
      ``model`` global is used (the original behaviour).
    minmaxscale: Object exposing ``inverse_transform``. When omitted, the
      notebook-level ``minmaxscale`` global is used (the original behaviour).

  Returns:
    The result of ``minmaxscale.inverse_transform`` applied to the predictions.

  Raises:
    NameError: if an argument is omitted and the matching global is undefined.
  """
  if model is None or minmaxscale is None:
    # Fall back to the notebook globals so existing one-argument calls still work.
    notebook_scope = globals()
    try:
      if model is None:
        model = notebook_scope['model']
      if minmaxscale is None:
        minmaxscale = notebook_scope['minmaxscale']
    except KeyError as missing:
      raise NameError(f"name {missing.args[0]!r} is not defined") from None
  predicted_price = model.predict(X_test)
  return minmaxscale.inverse_transform(predicted_price)

def RMSE(actual, predicted):
  """Print and return the root-mean-squared error between two series.

  Args:
    actual: Ground-truth values.
    predicted: Predicted values, passed with ``actual`` to
      ``mean_squared_error``.

  Returns:
    The square root of the mean squared error, as a float.
  """
  squared_error = mean_squared_error(actual, predicted)
  root_error = math.sqrt(squared_error)
  print('Test RMSE: %.3f' % root_error)
  return root_error

#plot LMP prediction
def plotLMP(y_test, predicted_price, samples_per_day=24, days_per_tick=4):
  """Plot real vs. predicted prices and save the figure as ``LSTM_LMP.png``.

  The figure is written into the notebook-level ``images_dir`` folder.

  Tick labels are derived from the tick positions, so there is always exactly
  one label per tick. The previous code built the labels from a different
  step (24) than the positions (96), giving more labels than ticks for any
  series longer than 96 points.

  Args:
    y_test: Object exposing ``.values`` with the real prices.
    predicted_price: Sequence of predicted prices; its length sets the x-range.
    samples_per_day: Number of samples that make up one day on the x-axis.
      The default of 24 reproduces the original labels.
      NOTE(review): this assumes hourly data; confirm against the data set.
    days_per_tick: Spacing between x-ticks, in days. The defaults place a tick
      every 96 samples, labelled 0, 4, 8, ...
  """
  pyplot.plot(y_test.values, label = 'Real Price')
  pyplot.plot(predicted_price, color='red', label = 'Predicted Price')
  plt.xlabel('Time [days]')
  tick_positions = np.arange(0, len(predicted_price), samples_per_day * days_per_tick)
  tick_labels = [int(position // samples_per_day) for position in tick_positions]
  plt.xticks(ticks=tick_positions, labels=tick_labels)
  plt.ylabel('Price [$]')
  plt.legend()
  plt.savefig(f"{images_dir}/LSTM_LMP.png", dpi=500)
  plt.show()

def _history_dict(source):
  """Return the per-epoch metrics dict held by ``source``, or None if absent."""
  history = getattr(source, 'history', None)
  if isinstance(history, dict):
    # `source` is a History-like object: its .history is the dict itself.
    return history
  # `source` may be a fitted model whose .history is the History object.
  nested = getattr(history, 'history', None)
  return nested if isinstance(nested, dict) else None

def loss_plot(model):
  """Plot training and validation loss per epoch and save ``LSTM_loss.png``.

  The figure is written into the notebook-level ``images_dir`` folder, the
  same location ``plotLMP`` uses.

  The history is now read from the argument. Previously the argument was
  ignored and the notebook-level ``model_params`` global was always plotted.

  Args:
    model: Either the History object returned by ``fit`` (its ``history``
      attribute is a dict of per-epoch metrics) or a fitted model whose
      ``history`` attribute holds that object. If neither applies, the
      notebook-level ``model_params`` global is used as a fallback.

  Raises:
    ValueError: if no training history can be found.
  """
  history = _history_dict(model)
  if history is None:
    # Backward-compatible fallback to the global the original code relied on.
    history = _history_dict(globals().get('model_params'))
  if history is None:
    raise ValueError("loss_plot needs a History object or a fitted model with a training history")
  plt.clf()
  plt.plot(history['loss'])
  plt.plot(history['val_loss'])
  plt.ylabel('Loss')
  plt.xlabel('Epoch')
  plt.legend(['Training Data', 'Validation Data'], loc='upper right')
  plt.savefig(f"{images_dir}/LSTM_loss.png", dpi=500)
  plt.show()

# --- Prepare training data ---
# Chronological split of the price column, scaling to [0, 1], then conversion
# into sliding windows for the LSTM.
# NOTE: `test` is not used below; shift_test re-slices `df` itself.
train, test, num_train = train_test_split(df)
scale_train, minmaxscale = resize_scale(train)
X_train, y_train = data_shift(scale_train, num_train)
# --- Training settings ---
epochs=100
batch_size=32
# Passed to fit() as validation_split.
validation=.05
# --- Build and fit the model ---
# NOTE: keep the names `model`, `minmaxscale` and `model_params` unchanged;
# helper functions defined above may look them up as notebook-level globals.
model, learning_rate = LSTM_model(X_train)
# batch_size and epochs are passed positionally (3rd and 4th arguments of fit()).
model_params=model.fit(X_train, y_train, batch_size, epochs, validation_split = validation, callbacks = [learning_rate])
# --- Evaluate on the held-out tail of the series ---
X_test, y_test = shift_test(df, num_train, minmaxscale)
predicted_price = inverse_predictions(X_test)
rmse = RMSE(y_test.values, predicted_price)
# --- Figures (both are saved to disk and shown) ---
plotLMP(y_test, predicted_price)
loss_plot(model)