In [17]:
# lstm for time series forecasting
from numpy import sqrt
from numpy import asarray
from pandas import read_csv
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
import time
from datetime import datetime
from tensorflow import math
from tensorflow import reduce_mean
import tensorflow as tf
import sys
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np

In [18]:
# split a univariate sequence into samples
def split_sequence(sequence, n_steps):
    X, y = list(), list()
    for i in range(len(sequence)):
    # find the end of this pattern
        end_ix = i + n_steps
        # check if we are beyond the sequence
        if end_ix > len(sequence)-1:
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
        X.append(seq_x)
        y.append(seq_y)
    print('X: \n', X[0:5], '\n Y: \n', y[0:5])
    return asarray(X), asarray(y)

In [19]:
# Need new function to evaluate RMSE based on just 'Percent Output'
# Current state: we have input tensors of shape (None [batch size], 21). Need to find a way to turn that into an "array of losses"
# of shape (None, 21). Then return that.
# Current step: figure out how to index through a tensor. Need to only focus on solar output
def custom_loss(y_actual, y_pred):
    SE_Tensor = math.square(y_pred - y_actual)  #squared difference
    MSE = reduce_mean(SE_Tensor, axis=0)
    #RMSE = tf.math.sqrt(MSE)
    
    Zeros = tf.zeros_like(MSE) #create tensor of zeros
    Mask = [False, False, False, False, False, False, False, False, False, False,
            False, False, False, False, False, False, True] #create mask
    Solar_MSE = tf.where(Mask, MSE, Zeros) #create tensor where every loss is 0 except solar output
    
    #print_output = tf.print(Solar_MSE, "Solar_MSE: ")
    
    return Solar_MSE

In [20]:
# Need to make a custom evaluation metric as well as loss function
# RMSE should be between 0 and 1. Giving RMSEs of 20+ and not dropping below 1. Something is wrong here.
def custom_eval(y_actual, y_pred):
    SE_Tensor = math.square(y_pred - y_actual)  #squared difference
    MSE = reduce_mean(SE_Tensor, axis=0)
    
    Zeros = tf.zeros_like(MSE) #create tensor of zeros
    Mask = [False, False, False, False, False, False, False, False, False, False,
            False, False, False, False, False, False, True] #create mask
    Solar_MSE = tf.where(Mask, MSE, Zeros) #create tensor where every loss is 0 except solar output
    
    Solar_RMSE = tf.math.sqrt(Solar_MSE)
    
    #print_output = tf.print(Solar_RMSE, "Solar_RMSE: ")
    
    return Solar_RMSE

In [21]:
# load the dataset
df = read_csv("Aggregated 2018 Compiled Weather Data.csv", index_col=0)

In [22]:
df.shape

(7789, 17)

In [23]:
# Create holdout set
PREDICTION_SET_SIZE = 580
N_STEPS = 5

df_holdout = df.iloc[-PREDICTION_SET_SIZE:]
df = df.iloc[:-PREDICTION_SET_SIZE]

holdout_X, holdout_y = split_sequence(df_holdout.values.astype('float32'), N_STEPS)

X: 
 [array([[4.9099998e+01, 4.9099998e+01, 3.8099998e+01, 6.5820000e+01,
        0.0000000e+00, 1.8000000e+00, 8.1000000e+01, 1.0299000e+03,
        2.0100000e+01, 9.8999996e+00, 2.4000000e+02, 8.9999998e-01,
        2.0000000e+00, 1.2000000e+01, 1.1000000e+01, 1.0000000e+01,
        3.7179804e-01],
       [5.5700001e+01, 5.5700001e+01, 3.8099998e+01, 5.1529999e+01,
        0.0000000e+00, 8.8999996e+00, 1.8600000e+02, 1.0292000e+03,
        2.0100000e+01, 9.8999996e+00, 2.8300000e+02, 1.0000000e+00,
        3.0000000e+00, 1.2000000e+01, 1.1000000e+01, 1.1000000e+01,
        4.8983470e-01],
       [5.9700001e+01, 5.9700001e+01, 3.5200001e+01, 3.9720001e+01,
        0.0000000e+00, 1.6500000e+01, 1.8400000e+02, 1.0279000e+03,
        1.1300000e+01, 9.8999996e+00, 3.6500000e+02, 1.3000000e+00,
        4.0000000e+00, 1.2000000e+01, 1.1000000e+01, 1.2000000e+01,
        4.8901382e-01],
       [5.8500000e+01, 5.8500000e+01, 3.5700001e+01, 4.2250000e+01,
        0.0000000e+00, 1.0000000e+01, 

In [24]:
# retrieve the values
values = df.values.astype('float32')
# specify the window size
n_steps = 5
# split into samples
X, y = split_sequence(values, n_steps)
# reshape into [samples, timesteps, features]
X = X.reshape((X.shape[0], X.shape[1], 17))
# split into train/test
n_test = 1000
X_train, X_test, y_train, y_test = X[:-n_test], X[-n_test:], y[:-n_test], y[-n_test:]

X: 
 [array([[ 5.4000000e+01,  5.4000000e+01,  4.3099998e+01,  6.6339996e+01,
         0.0000000e+00,  1.1000000e+00,  3.0000000e+02,  1.0260000e+03,
         1.0000000e+02,  9.5000000e+00,  6.5000000e+01,  2.0000000e-01,
         1.0000000e+00,  1.1000000e+01,  2.1000000e+01,  1.6000000e+01,
         1.8092882e-02],
       [ 5.3099998e+01,  5.3099998e+01,  4.4000000e+01,  7.1139999e+01,
         0.0000000e+00,  2.5999999e+00,  3.3400000e+02,  1.0259000e+03,
         1.0000000e+02,  9.8999996e+00,  2.2000000e+01,  1.0000000e-01,
         0.0000000e+00,  1.1000000e+01,  2.1000000e+01,  1.7000000e+01,
        -3.0698301e-03],
       [ 5.1299999e+01,  5.1299999e+01,  4.5200001e+01,  7.9540001e+01,
         0.0000000e+00,  1.1000000e+00,  2.9600000e+02,  1.0268000e+03,
         1.0000000e+02,  9.5000000e+00,  4.9000000e+01,  2.0000000e-01,
         0.0000000e+00,  1.1000000e+01,  2.1000000e+01,  1.8000000e+01,
        -1.2706623e-03],
       [ 5.1200001e+01,  5.1200001e+01,  4.6400002e+01,

In [25]:
# define model
# improvement area : try adding dropout
model = Sequential()
model.add(LSTM(100, activation='relu', kernel_initializer='he_normal', input_shape=(n_steps,18)))
model.add(Dense(50, activation='relu', kernel_initializer='he_normal'))
model.add(Dense(50, activation='relu', kernel_initializer='he_normal'))
model.add(Dense(1))
# compile the model
model.compile(optimizer='adam', loss=custom_loss, metrics=[custom_eval])
#model.compile(optimizer='adam', loss='mse', metrics=[custom_eval])

In [26]:
# fit the model
model.fit(X_train, y_train, epochs=100, batch_size=32, verbose=2, validation_data=(X_test, y_test))
# evaluate the model
mse, mae = model.evaluate(X_test, y_test, verbose=0)
print('MSE: %.3f, RMSE: %.3f, MAE: %.3f' % (mse, sqrt(mse), mae))

Epoch 1/100


ValueError: in user code:

    File "C:\Users\trevo\anaconda3\lib\site-packages\keras\engine\training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "C:\Users\trevo\anaconda3\lib\site-packages\keras\engine\training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\trevo\anaconda3\lib\site-packages\keras\engine\training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "C:\Users\trevo\anaconda3\lib\site-packages\keras\engine\training.py", line 993, in train_step
        y_pred = self(x, training=True)
    File "C:\Users\trevo\anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "C:\Users\trevo\anaconda3\lib\site-packages\keras\engine\input_spec.py", line 295, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "sequential" is incompatible with the layer: expected shape=(None, 5, 18), found shape=(None, 5, 17)


In [None]:
# Make prediction using holdout set
yhat = model.predict(holdout_X)
print(yhat)
figure, axis = plt.subplots(1, 1)
axis.plot([i for i in range(len(holdout_y))], holdout_y[:, -1])
axis.plot([i for i in range(len(yhat))], yhat)
plt.show()
#plt.savefig("Solar Forecasting/matplotlib.png")

In [None]:
#holdyDF = pd.DataFrame(holdout_y)
#holdyDF.head()

#print(holdout_y[:,-1])

#difference = rehat - holdout_y[:,-1]
#sq_difference = np.square(difference)
#sum_sq = np.sum(sq_difference)
#MSE = sum_sq/580
#print(MSE)

#rehat = np.reshape(yhat, (1, 575))
#print(rehat)

In [None]:
print(holdout_X.shape)