In [8]:
# lstm for time series forecasting
import time
from datetime import datetime

from numpy import asarray
from numpy import sqrt
from pandas import read_csv
from pandas import to_datetime
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM

In [21]:
# split a univariate sequence into samples
def split_sequence(sequence, n_steps):
    X, y = list(), list()
    for i in range(len(sequence)):
    # find the end of this pattern
        end_ix = i + n_steps
        # check if we are beyond the sequence
        if end_ix > len(sequence)-1:
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
        X.append(seq_x)
        y.append(seq_y)
    print('X: \n', X[0:5], '\n Y: \n', y[0:5])
    return asarray(X), asarray(y)

In [17]:
# load the dataset
df = read_csv("SolarTrainingData.csv")

In [22]:
# IMPROVEMENT AREA: Datetime is treated as a float here. There's gotta be a better way to use it as a NN input.
# retrieve the values
values = df.values.astype('float32')
# specify the window size
n_steps = 5
# split into samples
X, y = split_sequence(values, n_steps)
# reshape into [samples, timesteps, features]
X = X.reshape((X.shape[0], X.shape[1], 1))
# split into train/test
n_test = 12
X_train, X_test, y_train, y_test = X[:-n_test], X[-n_test:], y[:-n_test], y[-n_test:]
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

X: 
 [array([[ 0.0000000e+00,  1.5147864e+09,  2.6900000e+01,  1.7200001e+01,
         2.1100000e+01,  7.8650002e+01,  0.0000000e+00,            nan,
                   nan,  0.0000000e+00,  0.0000000e+00,  2.0900000e+01,
         1.0300000e+01,  4.0000000e+00,  1.0383000e+03,  1.0000000e+02,
         9.8999996e+00,            nan,            nan,            nan,
                   nan,            nan],
       [ 1.0000000e+00,  1.5147864e+09,  2.6900000e+01,  1.7200001e+01,
         2.1100000e+01,  7.8650002e+01,  0.0000000e+00,            nan,
                   nan,  0.0000000e+00,  0.0000000e+00,  2.0900000e+01,
         1.0300000e+01,  4.0000000e+00,  1.0383000e+03,  1.0000000e+02,
         9.8999996e+00,            nan,            nan,            nan,
                   nan, -7.8492938e-04],
       [ 2.0000000e+00,  1.5147864e+09,  2.6900000e+01,  1.7200001e+01,
         2.1100000e+01,  7.8650002e+01,  0.0000000e+00,            nan,
                   nan,  0.0000000e+00,  0.00000

ValueError: cannot reshape array of size 8116570 into shape (73787,5,1)

In [None]:
# define model
model = Sequential()
model.add(LSTM(100, activation='relu', kernel_initializer='he_normal', input_shape=(n_steps,1)))
model.add(Dense(50, activation='relu', kernel_initializer='he_normal'))
model.add(Dense(50, activation='relu', kernel_initializer='he_normal'))
model.add(Dense(1))
# compile the model
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [None]:
# fit the model
model.fit(X_train, y_train, epochs=350, batch_size=32, verbose=2, validation_data=(X_test, y_test))
# evaluate the model
mse, mae = model.evaluate(X_test, y_test, verbose=0)
print('MSE: %.3f, RMSE: %.3f, MAE: %.3f' % (mse, sqrt(mse), mae))

In [None]:
# make a prediction
row = asarray([18024.0, 16722.0, 14385.0, 21342.0, 17180.0]).reshape((1, n_steps, 1))
yhat = model.predict(row)
print('Predicted: %.3f' % (yhat))

In [None]:
test = df['datetime'].apply(time.mktime())
test.head()