In [189]:
import csv
import math
import sys
from datetime import datetime as dt
from datetime import timezone as tz

import numpy as np
import pandas as pd
import pytz as pytz
from keras.layers import Dense, Flatten
from keras.models import Sequential
from scipy.sparse import data
from sklearn.utils import validation
import tensorflow as tf
from tensorflow import keras
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from keras.models import load_model

import utility

In [190]:
import os

In [191]:
os.environ['VIRTUAL_ENV']

'/Users/linusrandud/.pyenv/versions/3.10.14/envs/GhostPostCC'

In [192]:
source = 'coal'
IN_FILE_NAME = "data/MW_electricity_cleaned.csv"

OUT_FILE_NAME_PREFIX = 'data/src_prod_forecast'
OUT_FILE_NAME = OUT_FILE_NAME_PREFIX + "_" + source + ".csv"

In [193]:
NUM_FEATURES_DICT = {"coal":6, "nat_gas":6, "nuclear":6, "oil":6, "hydro":11, "solar": 11,
                    "wind":11, "others": 6}

NUM_VAL_DAYS = 30
NUM_TEST_DAYS = 184
TRAINING_WINDOW_HOURS = 24
PREDICTION_WINDOW_HOURS = 24
MODEL_SLIDING_WINDOW_LEN = 24

COAL = 1
NAT_GAS = 2
NUCLEAR = 3
OIL = 4
HYDRO = 5
SOLAR = 6
WIND = 7
OTHERS = 8 

FUEL = {COAL:"coal", NAT_GAS:"nat_gas", NUCLEAR:"nuclear", OIL:"oil", HYDRO:"hydro", SOLAR:"solar", WIND:"wind", OTHERS:"others"}
SOURCE_TO_SOURCE_COL_MAP = {y: x for x, y in FUEL.items()}

SOURCE_COL = SOURCE_TO_SOURCE_COL_MAP[source]
NUM_FEATURES = NUM_FEATURES_DICT[FUEL[SOURCE_COL]]

In [194]:
def initDataset(inFileName, sourceCol):
    dataset = pd.read_csv(inFileName, header=0, infer_datetime_format=True, parse_dates=['UTC Time at End of Hour'], index_col=['UTC Time at End of Hour'])

    print(dataset.head())
    print(dataset.columns)
    dateTime = dataset.index.values
    
    print("\nAdding features related to date & time...")
    modifiedDataset = utility.addDateTimeFeatures(dataset, dateTime, sourceCol)
    dataset = modifiedDataset
    print("Features related to date & time added")
    
    for i in range(sourceCol, len(dataset.columns.values)):
        col = dataset.columns.values[i]
        dataset[col] = dataset[col].astype(np.float64)
        # print(col, dataset[col].dtype)

    return dataset, dateTime

In [195]:
# convert training data into inputs and outputs (labels)
def manipulateTrainingDataShape(data, trainWindowHours, labelWindowHours): 
    print("Data shape: ", data.shape)
    X, y = list(), list()
    # step over the entire history one time step at a time
    for i in range(len(data)-(trainWindowHours+labelWindowHours)+1):
        # define the end of the input sequence
        trainWindow = i + trainWindowHours
        labelWindow = trainWindow + labelWindowHours
        xInput = data[i:trainWindow, :]
        # xInput = xInput.reshape((len(xInput), 1))
        X.append(xInput)
        y.append(data[trainWindow:labelWindow, 0])
        # print(data[trainWindow:labelWindow, 0])
    return np.array(X, dtype=np.float64), np.array(y, dtype=np.float64)

def manipulateTestDataShape(data, slidingWindowLen, predictionWindowHours, isDates=False): 
    X = list()
    # step over the entire history one time step at a time
    for i in range(0, len(data)-(predictionWindowHours)+1, slidingWindowLen):
        # define the end of the input sequence
        predictionWindow = i + predictionWindowHours
        X.append(data[i:predictionWindow])
    if (isDates is False):
        X = np.array(X, dtype=np.float64)
    else:
        X = np.array(X)
    return X

In [196]:
dataset, dateTime = initDataset(IN_FILE_NAME, SOURCE_COL)

  dataset = pd.read_csv(inFileName, header=0, infer_datetime_format=True, parse_dates=['UTC Time at End of Hour'], index_col=['UTC Time at End of Hour'])


                         Unnamed: 0  Net Generation (MW) from Coal  \
UTC Time at End of Hour                                              
2021-01-01 06:00:00               0                        25367.0   
2021-01-01 07:00:00               1                        24662.0   
2021-01-01 08:00:00               2                        25365.0   
2021-01-01 09:00:00               3                        24811.0   
2021-01-01 10:00:00               4                        25041.0   

                         Net Generation (MW) from Natural Gas  \
UTC Time at End of Hour                                         
2021-01-01 06:00:00                                   15999.0   
2021-01-01 07:00:00                                   16063.0   
2021-01-01 08:00:00                                   16057.0   
2021-01-01 09:00:00                                   15870.0   
2021-01-01 10:00:00                                   15755.0   

                         Net Generation (MW) from Nuc

In [197]:
dataset

Unnamed: 0_level_0,Unnamed: 0,Net Generation (MW) from Coal,hour_sin,hour_cos,month_sin,month_cos,weekend,Net Generation (MW) from Natural Gas,Net Generation (MW) from Nuclear,Net Generation (MW) from All Petroleum Products,Net Generation (MW) from Hydropower and Pumped Storage,Net Generation (MW) from Solar,Net Generation (MW) from Wind,Net Generation (MW) from Other
UTC Time at End of Hour,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2021-01-01 06:00:00,0,25367.0,1.000000,6.123234e-17,0.008601,0.999963,0.0,15999.0,11534.0,,714.0,0.0,9486.0,840.0
2021-01-01 07:00:00,1,24662.0,0.965926,-2.588190e-01,0.009318,0.999957,0.0,16063.0,11544.0,,664.0,0.0,8780.0,840.0
2021-01-01 08:00:00,2,25365.0,0.866025,-5.000000e-01,0.010035,0.999950,0.0,16057.0,11555.0,,661.0,0.0,6912.0,839.0
2021-01-01 09:00:00,3,24811.0,0.707107,-7.071068e-01,0.010751,0.999942,0.0,15870.0,11560.0,,664.0,0.0,5967.0,840.0
2021-01-01 10:00:00,4,25041.0,0.500000,-8.660254e-01,0.011468,0.999934,0.0,15755.0,11565.0,,668.0,0.0,5074.0,844.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-31 19:00:00,26269,19663.0,-0.965926,2.588190e-01,-0.012185,0.999926,1.0,26718.0,8205.0,,811.0,1060.0,11799.0,330.0
2023-12-31 20:00:00,26270,19740.0,-0.866025,5.000000e-01,-0.011468,0.999934,1.0,26912.0,8203.0,,805.0,976.0,11074.0,332.0
2023-12-31 21:00:00,26271,20145.0,-0.707107,7.071068e-01,-0.010751,0.999942,1.0,26794.0,8204.0,,802.0,825.0,10717.0,340.0
2023-12-31 22:00:00,26272,21403.0,-0.500000,8.660254e-01,-0.010035,0.999950,1.0,27195.0,8203.0,,850.0,541.0,10181.0,344.0


In [43]:
def trainANN(trainX, trainY, valX, valY, hyperParams, modelDir):
    n_timesteps, n_features, nOutputs = trainX.shape[1], trainX.shape[2], trainY.shape[1]
    epochs = 1 #hyperParams['epoch']
    batchSize = hyperParams['batchsize']
    activationFunc = hyperParams['actv']
    lossFunc = hyperParams['loss']
    optimizer = hyperParams['optim']
    hiddenDims = hyperParams['hidden']
    learningRates = hyperParams['lr']
    model = Sequential()
    model.add(Flatten())
    model.add(Dense(hiddenDims[0], input_shape=(n_timesteps, n_features), activation=activationFunc)) # 20 for coal, nat_gas, nuclear
    model.add(Dense(hiddenDims[1], activation='relu')) # 50 for coal, nat_gas, nuclear
    model.add(Dense(nOutputs))

    opt = tf.keras.optimizers.Adam(learning_rate = learningRates)
    model.compile(loss=lossFunc, optimizer=optimizer[0],
                    metrics=['mean_absolute_error'])
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)
    mc = ModelCheckpoint(modelDir, monitor='val_loss', mode='min', verbose=1, save_best_only=True)
    # fit network
    hist = model.fit(trainX, trainY, epochs=epochs, batch_size=batchSize[0], verbose=2,
                        validation_data=(valX, valY), callbacks=[es, mc])
    model = load_model(modelDir)
    utility.showModelSummary(hist, model)
    return model, n_features

def getDayAheadForecasts(trainX, trainY, model, history, testData, 
                            trainWindowHours, numFeatures, depVarColumn):
    global MODEL_SLIDING_WINDOW_LEN
    global PREDICTION_WINDOW_HOURS
    # walk-forward validation over each day
    print("Testing...")
    predictions = list()
    for i in range(0, len(testData)//24):
        dayAheadPredictions = list()
        tempHistory = history.copy()
        currentDayHours = i* MODEL_SLIDING_WINDOW_LEN
        for j in range(0, PREDICTION_WINDOW_HOURS, 24):
            yhat_sequence, newTrainingData = getForecasts(model, tempHistory, trainWindowHours, numFeatures)
            dayAheadPredictions.extend(yhat_sequence)
            # add current prediction to history for predicting the next day
            # following 3 lines are redundant currently. Will become useful if
            # prediction period goes beyond 24 hours.
            latestHistory = testData[currentDayHours+j:currentDayHours+j+24, :].tolist()
            for k in range(24):
                latestHistory[k][depVarColumn] = yhat_sequence[k]
            tempHistory.extend(latestHistory)

        # get real observation and add to history for predicting the next day
        history.extend(testData[currentDayHours:currentDayHours+MODEL_SLIDING_WINDOW_LEN, :].tolist())
        predictions.append(dayAheadPredictions)

    # evaluate predictions days for each day
    predictedData = np.array(predictions, dtype=np.float64)
    return predictedData


def getForecasts(model, history, trainWindowHours, numFeatures):
    # flatten data
    data = np.array(history, dtype=np.float64)
    # retrieve last observations for input data
    input_x = data[-trainWindowHours:]
    # reshape into [1, n_input, num_features]
    input_x = input_x.reshape((1, len(input_x), numFeatures))
    # print("ip_x shape: ", input_x.shape)
    yhat = model.predict(input_x, verbose=0)
    # we only want the vector forecast
    yhat = yhat[0]
    return yhat, input_x

def getANNHyperParams():
    hyperParams = {}
    hyperParams['epoch'] = 100 
    hyperParams['batchsize'] = [10] 
    hyperParams['actv'] = "relu"
    hyperParams['loss'] = "mse"
    hyperParams['optim'] = ["adam"] #, "rmsprop"]
    hyperParams['lr'] = 1e-2 #, 1e-3
    hyperParams['hidden'] = [20, 50] #, [50, 50]]#, [20, 50]] #, [50, 50]]
    return hyperParams

### train

In [171]:
import pickle

In [211]:
def forecast_all_fuel_sources(fuel_sources):
    for source in fuel_sources:
        IN_FILE_NAME = f"data/data_cleaned_{source}.csv"

        OUT_FILE_NAME_PREFIX = 'data/src_prod_forecast'
        OUT_FILE_NAME = OUT_FILE_NAME_PREFIX + "_" + source + ".csv"
        OUT_MODEL_NAME = 'model/' + source + "_ann.keras"

        NUM_FEATURES_DICT = {"coal": 6, "nat_gas": 6, "nuclear": 6, "oil": 6, "hydro": 13, "solar": 13,
                            "wind": 13, "others": 6}

        SOURCE_COL = 0
        NUM_FEATURES = NUM_FEATURES_DICT[source]

        dataset, dateTime = initDataset(IN_FILE_NAME, SOURCE_COL)

        trainData, valData, testData, fullTrainData = utility.splitDataset(dataset.values, NUM_TEST_DAYS, NUM_VAL_DAYS)
        trainDates = dateTime[: -(NUM_TEST_DAYS*24)]
        fullTrainDates = np.copy(trainDates)
        trainDates, validationDates = trainDates[: -(NUM_VAL_DAYS*24)], trainDates[-(NUM_VAL_DAYS*24):]
        testDates = dateTime[-(NUM_TEST_DAYS*24):]
        trainData = trainData[:, SOURCE_COL: SOURCE_COL+NUM_FEATURES]
        valData = valData[:, SOURCE_COL: SOURCE_COL+NUM_FEATURES]
        testData = testData[:, SOURCE_COL: SOURCE_COL+NUM_FEATURES]

        print("TrainData shape: ", trainData.shape) # (days x hour) x features
        print("ValData shape: ", valData.shape) # (days x hour) x features
        print("TestData shape: ", testData.shape) # (days x hour) x features
        print("***** Dataset split done *****")

        for i in range(trainData.shape[0]):
            for j in range(trainData.shape[1]):
                if(np.isnan(trainData[i, j])):
                    trainData[i, j] = trainData[i-1, j]

        for i in range(valData.shape[0]):
            for j in range(valData.shape[1]):
                if(np.isnan(valData[i, j])):
                    valData[i, j] = valData[i-1, j]

        for i in range(testData.shape[0]):
            for j in range(testData.shape[1]):
                if(np.isnan(testData[i, j])):
                    testData[i, j] = testData[i-1, j]

        featureList = dataset.columns.values[SOURCE_COL:SOURCE_COL+NUM_FEATURES]
        print("Features: ", featureList)

        print("Scaling data...")
        trainData, valData, testData, ftMin, ftMax = utility.scaleDataset(trainData, valData, testData)
        print("***** Data scaling done *****")
        print(trainData.shape, valData.shape, testData.shape)

        print("\nManipulating training data...")
        X, y = manipulateTrainingDataShape(trainData, TRAINING_WINDOW_HOURS, TRAINING_WINDOW_HOURS)
        valX, valY = manipulateTrainingDataShape(valData, TRAINING_WINDOW_HOURS, TRAINING_WINDOW_HOURS)
        print("***** Training data manipulation done *****")
        print("X.shape, y.shape: ", X.shape, y.shape)

        hyperParams = getANNHyperParams()

        print("\nStarting training)...")
        bestModel, numFeatures = trainANN(X, y, valX, valY, hyperParams, OUT_MODEL_NAME)
        print("***** Training done *****")
        history = valData[-TRAINING_WINDOW_HOURS:, :].tolist()
        predictedData = getDayAheadForecasts(X, y, bestModel, history, testData, 
                        TRAINING_WINDOW_HOURS, numFeatures, 0)            
        actualData = manipulateTestDataShape(testData[:, 0], 
                MODEL_SLIDING_WINDOW_LEN, PREDICTION_WINDOW_HOURS, False)
        formattedTestDates = manipulateTestDataShape(testDates, 
                MODEL_SLIDING_WINDOW_LEN, PREDICTION_WINDOW_HOURS, True)
        formattedTestDates = np.reshape(formattedTestDates, 
                formattedTestDates.shape[0]*formattedTestDates.shape[1])
        actualData = actualData.astype(np.float64)
        print("ActualData shape: ", actualData.shape)
        actual = np.reshape(actualData, actualData.shape[0]*actualData.shape[1])
        print("actual.shape: ", actual.shape)
        unscaledTestData = utility.inverseDataScaling(actual, ftMax[0], 
                        ftMin[0])
        predictedData = predictedData.astype(np.float64)
        print("PredictedData shape: ", predictedData.shape)
        predicted = np.reshape(predictedData, predictedData.shape[0]*predictedData.shape[1])
        print("predicted.shape: ", predicted.shape)
        unScaledPredictedData = utility.inverseDataScaling(predicted, 
                ftMax[0], ftMin[0])
        rmseScore, mapeScore = utility.getScores(actualData, predictedData, 
                                unscaledTestData, unScaledPredictedData)
        print("***** Forecast done *****")
        print("Overall RMSE score: ", rmseScore)
        print(rmseScore)

        data = [bestModel, ftMin, ftMax]
        with open('model/' + source + "_ann.pkl", 'wb') as f:
            for d in data:
                pickle.dump(d, f)

        data = []
        for i in range(len(unScaledPredictedData)):
                row = []
                row.append(str(formattedTestDates[i]))
                row.append(str(unscaledTestData[i]))
                row.append(str(unScaledPredictedData[i]))
                data.append(row)
        utility.writeOutFuelForecastFile(OUT_FILE_NAME, data, source)

In [212]:
fuel_sources = ['coal', 'nat_gas', 'nuclear', 'hydro', 'solar', 'wind', 'others']
forecast_all_fuel_sources(fuel_sources)

  dataset = pd.read_csv(inFileName, header=0, infer_datetime_format=True, parse_dates=['UTC Time at End of Hour'], index_col=['UTC Time at End of Hour'])


                         Net Generation (MW) from Coal
UTC Time at End of Hour                               
2021-01-01 06:00:00                            25367.0
2021-01-01 07:00:00                            24662.0
2021-01-01 08:00:00                            25365.0
2021-01-01 09:00:00                            24811.0
2021-01-01 10:00:00                            25041.0
Index(['Net Generation (MW) from Coal'], dtype='object')

Adding features related to date & time...
18738 7536
                         Net Generation (MW) from Coal  hour_sin  \
UTC Time at End of Hour                                            
2021-01-01 06:00:00                            25367.0  1.000000   
2021-01-01 07:00:00                            24662.0  0.965926   
2021-01-01 08:00:00                            25365.0  0.866025   
2021-01-01 09:00:00                            24811.0  0.707107   
2021-01-01 10:00:00                            25041.0  0.500000   

                           

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Epoch 1: val_loss improved from inf to 0.00485, saving model to model/coal_ann.keras
2110/2110 - 2s - 977us/step - loss: 0.0084 - mean_absolute_error: 0.0625 - val_loss: 0.0049 - val_mean_absolute_error: 0.0559
Showing model summary...


***** Model summary shown *****
dict_keys(['loss', 'mean_absolute_error', 'val_loss', 'val_mean_absolute_error'])
***** Training done *****
Testing...
ActualData shape:  (184, 24)
actual.shape:  (4416,)
PredictedData shape:  (184, 24)
predicted.shape:  (4416,)
Actual data shape, Predicted data shape:  (184, 24) (184, 24)
***** Forecast done *****
Overall RMSE score:  0.071778
0.071778
Writing to  data/src_prod_forecast_coal.csv ...
                         Net Generation (MW) from Natural Gas
UTC Time at End of Hour                                      
2021-01-01 06:00:00                                   15999.0
2021-01-01 07:00:00                                   16063.0
2021-01-01 08:00:00                                   16057.0
2021-01-01 09:00:00                                   15870.0
2021-01-01 10:00:00                                   15755.0
Index(['Net Generation (MW) from Natural Gas'], dtype='object')

Adding features related to date & time...


  dataset = pd.read_csv(inFileName, header=0, infer_datetime_format=True, parse_dates=['UTC Time at End of Hour'], index_col=['UTC Time at End of Hour'])


18738 7536
                         Net Generation (MW) from Natural Gas  hour_sin  \
UTC Time at End of Hour                                                   
2021-01-01 06:00:00                                   15999.0  1.000000   
2021-01-01 07:00:00                                   16063.0  0.965926   
2021-01-01 08:00:00                                   16057.0  0.866025   
2021-01-01 09:00:00                                   15870.0  0.707107   
2021-01-01 10:00:00                                   15755.0  0.500000   

                             hour_cos  month_sin  month_cos  weekend  
UTC Time at End of Hour                                               
2021-01-01 06:00:00      6.123234e-17   0.008601   0.999963        0  
2021-01-01 07:00:00     -2.588190e-01   0.009318   0.999957        0  
2021-01-01 08:00:00     -5.000000e-01   0.010035   0.999950        0  
2021-01-01 09:00:00     -7.071068e-01   0.010751   0.999942        0  
2021-01-01 10:00:00     -8.660254e-01

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Epoch 1: val_loss improved from inf to 0.00743, saving model to model/nat_gas_ann.keras
2110/2110 - 3s - 1ms/step - loss: 0.0089 - mean_absolute_error: 0.0688 - val_loss: 0.0074 - val_mean_absolute_error: 0.0686
Showing model summary...


***** Model summary shown *****
dict_keys(['loss', 'mean_absolute_error', 'val_loss', 'val_mean_absolute_error'])
***** Training done *****
Testing...
ActualData shape:  (184, 24)
actual.shape:  (4416,)
PredictedData shape:  (184, 24)
predicted.shape:  (4416,)
Actual data shape, Predicted data shape:  (184, 24) (184, 24)
***** Forecast done *****
Overall RMSE score:  0.091737
0.091737
Writing to  data/src_prod_forecast_nat_gas.csv ...
                         Net Generation (MW) from Nuclear
UTC Time at End of Hour                                  
2021-01-01 06:00:00                               11534.0
2021-01-01 07:00:00                               11544.0
2021-01-01 08:00:00                               11555.0
2021-01-01 09:00:00                               11560.0
2021-01-01 10:00:00                               11565.0
Index(['Net Generation (MW) from Nuclear'], dtype='object')

Adding features related to date & time...


  dataset = pd.read_csv(inFileName, header=0, infer_datetime_format=True, parse_dates=['UTC Time at End of Hour'], index_col=['UTC Time at End of Hour'])


18738 7536
                         Net Generation (MW) from Nuclear  hour_sin  \
UTC Time at End of Hour                                               
2021-01-01 06:00:00                               11534.0  1.000000   
2021-01-01 07:00:00                               11544.0  0.965926   
2021-01-01 08:00:00                               11555.0  0.866025   
2021-01-01 09:00:00                               11560.0  0.707107   
2021-01-01 10:00:00                               11565.0  0.500000   

                             hour_cos  month_sin  month_cos  weekend  
UTC Time at End of Hour                                               
2021-01-01 06:00:00      6.123234e-17   0.008601   0.999963        0  
2021-01-01 07:00:00     -2.588190e-01   0.009318   0.999957        0  
2021-01-01 08:00:00     -5.000000e-01   0.010035   0.999950        0  
2021-01-01 09:00:00     -7.071068e-01   0.010751   0.999942        0  
2021-01-01 10:00:00     -8.660254e-01   0.011468   0.999934      

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Epoch 1: val_loss improved from inf to 0.00150, saving model to model/nuclear_ann.keras
2110/2110 - 2s - 1ms/step - loss: 0.0076 - mean_absolute_error: 0.0426 - val_loss: 0.0015 - val_mean_absolute_error: 0.0273
Showing model summary...


***** Model summary shown *****
dict_keys(['loss', 'mean_absolute_error', 'val_loss', 'val_mean_absolute_error'])
***** Training done *****
Testing...
ActualData shape:  (184, 24)
actual.shape:  (4416,)
PredictedData shape:  (184, 24)
predicted.shape:  (4416,)
Actual data shape, Predicted data shape:  (184, 24) (184, 24)
***** Forecast done *****
Overall RMSE score:  0.04594
0.04594
Writing to  data/src_prod_forecast_nuclear.csv ...
                         Net Generation (MW) from Hydropower and Pumped Storage  \
UTC Time at End of Hour                                                           
2021-01-01 06:00:00                                                  714.0        
2021-01-01 07:00:00                                                  664.0        
2021-01-01 08:00:00                                                  661.0        
2021-01-01 09:00:00                                                  664.0        
2021-01-01 10:00:00                                              

  dataset = pd.read_csv(inFileName, header=0, infer_datetime_format=True, parse_dates=['UTC Time at End of Hour'], index_col=['UTC Time at End of Hour'])


18738 7536
                         Net Generation (MW) from Hydropower and Pumped Storage  \
UTC Time at End of Hour                                                           
2021-01-01 06:00:00                                                  714.0        
2021-01-01 07:00:00                                                  664.0        
2021-01-01 08:00:00                                                  661.0        
2021-01-01 09:00:00                                                  664.0        
2021-01-01 10:00:00                                                  668.0        

                         hour_sin      hour_cos  month_sin  month_cos  \
UTC Time at End of Hour                                                 
2021-01-01 06:00:00      1.000000  6.123234e-17   0.008601   0.999963   
2021-01-01 07:00:00      0.965926 -2.588190e-01   0.009318   0.999957   
2021-01-01 08:00:00      0.866025 -5.000000e-01   0.010035   0.999950   
2021-01-01 09:00:00      0.707107 -7.07106

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Epoch 1: val_loss improved from inf to 0.00810, saving model to model/hydro_ann.keras
2110/2110 - 2s - 932us/step - loss: 0.0192 - mean_absolute_error: 0.0964 - val_loss: 0.0081 - val_mean_absolute_error: 0.0688
Showing model summary...


***** Model summary shown *****
dict_keys(['loss', 'mean_absolute_error', 'val_loss', 'val_mean_absolute_error'])
***** Training done *****
Testing...
ActualData shape:  (184, 24)
actual.shape:  (4416,)
PredictedData shape:  (184, 24)
predicted.shape:  (4416,)
Actual data shape, Predicted data shape:  (184, 24) (184, 24)
***** Forecast done *****
Overall RMSE score:  0.120165
0.120165
Writing to  data/src_prod_forecast_hydro.csv ...
                         Net Generation (MW) from Solar  temp  dwpt   rhum  \
UTC Time at End of Hour                                                      
2021-01-01 06:00:00                                 0.0  -6.0  -6.0  100.0   
2021-01-01 07:00:00                                 0.0  -6.0  -6.0  100.0   
2021-01-01 08:00:00                                 0.0  -6.0  -6.0  100.0   
2021-01-01 09:00:00                                 0.0  -6.0  -6.0  100.0   
2021-01-01 10:00:00                                 0.0  -6.0  -6.0  100.0   

                

  dataset = pd.read_csv(inFileName, header=0, infer_datetime_format=True, parse_dates=['UTC Time at End of Hour'], index_col=['UTC Time at End of Hour'])


18738 7536
                         Net Generation (MW) from Solar  hour_sin  \
UTC Time at End of Hour                                             
2021-01-01 06:00:00                                 0.0  1.000000   
2021-01-01 07:00:00                                 0.0  0.965926   
2021-01-01 08:00:00                                 0.0  0.866025   
2021-01-01 09:00:00                                 0.0  0.707107   
2021-01-01 10:00:00                                 0.0  0.500000   

                             hour_cos  month_sin  month_cos  weekend  temp  \
UTC Time at End of Hour                                                      
2021-01-01 06:00:00      6.123234e-17   0.008601   0.999963        0  -6.0   
2021-01-01 07:00:00     -2.588190e-01   0.009318   0.999957        0  -6.0   
2021-01-01 08:00:00     -5.000000e-01   0.010035   0.999950        0  -6.0   
2021-01-01 09:00:00     -7.071068e-01   0.010751   0.999942        0  -6.0   
2021-01-01 10:00:00     -8.660254e-01

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Epoch 1: val_loss improved from inf to 0.01252, saving model to model/solar_ann.keras
2110/2110 - 2s - 861us/step - loss: 0.0091 - mean_absolute_error: 0.0612 - val_loss: 0.0125 - val_mean_absolute_error: 0.0801
Showing model summary...


***** Model summary shown *****
dict_keys(['loss', 'mean_absolute_error', 'val_loss', 'val_mean_absolute_error'])
***** Training done *****
Testing...
ActualData shape:  (184, 24)
actual.shape:  (4416,)
PredictedData shape:  (184, 24)
predicted.shape:  (4416,)
Actual data shape, Predicted data shape:  (184, 24) (184, 24)
***** Forecast done *****
Overall RMSE score:  0.134581
0.134581
Writing to  data/src_prod_forecast_solar.csv ...
                         Net Generation (MW) from Wind  temp  dwpt   rhum  \
UTC Time at End of Hour                                                     
2021-01-01 06:00:00                             9486.0  -6.0  -6.0  100.0   
2021-01-01 07:00:00                             8780.0  -6.0  -6.0  100.0   
2021-01-01 08:00:00                             6912.0  -6.0  -6.0  100.0   
2021-01-01 09:00:00                             5967.0  -6.0  -6.0  100.0   
2021-01-01 10:00:00                             5074.0  -6.0  -6.0  100.0   

                       

  dataset = pd.read_csv(inFileName, header=0, infer_datetime_format=True, parse_dates=['UTC Time at End of Hour'], index_col=['UTC Time at End of Hour'])


18738 7536
                         Net Generation (MW) from Wind  hour_sin  \
UTC Time at End of Hour                                            
2021-01-01 06:00:00                             9486.0  1.000000   
2021-01-01 07:00:00                             8780.0  0.965926   
2021-01-01 08:00:00                             6912.0  0.866025   
2021-01-01 09:00:00                             5967.0  0.707107   
2021-01-01 10:00:00                             5074.0  0.500000   

                             hour_cos  month_sin  month_cos  weekend  temp  \
UTC Time at End of Hour                                                      
2021-01-01 06:00:00      6.123234e-17   0.008601   0.999963        0  -6.0   
2021-01-01 07:00:00     -2.588190e-01   0.009318   0.999957        0  -6.0   
2021-01-01 08:00:00     -5.000000e-01   0.010035   0.999950        0  -6.0   
2021-01-01 09:00:00     -7.071068e-01   0.010751   0.999942        0  -6.0   
2021-01-01 10:00:00     -8.660254e-01   0.01

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Epoch 1: val_loss improved from inf to 0.02952, saving model to model/wind_ann.keras
2110/2110 - 2s - 1ms/step - loss: 0.0290 - mean_absolute_error: 0.1361 - val_loss: 0.0295 - val_mean_absolute_error: 0.1486
Showing model summary...


***** Model summary shown *****
dict_keys(['loss', 'mean_absolute_error', 'val_loss', 'val_mean_absolute_error'])
***** Training done *****
Testing...
ActualData shape:  (184, 24)
actual.shape:  (4416,)
PredictedData shape:  (184, 24)
predicted.shape:  (4416,)
Actual data shape, Predicted data shape:  (184, 24) (184, 24)
***** Forecast done *****
Overall RMSE score:  0.170944
0.170944
Writing to  data/src_prod_forecast_wind.csv ...
                         Net Generation (MW) from Other
UTC Time at End of Hour                                
2021-01-01 06:00:00                               840.0
2021-01-01 07:00:00                               840.0
2021-01-01 08:00:00                               839.0
2021-01-01 09:00:00                               840.0
2021-01-01 10:00:00                               844.0
Index(['Net Generation (MW) from Other'], dtype='object')

Adding features related to date & time...


  dataset = pd.read_csv(inFileName, header=0, infer_datetime_format=True, parse_dates=['UTC Time at End of Hour'], index_col=['UTC Time at End of Hour'])


18738 7536
                         Net Generation (MW) from Other  hour_sin  \
UTC Time at End of Hour                                             
2021-01-01 06:00:00                               840.0  1.000000   
2021-01-01 07:00:00                               840.0  0.965926   
2021-01-01 08:00:00                               839.0  0.866025   
2021-01-01 09:00:00                               840.0  0.707107   
2021-01-01 10:00:00                               844.0  0.500000   

                             hour_cos  month_sin  month_cos  weekend  
UTC Time at End of Hour                                               
2021-01-01 06:00:00      6.123234e-17   0.008601   0.999963        0  
2021-01-01 07:00:00     -2.588190e-01   0.009318   0.999957        0  
2021-01-01 08:00:00     -5.000000e-01   0.010035   0.999950        0  
2021-01-01 09:00:00     -7.071068e-01   0.010751   0.999942        0  
2021-01-01 10:00:00     -8.660254e-01   0.011468   0.999934        0  
Features

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Epoch 1: val_loss improved from inf to 0.00164, saving model to model/others_ann.keras
2110/2110 - 2s - 1ms/step - loss: 0.0043 - mean_absolute_error: 0.0416 - val_loss: 0.0016 - val_mean_absolute_error: 0.0268
Showing model summary...


***** Model summary shown *****
dict_keys(['loss', 'mean_absolute_error', 'val_loss', 'val_mean_absolute_error'])
***** Training done *****
Testing...
ActualData shape:  (184, 24)
actual.shape:  (4416,)
PredictedData shape:  (184, 24)
predicted.shape:  (4416,)
Actual data shape, Predicted data shape:  (184, 24) (184, 24)
***** Forecast done *****
Overall RMSE score:  0.035236
0.035236
Writing to  data/src_prod_forecast_others.csv ...


### inference

In [213]:
def scale_dataset_trained(valData, testData, ftMin, ftMax):
    # Scaling columns to range (0, 1)
    row, col = valData.shape[0], valData.shape[1]
    for i in range(col):
        if((ftMax[i] - ftMin[i]) == 0):
            continue
        valData[:, i] = (valData[:, i] - ftMin[i]) / (ftMax[i] - ftMin[i])
        testData[:, i] = (testData[:, i] - ftMin[i]) / (ftMax[i] - ftMin[i])

    return valData, testData

def get_day_ahead_forecasts(model_filepath, history, train_window_hours, num_features, dep_var_column):
    global MODEL_SLIDING_WINDOW_LEN
    global PREDICTION_WINDOW_HOURS
    
    # Load the trained ANN model
    model = load_model(model_filepath)

    # walk-forward validation over each day
    print("Generating day-ahead forecasts...")
    predictions = []
    for i in range(len(history) // 24):
        day_ahead_predictions = []
        temp_history = history.copy()
        current_day_hours = i * MODEL_SLIDING_WINDOW_LEN
        for j in range(0, PREDICTION_WINDOW_HOURS, 24):
            # Get forecasts for the next 24 hours
            yhat_sequence, new_training_data = get_forecasts(model, temp_history, train_window_hours, num_features)
            day_ahead_predictions.extend(yhat_sequence)
            # Update history for predicting the next day
            latest_history = history[current_day_hours + j : current_day_hours + j + 24].copy()
            for k in range(24):
                latest_history[k][dep_var_column] = yhat_sequence[k]
            temp_history = np.concatenate([temp_history, latest_history], axis=0)

        # Update history for predicting the next day
        history = np.concatenate([history, history[current_day_hours : current_day_hours + MODEL_SLIDING_WINDOW_LEN]], axis=0)
        predictions.append(day_ahead_predictions)

    # Convert predictions to numpy array
    predicted_data = np.array(predictions, dtype=np.float64)
    return predicted_data


def get_forecasts(model, history, train_window_hours, num_features):
    # Flatten data
    data = np.array(history, dtype=np.float64)
    # Retrieve last observations for input data
    input_x = data[-train_window_hours:]
    # Reshape into [1, n_input, num_features]
    input_x = input_x.reshape((1, len(input_x), num_features))
    # Make predictions
    yhat = model.predict(input_x, verbose=0)
    # Extract the vector forecast
    yhat = yhat[0]
    return yhat, input_x

def inference_test(fuel_sources, inference_timestamp):
    for source in fuel_sources:
        IN_FILE_NAME = "data/MW_electricity_cleaned.csv"
        IN_MODEL_NAME = 'model/' + source + "_ann.keras"

        NUM_FEATURES_DICT = {"coal": 6, "nat_gas": 6, "nuclear": 6, "oil": 6, "hydro": 13, "solar": 13,
                            "wind": 13, "others": 6}

        NUM_VAL_DAYS = 30
        NUM_TEST_DAYS = 184
        TRAINING_WINDOW_HOURS = 24
        PREDICTION_WINDOW_HOURS = 24
        MODEL_SLIDING_WINDOW_LEN = 24

        SOURCE_COL = 0
        NUM_FEATURES = NUM_FEATURES_DICT[source]

        print("initializing dataset...")
        
        dataset, dateTime = initDataset(IN_FILE_NAME, SOURCE_COL)
        nearest_lower_timestamp = max(filter(lambda x: x <= np.datetime64(inference_timestamp), dateTime))

        # Get data up to last_date and last 24 hours of data
        last_past_date = pd.to_datetime(nearest_lower_timestamp).strftime("%Y-%m-%d %H:%M:%S")
        past = dataset.loc[dataset.index <= last_past_date].tail(24)

        # Get data minimum last_date, max last_date + 24 hours of data
        last_future_date = (pd.to_datetime(nearest_lower_timestamp) + pd.Timedelta(hours=24)).strftime("%Y-%m-%d %H:%M:%S")
        future = dataset.loc[dataset.index <= last_future_date].tail(24)

        # trainData, valData, testData, fullTrainData = utility.splitDataset(dataset.values, NUM_TEST_DAYS, NUM_VAL_DAYS)

        # trainDates = dateTime[: -(NUM_TEST_DAYS*24)]
        # fullTrainDates = np.copy(trainDates)
        # trainDates, validationDates = trainDates[: -(NUM_VAL_DAYS*24)], trainDates[-(NUM_VAL_DAYS*24):]
        # testDates = dateTime[-(NUM_TEST_DAYS*24):]

        trainDates = dateTime[: -(NUM_TEST_DAYS*24)]
        fullTrainDates = np.copy(trainDates)
        trainDates, validationDates = trainDates[: -(NUM_VAL_DAYS*24)], trainDates[-(NUM_VAL_DAYS*24):]
        testDates = future.index

        past = past.iloc[:, SOURCE_COL: SOURCE_COL+NUM_FEATURES].values
        future = future.iloc[:, SOURCE_COL: SOURCE_COL+NUM_FEATURES].values

        print("past shape: ", past.shape) # (days x hour) x features
        print("future shape: ", future.shape) # (days x hour) x features

        for i in range(past.shape[0]):
            for j in range(past.shape[1]):
                if(np.isnan(past[i, j])):
                    past[i, j] = past[i-1, j]

        for i in range(future.shape[0]):
            for j in range(future.shape[1]):
                if(np.isnan(future[i, j])):
                    future[i, j] = future[i-1, j]

        featureList = dataset.columns.values[SOURCE_COL:SOURCE_COL+NUM_FEATURES]
        print("Features: ", featureList)

        data = []
        with open(f'../GhostPostCC/model/{source}_ann.pkl', 'rb') as f:
            while True:
                try:
                    data.append(pickle.load(f))
                except EOFError:
                    break
        
        model = data[0]
        ftMin = data[1]
        ftMax = data[2]

        print("Scaling data...")
        past, future = scale_dataset_trained(past, future, ftMin, ftMax)
        print("***** Data scaling done *****")
        print(past.shape, future.shape)

        history = past.tolist()
        predictedData = get_day_ahead_forecasts(IN_MODEL_NAME, history, TRAINING_WINDOW_HOURS, NUM_FEATURES, 0)
        actualData = manipulateTestDataShape(future[:, 0], MODEL_SLIDING_WINDOW_LEN, PREDICTION_WINDOW_HOURS, False)
        
        formattedTestDates = manipulateTestDataShape(testDates, MODEL_SLIDING_WINDOW_LEN, PREDICTION_WINDOW_HOURS, True)
        formattedTestDates = np.reshape(formattedTestDates, formattedTestDates.shape[0]*formattedTestDates.shape[1])
        actualData = actualData.astype(np.float64)

        print("ActualData shape: ", actualData.shape)
        actual = np.reshape(actualData, actualData.shape[0]*actualData.shape[1])
        print("actual.shape: ", actual.shape)
        unscaledTestData = utility.inverseDataScaling(actual, ftMax[0], ftMin[0])
        predictedData = predictedData.astype(np.float64)
        print("PredictedData shape: ", predictedData.shape)
        predicted = np.reshape(predictedData, predictedData.shape[0]*predictedData.shape[1])
        print("predicted.shape: ", predicted.shape)
        unScaledPredictedData = utility.inverseDataScaling(predicted, ftMax[0], ftMin[0])
        rmseScore, mapeScore = utility.getScores(actualData, predictedData, unscaledTestData, unScaledPredictedData)
        print("***** Forecast done *****")
        print("Overall RMSE score: ", rmseScore)
        print(rmseScore)

        data = []
        for i in range(len(unScaledPredictedData)):
                row = []
                row.append(str(formattedTestDates[i]))
                row.append(str(unscaledTestData[i]))
                row.append(str(unScaledPredictedData[i]))
                data.append(row)
    return data

def inference(fuel_sources, inference_timestamp):
    for source in fuel_sources:
        IN_FILE_NAME = "data/MW_electricity_cleaned.csv"
        IN_MODEL_NAME = 'model/' + source + "_ann.keras"

        NUM_FEATURES_DICT = {"coal": 6, "nat_gas": 6, "nuclear": 6, "oil": 6, "hydro": 6, "solar": 6,
                            "wind": 6, "others": 6}

        NUM_VAL_DAYS = 30
        NUM_TEST_DAYS = 184
        TRAINING_WINDOW_HOURS = 24
        PREDICTION_WINDOW_HOURS = 24
        MODEL_SLIDING_WINDOW_LEN = 24

        COAL = 1
        NAT_GAS = 2
        NUCLEAR = 3
        OIL = 4
        HYDRO = 5
        SOLAR = 6
        WIND = 7
        OTHERS = 8

        FUEL = {COAL: "coal", NAT_GAS: "nat_gas", NUCLEAR: "nuclear", OIL: "oil", HYDRO: "hydro", SOLAR: "solar", WIND: "wind", OTHERS: "others"}
        SOURCE_TO_SOURCE_COL_MAP = {y: x for x, y in FUEL.items()}

        SOURCE_COL = SOURCE_TO_SOURCE_COL_MAP[source]
        NUM_FEATURES = NUM_FEATURES_DICT[FUEL[SOURCE_COL]]

        print("initializing dataset...")
        
        dataset, dateTime = initDataset(IN_FILE_NAME, SOURCE_COL)
        nearest_lower_timestamp = max(filter(lambda x: x <= np.datetime64(inference_timestamp), dateTime))

        # Get data up to last_date and last 24 hours of data
        last_past_date = pd.to_datetime(nearest_lower_timestamp).strftime("%Y-%m-%d %H:%M:%S")
        past = dataset.loc[dataset.index <= last_past_date].tail(24)

        # Get data minimum last_date, max last_date + 24 hours of data
        last_future_date = (pd.to_datetime(nearest_lower_timestamp) + pd.Timedelta(hours=24)).strftime("%Y-%m-%d %H:%M:%S")
        future = dataset.loc[dataset.index <= last_future_date].tail(24)

        trainDates = dateTime[: -(NUM_TEST_DAYS*24)]
        fullTrainDates = np.copy(trainDates)
        trainDates, validationDates = trainDates[: -(NUM_VAL_DAYS*24)], trainDates[-(NUM_VAL_DAYS*24):]
        testDates = future.index

        past = past.iloc[:, SOURCE_COL: SOURCE_COL+NUM_FEATURES].values
        future = future.iloc[:, SOURCE_COL: SOURCE_COL+NUM_FEATURES].values

        print("past shape: ", past.shape) # (days x hour) x features
        print("future shape: ", future.shape) # (days x hour) x features

        for i in range(past.shape[0]):
            for j in range(past.shape[1]):
                if(np.isnan(past[i, j])):
                    past[i, j] = past[i-1, j]

        featureList = dataset.columns.values[SOURCE_COL:SOURCE_COL+NUM_FEATURES]
        print("Features: ", featureList)

        data = []
        with open(f'../GhostPostCC/model/{source}_ann.pkl', 'rb') as f:
            while True:
                try:
                    data.append(pickle.load(f))
                except EOFError:
                    break
        
        model = data[0]
        ftMin = data[1]
        ftMax = data[2]

        print("Scaling data...")
        past, future = scale_dataset_trained(past, future, ftMin, ftMax)
        print("***** Data scaling done *****")
        print(past.shape, future.shape)

        history = past.tolist()
        predictedData = get_day_ahead_forecasts(IN_MODEL_NAME, history, TRAINING_WINDOW_HOURS, NUM_FEATURES, 0)
        formattedTestDates = manipulateTestDataShape(testDates, MODEL_SLIDING_WINDOW_LEN, PREDICTION_WINDOW_HOURS, True)
        formattedTestDates = np.reshape(formattedTestDates, formattedTestDates.shape[0]*formattedTestDates.shape[1])

        predictedData = predictedData.astype(np.float64)
        print("PredictedData shape: ", predictedData.shape)
        predicted = np.reshape(predictedData, predictedData.shape[0]*predictedData.shape[1])
        print("predicted.shape: ", predicted.shape)
        unScaledPredictedData = utility.inverseDataScaling(predicted, ftMax[0], ftMin[0])

        data = []
        for i in range(len(unScaledPredictedData)):
            row = []
            row.append(str(formattedTestDates[i]))
            row.append(str(unScaledPredictedData[i]))
            data.append(row)
    return data

In [214]:
# dataset, dateTime = initDataset(IN_FILE_NAME, SOURCE_COL)
# nearest_lower_timestamp = max(filter(lambda x: x <= np.datetime64(inference_timestamp), dateTime))

# # if nearest_lower_timestamp != inference_timestamp:
# #     print(f"No data available for inference timestamp {inference_timestamp}")
# #     continue

# last_date = pd.to_datetime(nearest_lower_timestamp) + pd.Timedelta(hours=24)
# last_date = last_date.strftime("%Y-%m-%d %H:%M:%S")

# # Get data up to last_date and last 24 hours of data
# data_up_to_last_date = dataset.loc[dataset.index <= last_date].tail(24)
# inference_data = data_up_to_last_date.iloc[:, SOURCE_COL:SOURCE_COL+NUM_FEATURES].values
# dateTime = data_up_to_last_date.index

In [215]:
inference(['wind'], '2023-04-07 10:02:00')

initializing dataset...
                         Unnamed: 0  Net Generation (MW) from Coal  \
UTC Time at End of Hour                                              
2021-01-01 06:00:00               0                        25367.0   
2021-01-01 07:00:00               1                        24662.0   
2021-01-01 08:00:00               2                        25365.0   
2021-01-01 09:00:00               3                        24811.0   
2021-01-01 10:00:00               4                        25041.0   

                         Net Generation (MW) from Natural Gas  \
UTC Time at End of Hour                                         
2021-01-01 06:00:00                                   15999.0   
2021-01-01 07:00:00                                   16063.0   
2021-01-01 08:00:00                                   16057.0   
2021-01-01 09:00:00                                   15870.0   
2021-01-01 10:00:00                                   15755.0   

                         Net 

  dataset = pd.read_csv(inFileName, header=0, infer_datetime_format=True, parse_dates=['UTC Time at End of Hour'], index_col=['UTC Time at End of Hour'])


18738 7536
                         Unnamed: 0  Net Generation (MW) from Coal  \
UTC Time at End of Hour                                              
2021-01-01 06:00:00               0                        25367.0   
2021-01-01 07:00:00               1                        24662.0   
2021-01-01 08:00:00               2                        25365.0   
2021-01-01 09:00:00               3                        24811.0   
2021-01-01 10:00:00               4                        25041.0   

                         Net Generation (MW) from Natural Gas  \
UTC Time at End of Hour                                         
2021-01-01 06:00:00                                   15999.0   
2021-01-01 07:00:00                                   16063.0   
2021-01-01 08:00:00                                   16057.0   
2021-01-01 09:00:00                                   15870.0   
2021-01-01 10:00:00                                   15755.0   

                         Net Generation (M

Generating day-ahead forecasts...


ValueError: Exception encountered when calling Sequential.call().

[1mInput 0 of layer "dense_168" is incompatible with the layer: expected axis -1 of input shape to have value 312, but received input with shape (1, 144)[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=(1, 24, 6), dtype=float32)
  • training=False
  • mask=None

In [None]:
def get_day_ahead_forecasts(model_filepath, history, train_window_hours, num_features, dep_var_column):
    global MODEL_SLIDING_WINDOW_LEN
    global PREDICTION_WINDOW_HOURS
    
    # Load the trained ANN model
    model = load_model(model_filepath)

    # walk-forward validation over each day
    print("Generating day-ahead forecasts...")
    predictions = []
    for i in range(len(history) // 24):
        day_ahead_predictions = []
        temp_history = history.copy()
        current_day_hours = i * MODEL_SLIDING_WINDOW_LEN
        for j in range(0, PREDICTION_WINDOW_HOURS, 24):
            # Get forecasts for the next 24 hours
            yhat_sequence, new_training_data = get_forecasts(model, temp_history, train_window_hours, num_features)
            day_ahead_predictions.extend(yhat_sequence)
            # Update history for predicting the next day
            latest_history = history[current_day_hours + j : current_day_hours + j + 24].copy()
            for k in range(24):
                latest_history[k][dep_var_column] = yhat_sequence[k]
            temp_history = np.concatenate([temp_history, latest_history], axis=0)

        # Update history for predicting the next day
        history = np.concatenate([history, history[current_day_hours : current_day_hours + MODEL_SLIDING_WINDOW_LEN]], axis=0)
        predictions.append(day_ahead_predictions)

    # Convert predictions to numpy array
    predicted_data = np.array(predictions, dtype=np.float64)
    return predicted_data


In [175]:
data2 = []
with open('../GhostPostCC/model/coal_ann.pkl', 'rb') as f:
    while True:
        try:
            data2.append(pickle.load(f))
        except EOFError:
            break

In [176]:
data2

[<Sequential name=sequential_37, built=True>,
 [8137.0, -1.0, -1.0, -0.9999999357805159, -1.0, 0.0],
 [43316.0, 1.0, 1.0, 0.9999999357805159, 1.0, 1.0]]