In [1]:
# Import python libraries
#
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow.keras as keras
import importlib
from tqdm import tqdm
import pickle
import plotly.graph_objects as go
from joblib import Parallel, delayed
from IPython.display import display
from sklearn.metrics import mean_squared_error
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error

# Imports own modules.
# All imports are done relative to the root of the project.
#
import sys
sys.path.append('../../')
import controller.model.Model as model
import models.weather_data as weather_data
import models.demandprofiles_readout as demandprofiles
import controller.model.LstmAdapter as LstmAdapter
import controller.model.visualization as visualization
import models.standardprofiles_readout as standardprofiles





In [75]:
importlib.reload(weather_data)
importlib.reload(demandprofiles)
importlib.reload(LstmAdapter)
importlib.reload(standardprofiles)

def get_lstm_data(seed=None, shuffle_data=True):

    # Readout the weather data
    #
    weather_measurements = weather_data.WeatherMeasurements()
    weatherData = weather_measurements.get_data()

    # Readout the aggregated characteristic power profiles and bring it to 
    # the format needed by the model
    #
    profiles = demandprofiles.DemandProfiles_Readout()
    powerProfile = profiles.get_aggregated_powerprofile()
    lstmAdapter = LstmAdapter.LstmAdapter(train_size = 265, dev_size = 0, add_tda_features=True, addAllFeatures=True, shuffle_data=shuffle_data, seed=seed)
    X_model, Y_model = lstmAdapter.transformData(powerProfile, weatherData)

    return X_model, Y_model, lstmAdapter

X, Y, lstmAdapter = get_lstm_data()


In [22]:
# Benchmark: Persistent Forecast
# Calculate the MSE for each powerprofile compared to the powerprofile x days ago
#

from sklearn.metrics import mean_squared_error

def calculate_mse(dataset_type):
    result = []
    for lag in range(0, 9):
        mse_values = []
        for i in range(Y[dataset_type].shape[0]):
            current_profile = Y[dataset_type][i, None, :, 0]
            past_day = lstmAdapter.getUnshuffeledIndex(dataset_type, i) - lag
            if past_day >= 0:
                past_profile = Y['all'][past_day, None, :, 0]
                mse_values.append(mean_squared_error(current_profile, past_profile))
        result.append("{:.5f}".format(np.mean(mse_values)))
    return result
print(f"mse_train:", calculate_mse('train'))
print(f"mse_test: ", calculate_mse('test'))


mse_train: ['0.00000', '0.28567', '0.48064', '0.58146', '0.62128', '0.57084', '0.46750', '0.32497', '0.51064']
mse_test:  ['0.00000', '0.25499', '0.41372', '0.46414', '0.46026', '0.50043', '0.34296', '0.21146', '0.37060']


In [124]:
# Benchmark: Persistent Forecast with average of the last x*7 days
#

mse_values = []
# lag = np.array([-14,-7,7,14,21,])
lag = np.array([7,14,21])
for i in range(Y['all'].shape[0] - 14):
    current_profile = Y['all'][i, None, :, 0]
    reference_days = i - lag
    if np.all(reference_days >= 0):#  and np.all(reference_days < 362) :
        past_profiles = Y['all'][reference_days, None, :, 0]
        mean_past_profile = np.mean(past_profiles, axis=0)
        mse_values.append(mean_squared_error(current_profile, mean_past_profile))
mse = np.mean(mse_values)
print(f"MSE: {mse:.5f}")


MSE: 0.08611


In [4]:
# Benchmark: KNN with univariate output
#

# Split the data into training and testing sets
X_train = X['train'][:, 24:, :19].reshape(-1, 19)
X_test = X['test'][:, 24:, :19].reshape(-1, 19)
Y_train = Y['train'][:, :, 0].reshape(-1)
Y_test = Y['test'][:, :, 0].reshape(-1)

# Create and fit the KNeighborsRegressor model
knn_model = KNeighborsRegressor(n_neighbors=5)
knn_model.fit(X_train, Y_train)

# Predict
print("Test MSE:", mean_squared_error(Y_test, knn_model.predict(X_test)))


Test MSE: 0.22226952728044866


In [31]:
# Benchmark: KNN with multivariate output
#

for seed in range(10):
    X, Y, lstmAdapter = get_lstm_data()

    # Split the data into training and testing sets
    X_train_knn = X['train'][:, :, :].reshape(X['train'].shape[0], -1)
    X_test_knn = X['test'][:, :, :].reshape(X['test'].shape[0], -1)
    Y_train_knn = Y['train'][:, :, 0]
    Y_test_knn = Y['test'][:, :, 0]

    # Create and fit the KNeighborsRegressor model
    knn_model = KNeighborsRegressor(n_neighbors=5)
    knn_model.fit(X_train_knn, Y_train_knn)

    # Predict
    predicted_profile = knn_model.predict(X_test_knn)
    print("Seed = " + str(seed) + ". Test MSE:", mean_squared_error(Y_test_knn, predicted_profile))



Seed = 0. Test MSE: 0.08349037557757634
Seed = 1. Test MSE: 0.11711166816201617
Seed = 2. Test MSE: 0.10690521010029959
Seed = 3. Test MSE: 0.12938869724691782
Seed = 4. Test MSE: 0.08798795339434383
Seed = 5. Test MSE: 0.08525720049051837
Seed = 6. Test MSE: 0.09285150165929151
Seed = 7. Test MSE: 0.08483456240676424
Seed = 8. Test MSE: 0.08137954639865293
Seed = 9. Test MSE: 0.09042679178240681


In [131]:
# Benchmark: KNN with multivariate output and moving horizone
#

mse_over_days = []

for day in range(7, 363):

    # Split the data into training and testing sets
    X_train_knn = define_inputs(X['all'][:day, :, :], left_out_features)
    X_test_knn = define_inputs(X['all'][day, np.newaxis, :, :], left_out_features)
    Y_train_knn = Y['all'][:day, :, 0]
    Y_test_knn = Y['all'][day, np.newaxis, :, 0]

    # Create and fit the KNeighborsRegressor model
    knn_model = KNeighborsRegressor(n_neighbors=5)
    knn_model.fit(X_train_knn, Y_train_knn)

    # Predict
    predicted_profile = knn_model.predict(X_test_knn)
    mse = mean_squared_error(Y_test_knn, predicted_profile)
    mse_over_days.append(mse)

fig = go.Figure()
fig.add_trace(go.Scatter(y=mse_over_days))
fig.show()

mse = np.mean(mse_values)
print(f"MSE: {mse:.5f}")


MSE: 0.08611


In [121]:

test_day = 357
day = test_day

# Split the data into training and testing sets
X_train_knn = X['all'][:day, :, :].reshape(day, -1)
X_test_knn = X['all'][day, np.newaxis, :, :].reshape(1, -1)
Y_train_knn = Y['all'][:day, :, 0]
Y_test_knn = Y['all'][day, np.newaxis, :, 0]

# Create and fit the KNeighborsRegressor model
k = 5
knn_model = KNeighborsRegressor(n_neighbors=k)
knn_model.fit(X_train_knn, Y_train_knn)

# Predict
predicted_profile = knn_model.predict(X_test_knn)
mse = mean_squared_error(Y_test_knn, predicted_profile)
print(mse)

distances, neighbours = knn_model.kneighbors(X_test_knn[: , :], n_neighbors=k, return_distance=True)
arr = X_train_knn.reshape(X_train_knn.shape[0], -1)
print(test_day, neighbours, (test_day-neighbours)%7, distances)


9.674723759414766
357 [[ 14  34  30  43 281]] [[0 1 5 6 6]] [[68.39265025 68.43306291 68.49110992 69.5570548  69.70241328]]


In [125]:

all_features = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 
                'Sunday or Public Holidy', 'hour_sin', 'hour_cos', 'day_of_year_sin', 
                'day_of_year_cos', 'weather_temp', 
                'pers_entropy_dim0', 'pers_entropy_dim1', 'pers_entropy_dim2', 
                'weather_prcp', 'weather_wspd', 'weather_tsun', 'prev_profile']

left_out_features = ['hour_sin', 'hour_cos',
                    'prev_profile', 
                    'weather_temp', 'weather_prcp', 'weather_wspd', 'weather_tsun',
                    'pers_entropy_dim0', 'pers_entropy_dim1', 'pers_entropy_dim2' 
                    ]


In [48]:
# Benchmark: KNN with multivariate output and missing features
#

def define_inputs(X, left_out_features):
    indices = []
    for feature in left_out_features:
        index = np.argwhere(np.array(all_features) == feature)
        indices.append(index)
    X = np.delete(X, indices, axis=2)
    # X = np.unique(X, axis=1)
    # X = X[:, [0, 12 24 36], :]
    X = X.reshape(X.shape[0], -1)
    return X

for seed in range(10):
    X, Y, lstmAdapter = get_lstm_data(seed, shuffle_data=False)

    # Split the data into training and testing sets
    X_train_knn = define_inputs(X['train'][:, :, :], left_out_features)
    X_test_knn = define_inputs(X['test'][:, :, :], left_out_features)
    Y_train_knn = Y['train'][:, :, 0]
    Y_test_knn = Y['test'][:, :, 0]

    # Create and fit the KNeighborsRegressor model
    knn_model = KNeighborsRegressor(n_neighbors=5)
    knn_model.fit(X_train_knn, Y_train_knn)

    # Predict
    predicted_profile = knn_model.predict(X_test_knn)
    print("Seed = " + str(seed) + ". Test MSE:", mean_squared_error(Y_test_knn, predicted_profile))



Seed = 0. Test MSE: 0.5101952735891097
Seed = 1. Test MSE: 0.5101952735891097
Seed = 2. Test MSE: 0.5101952735891097
Seed = 3. Test MSE: 0.5101952735891097
Seed = 4. Test MSE: 0.5101952735891097
Seed = 5. Test MSE: 0.5101952735891097
Seed = 6. Test MSE: 0.5101952735891097
Seed = 7. Test MSE: 0.5101952735891097
Seed = 8. Test MSE: 0.5101952735891097
Seed = 9. Test MSE: 0.5101952735891097


In [84]:

# X, Y, lstmAdapter = get_lstm_data()

# Split the data into training and testing sets
X_train_knn = define_inputs(X['train'][:, :, :], left_out_features)
X_test_knn = define_inputs(X['test'][:, :, :], left_out_features)
Y_train_knn = Y['train'][:, :, 0]
Y_test_knn = Y['test'][:, :, 0]

# Create and fit the KNeighborsRegressor model
k=5
knn_model = KNeighborsRegressor(n_neighbors=k)
knn_model.fit(X_train_knn, Y_train_knn)

# Predict
predicted_profile = knn_model.predict(X_test_knn)
print("Test MSE:", mean_squared_error(Y_test_knn, predicted_profile))

index = 0
distances, neighbours_shuffled = knn_model.kneighbors(X_test_knn[index, None, :], n_neighbors=k, return_distance=True)
print(neighbours_shuffled)
arr = X_train_knn.reshape(X_train_knn.shape[0], -1)
test_day = lstmAdapter.getUnshuffeledIndex('test', index)
neighbour = lstmAdapter.getUnshuffeledIndex('train', neighbours_shuffled[0])
print(test_day, neighbour, (test_day-neighbour)%7, distances)


Test MSE: 0.12038752205038299
[[ 99  71 186 154 178]]
155 [162 148 141 176 134] [0 0 0 0 0] [[1.1788667  1.18088913 2.35993933 3.51545896 3.53321463]]
