In [2]:

# Install git (IPython shell escape: runs apt-get in the notebook's shell)
!apt-get install git

# Clone Git Repo
# Insert code from ReadMe here
# NOTE(review): no clone command is present in this cell; the %cd and
# `git pull` below rely on /content/ElectricityGermany already existing.

# Commented out IPython magic to ensure Python compatibility.
import os
# Switch into the repository directory unless it already is the working directory.
if os.getcwd() != "/content/ElectricityGermany":
     %cd /content/ElectricityGermany
# Update the working copy in the current directory.
!git pull



import os
import math
import uuid

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

from pandas import DataFrame, concat
from datetime import datetime

from keras.models import Sequential
from keras.layers import LSTM, Dropout, Dense

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Session identity: a random UUID and its first dash-separated group, which
# serves as a short tag in directory and file names.
full_session_id = f"{uuid.uuid4()}"
short_session_id = full_session_id.partition('-')[0]
file_save_counter = 0  # running number prefixed to each file saved in this session

# Session timing: both stamps are taken now; the end stamp is only a
# placeholder until it is overwritten once the run has finished.
start_time_stamp = f"{datetime.now():%Y_%m_%d_%H_%M_%S}"
end_time_stamp = f"{datetime.now():%Y_%m_%d_%H_%M_%S}"

# Locate the input CSV inside the repository and read it
data_file_name = 'day_ahead_price_today_temp_germany_1.csv'
data_directory = '/content/ElectricityGermany/data'
data_file_path = os.path.join(data_directory, data_file_name)
dataframe = pd.read_csv(data_file_path)
print(dataframe.head())  # preview the raw rows before re-indexing

# Parse the 'date' column and promote it to the index
dataframe = dataframe.assign(date=pd.to_datetime(dataframe['date'])).set_index('date')

# Create the report directory for this session. exist_ok=True makes the call
# a no-op when the directory is already there, so no separate existence check
# (which could race with another process) is needed; if the path exists but is
# not a directory, makedirs raises instead of silently continuing.
session_directory = f"/content/ElectricityGermany/session_reports/lstm_model_info_{start_time_stamp}_{short_session_id}"
os.makedirs(session_directory, exist_ok=True)
print(f"Directory '{session_directory}' created successfully")

# Plot the data: one stacked subplot per selected column, then save the figure
data_values = dataframe.values
data_groups = [0, 1]  # positions of the columns to plot
plot_titles = ["Price [€/MWh]", "Temperature[°C]"]  # title for each column position
plt.figure(figsize=(15, 10))

for idx, group in enumerate(data_groups):
    # Subplot numbering is 1-based, hence idx + 1
    plt.subplot(len(data_groups), 1, idx + 1)
    plt.plot(dataframe.index, data_values[:, group])
    plt.title(plot_titles[group], y=0.85, loc="left", pad=20, color='orange')
    plt.grid()
    plt.xlabel('Date')
    plt.ylabel('Value')

# Number the saved file with the running per-session counter.
# (A bare plt.gcf() call used to sit here; its result was discarded, so it
# has been removed.)
file_save_counter += 1
plt.savefig(f"{session_directory}/{file_save_counter}_Price_and_Temperature_{short_session_id}.png")
plt.show()
plt.close()


#Function to transform series to supervised learning format
def transform_series_to_supervised(data, input_lags=1, output_horizon=1, remove_nan=True):
    """Frame a time series as a supervised-learning table of shifted columns.

    Every output row holds the observations at ``t-input_lags`` ... ``t-1``
    followed by the observations at ``t`` ... ``t+output_horizon-1``.

    Args:
        data: The observations in time order: a flat list (one variable), a
            list of rows, a 1-D or 2-D array, or a DataFrame with one column
            per variable.
        input_lags: Number of past steps to include as input columns.
        output_horizon: Number of steps, starting at ``t``, to include as
            output columns.
        remove_nan: If true, drop the rows that shifting left incomplete.

    Returns:
        A DataFrame whose columns are named ``var<j>(t-<i>)``, ``var<j>(t)``
        and ``var<j>(t+<i>)``, ordered oldest lag first and, within each time
        step, by variable.
    """
    df = DataFrame(data)
    # Take the variable count from the frame itself so that the generated
    # names always match the real column count (a list of multi-column rows
    # used to be treated as a single variable and failed on renaming).
    num_vars = df.shape[1]
    cols, names = [], []
    # Input sequence (t-n, ... t-1)
    for lag in range(input_lags, 0, -1):
        cols.append(df.shift(lag))
        names.extend(f'var{j}(t-{lag})' for j in range(1, num_vars + 1))
    # Forecast sequence (t, t+1, ... t+n)
    for step in range(output_horizon):
        cols.append(df.shift(-step))
        suffix = '(t)' if step == 0 else f'(t+{step})'
        names.extend(f'var{j}{suffix}' for j in range(1, num_vars + 1))
    # Combine the shifted copies side by side and label them
    aggregated = concat(cols, axis=1)
    aggregated.columns = names
    # Remove rows with NaN values introduced at the edges by shifting
    if remove_nan:
        aggregated = aggregated.dropna()
    return aggregated


# Ensure all data is float
# NOTE(review): data_values is not read again in the visible part of this
# cell; the split below works on `dataframe` directly.
data_values = data_values.astype('float32')

# Convert date strings to datetime (timestamps carry a +01:00 offset)
train_start_date = pd.to_datetime('2022-01-01T00:00+01:00')
train_end_date = pd.to_datetime('2023-01-01T00:00+01:00')
# The test range starts one hour after the end of the training range
test_start_date = pd.to_datetime('2023-01-01T01:00+01:00')

# Split into train and test sets by slicing the date index.
# NOTE(review): presumably a label-based slice that includes both end points;
# assumes the index is sorted and comparable with these timestamps - confirm.
train_set = dataframe[train_start_date:train_end_date]
test_set = dataframe[test_start_date:]

# Normalize the features after splitting: the scaler is fitted on the training
# rows only and then applied unchanged to the test rows.
scaler = MinMaxScaler(feature_range=(0, 1))
train_scaled = scaler.fit_transform(train_set)
test_scaled = scaler.transform(test_set)

# Frame as supervised learning
n_time_steps = 50  # IMPORTANT: look-back window; also used for the reshape below
n_features = train_scaled.shape[1]
train_supervised = transform_series_to_supervised(train_scaled, n_time_steps, 1)
test_supervised = transform_series_to_supervised(test_scaled, n_time_steps, 1)

# Drop columns we don't want to predict.
# The frame is laid out as n_time_steps blocks of n_features lagged columns
# followed by the time-t block, so the columns after var1(t) are the time-t
# values of the other variables. Only those are removed, leaving var1(t) as
# the last column (the target).
# (A fixed positional index of 3 is only right for a single lag; with a
# 50-step window it removed the input var2(t-49), which left var1(t) among
# the inputs and made var2(t) the target.)
non_target_columns = train_supervised.columns[n_time_steps * n_features + 1:]
train_supervised.drop(non_target_columns, axis=1, inplace=True)
test_supervised.drop(non_target_columns, axis=1, inplace=True)
print(train_supervised.head())  # Debugging
print(test_supervised.head())  # Debugging

# Split into input and outputs: every lagged column is an input, the final
# column var1(t) is the output
train_data_values = train_supervised.values
test_data_values = test_supervised.values
train_X, train_y = train_data_values[:, :-1], train_data_values[:, -1]
test_X, test_y = test_data_values[:, :-1], test_data_values[:, -1]

# Reshape input to be 3D [samples, timesteps, features]
train_X = train_X.reshape((train_X.shape[0], n_time_steps, n_features))
test_X = test_X.reshape((test_X.shape[0], n_time_steps, n_features))
print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)

# Design the model: one 50-unit LSTM layer feeding a single-output Dense
# layer, trained with MAE loss and the Adam optimizer
lstm_model = Sequential([
    LSTM(50, input_shape=train_X.shape[1:]),
    Dense(1),
])
lstm_model.compile(optimizer='adam', loss='mae')

# Fit. The test split is passed as validation data, so 'val_loss' in the
# history is measured on the test data; samples are not shuffled.
training_history = lstm_model.fit(
    train_X,
    train_y,
    validation_data=(test_X, test_y),
    epochs=50,
    batch_size=32,
    shuffle=False,
    verbose=2,
)

# Training history: per-epoch loss on the training data and on the
# validation data, drawn on the same axes
for history_key, curve_label in (('loss', 'train'), ('val_loss', 'test')):
    plt.plot(training_history.history[history_key], label=curve_label)
plt.legend()
plt.grid()

# Number the saved file with the running per-session counter.
# (A bare plt.gcf() call used to sit here; its result was discarded, so it
# has been removed.)
file_save_counter += 1
plt.savefig(f"{session_directory}/{file_save_counter}_History_loss_val_loss_{short_session_id}.png")
plt.show()
plt.close()

# Make a prediction from the 3-D test input
predicted_y = lstm_model.predict(test_X)

# Flatten test_X back to 2-D [samples, timesteps * features] so that single
# columns can be sliced from it below
test_X = test_X.reshape(test_X.shape[0], -1)

# Invert scaling for forecast: the first prediction column is paired with one
# column borrowed from test_X, the scaler is inverted, and only column 0 of
# the result is kept.
# NOTE(review): the borrowed column presumably just pads the array to the
# column count the scaler was fitted on - confirm.
scaled_forecast = np.concatenate((predicted_y[:, 0:1], test_X[:, 1:2]), axis=1)
inv_predicted_y = scaler.inverse_transform(scaled_forecast)[:, 0]

# Invert scaling for actual values in the same way
test_y = test_y.reshape(-1, 1)
scaled_actual = np.concatenate((test_y, test_X[:, 1:2]), axis=1)
inv_actual_y = scaler.inverse_transform(scaled_actual)[:, 0]


end_time_stamp = f"{datetime.now():%Y_%m_%d_%H_%M_%S}"  # End time of the session

# MAE
mae_value = mean_absolute_error(inv_actual_y, inv_predicted_y)
print(f'Mean Absolute Error (MAE): {mae_value}')

# RMSE: square root of the mean squared error
rmse_value = np.sqrt(mean_squared_error(inv_actual_y, inv_predicted_y))
print(f'Root Mean Squared Error (RMSE): {rmse_value}')

# Keep only the samples whose actual value is non-zero, because the
# percentage errors below divide by the actual value
non_zero_actual_y = inv_actual_y != 0
non_zero_actual_y_values = inv_actual_y[non_zero_actual_y]
non_zero_predicted_y = inv_predicted_y[non_zero_actual_y]

# Absolute relative error per sample, shared by MAPE and MAAPE
absolute_relative_error = np.abs(
    (non_zero_actual_y_values - non_zero_predicted_y) / non_zero_actual_y_values
)

# MAPE: mean absolute relative error, expressed in percent
mape_value = np.mean(absolute_relative_error) * 100
print(f'Mean Absolute Percentage Error (MAPE): {mape_value}')

# MAAPE: mean of the arctangent of the absolute relative error
maape_value = np.mean(np.arctan(absolute_relative_error))
print(f'Mean Arctangent Absolute Percentage Error: {maape_value}')

# R-squared with sklearn's r2_score function, printed rounded to 5 decimals
r2_value = r2_score(inv_actual_y, inv_predicted_y)
print(f"R²: {np.round(r2_value, 5)}")


# Session logs: collect identifiers, date ranges and metrics in a text report
# and write it into the session directory.
# The counter is advanced first so that it numbers this log file; the count
# reported in the log therefore includes the log itself.
# NOTE(review): "End Date of Testing" is a hard-coded literal, not derived
# from the test data.
file_save_counter += 1
session_log_content = f"""
Session ID: {short_session_id}
Start timestamp: {start_time_stamp}
End timestamp: {end_time_stamp}

Start Date of Training: {train_start_date}
End Date of Training: {train_end_date}
Start Date of Testing: {test_start_date}
End Date of Testing: 2023-12-31T23:00+01:00

Mean Absolute Error (MAE): {mae_value}
Mean Absolute Percentage Error (MAPE): {mape_value}
Root Mean Squared Error (RMSE): {rmse_value}
Mean Arctangent Absolute Percentage Error (MAAPE): {maape_value}
R-squared (R²) value: {r2_value}

Number of files saved in directory: {file_save_counter}
"""

print(session_log_content)  # Debugging

log_file_name = f"{file_save_counter}_lstm_logs_{end_time_stamp}_{short_session_id}.txt"
log_file_path = os.path.join(session_directory, log_file_name)
print(log_file_path)
# The report contains a non-ASCII character ("²"), so the encoding is fixed
# to UTF-8 instead of depending on the platform's default encoding.
with open(log_file_path, "w", encoding="utf-8") as log_file:
    log_file.write(session_log_content)

# Advance the running counter; it numbers the results figure saved below
file_save_counter += 1

# Convert index to datetime (UTC)
dataframe.index = pd.to_datetime(dataframe.index, utc=True)

# DataFrame for the actual and predicted values for easy plotting.
# The dates are the last len(inv_actual_y) entries of the full index.
# NOTE(review): this assumes the evaluated samples are the final rows of
# `dataframe` - confirm.
results_dataframe = pd.DataFrame({
    'Date': dataframe.index[-len(inv_actual_y):],
    'Real': inv_actual_y,
    'Forecast': inv_predicted_y
})

# Plot the results: actual and forecast series over the same dates
plt.figure(figsize=(15, 8))
for series_name, line_color in (('Real', 'blue'), ('Forecast', 'red')):
    plt.plot(results_dataframe['Date'], results_dataframe[series_name], color=line_color, label=series_name)
plt.title('Day Ahead Auction Price (EUR/MWh)')
plt.xlabel('Date')
plt.ylabel('Price [€/MWh]')
plt.legend()
plt.grid()

# (A bare plt.gcf() call used to sit here; its result was discarded, so it
# has been removed.)
plt.savefig(f"{session_directory}/{file_save_counter}_Results_Real_Forecast_{short_session_id}.png")
plt.show()
plt.close()

# Source directory: the session's report directory is what gets committed
source_directory = session_directory

# Push User Configs: read the git identity from the `userdata` store
# NOTE(review): `userdata` is not imported anywhere in the visible cell -
# presumably Colab's secrets helper imported in an earlier cell; confirm,
# otherwise these lines raise NameError.
user_email_git = userdata.get('user_email')
user_name_git = userdata.get('user_name')

# Shell escapes; the {name} placeholders are filled from the Python variables
!git config --global user.email "{user_email_git}"
!git config --global user.name "{user_name_git}"

# Add file to git (stages the whole session directory)
!git add "{source_directory}"

# Commit message: end timestamp plus the short session id
!git commit -m "{end_time_stamp}_sess_ID_{short_session_id}"

# Push To Git
# Insert code from ReadMe here
# NOTE(review): no push command is present in this cell.

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
git is already the newest version (1:2.34.1-1ubuntu1.11).
0 upgraded, 0 newly installed, 0 to remove and 0 not upgraded.
[Errno 2] No such file or directory: '/content/ElectricityGermany'
/content
fatal: not a git repository (or any of the parent directories): .git


KeyboardInterrupt: 