# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from pykalman import KalmanFilter
from sklearn.metrics import mean_squared_error
import warnings
import optuna

# Silence every warning category for the remainder of the run.
warnings.filterwarnings("ignore")

# Read the raw CSV, parse its day-first 'Datetime' column and use it as the index.
file_path = r"D:\Downloads\Germany_Simplified_Data_Short.csv"
germany_data = (
    pd.read_csv(file_path)
    .assign(
        Datetime=lambda frame: pd.to_datetime(
            frame['Datetime'], format='%d/%m/%Y %H:%M'
        )
    )
    .set_index('Datetime')
)

def feature_engineering(df):
    """Return a copy of *df* extended with lag and rolling-window price features.

    The input frame is not modified; the features are attached to a new
    frame so callers can keep using their original data.

    Args:
        df: DataFrame containing a numeric ``market_price`` column.

    Returns:
        A new DataFrame with the original columns plus ``lag_price_1``,
        ``lag_price_2``, ``7d_moving_avg`` and ``volatility``. Every row
        holding a NaN in any column is dropped, which removes at least the
        first six rows (the 7-row window is not yet full there).
    """
    price = df['market_price']
    # The window spans 7 *rows*. NOTE(review): the "7d" column name only
    # matches if the data is daily -- confirm the sampling frequency.
    window = price.rolling(window=7)

    # Dict-unpacking is needed because '7d_moving_avg' is not a valid keyword.
    enriched = df.assign(**{
        'lag_price_1': price.shift(1),
        'lag_price_2': price.shift(2),
        '7d_moving_avg': window.mean(),
        'volatility': window.std(),
    })
    return enriched.dropna()

# Keep only the price and temperature series under shorter column names,
# then derive the lag / rolling features from them.
data = feature_engineering(
    germany_data[['Price (EUR/MWHE)', 'Temperature (Celcius)']].rename(
        columns={
            'Price (EUR/MWHE)': 'market_price',
            'Temperature (Celcius)': 'temperature',
        }
    )
)

# Chronological split (no shuffling): with test_size=0.66 roughly the first
# third of the rows is used for training and the rest is held out.
train_data, test_data = train_test_split(data, shuffle=False, test_size=0.66)

def objective(trial):
    """Optuna objective: score one Kalman-filter configuration by MSE.

    Samples a scalar transition matrix, a scalar observation matrix and an
    EM iteration count, runs EM on the training prices, filters the test
    prices and compares the filtered state means with those same prices.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Mean squared error between ``test_data['market_price']`` and the
        filtered state means (lower is better).
    """
    # suggest_float replaces the deprecated suggest_uniform. The parameter
    # names are unchanged, so study.best_params keeps the same keys.
    transition_matrix = trial.suggest_float('transition_matrix', 0.5, 1.5)
    observation_matrix = trial.suggest_float('observation_matrix', 0.5, 1.5)
    n_iter = trial.suggest_int('n_iter', 5, 20)

    kf = KalmanFilter(
        transition_matrices=[transition_matrix],
        observation_matrices=[observation_matrix],
    )
    kf = kf.em(train_data['market_price'], n_iter=n_iter)
    filtered_state_means, _ = kf.filter(test_data['market_price'])

    # NOTE(review): the score is computed on test_data, so the hyperparameters
    # are tuned against the held-out set; consider a separate validation split.
    # NOTE(review): the state means are compared to the observations without
    # applying observation_matrix -- confirm this is the intended metric.
    return mean_squared_error(
        test_data['market_price'], filtered_state_means.flatten()
    )

# Search the hyperparameter space for the lowest objective value (50 trials).
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)

best_params, best_mse = study.best_params, study.best_value
print(f"Best hyperparameters: {best_params}")
print(f"Best Mean Squared Error: {best_mse}")

# Rebuild the filter with the winning settings, run EM on the training
# prices and filter the test prices.
best_kf = KalmanFilter(
    transition_matrices=[best_params['transition_matrix']],
    observation_matrices=[best_params['observation_matrix']],
).em(train_data['market_price'], n_iter=best_params['n_iter'])
filtered_state_means, _ = best_kf.filter(test_data['market_price'])

# Write the actual and filtered prices side by side to CSV.
output_path = r"D:\Downloads\All_Prediction_Models.csv"
predictions_df = pd.DataFrame({
    'Datetime': test_data.index,
    'Actual_Price': test_data['market_price'].values,
    'Kalman_Predicted': filtered_state_means.flatten(),
})
predictions_df.to_csv(output_path, index=False)

print(f"Predictions saved to '{output_path}'")

def plot_predictions(actual, predictions, title):
    """Draw the actual series and every predicted series on one figure.

    Args:
        actual: Sequence of observed values, plotted with the legend label
            "Actual Price".
        predictions: Mapping of legend label to predicted sequence; one line
            is drawn per entry.
        title: Title shown above the plot.
    """
    _, ax = plt.subplots(figsize=(12, 6))
    ax.plot(actual, label="Actual Price")
    for series_label, series_values in predictions.items():
        ax.plot(series_values, label=series_label)
    ax.legend()
    ax.set_xlabel("Time")
    ax.set_ylabel("Price")
    ax.set_title(title)
    ax.grid()
    plt.show()

# Overview plot covering the whole test period.
plot_predictions(
    test_data['market_price'].values,
    {
        "Kalman (Optimized)": filtered_state_means.flatten(),
    },
    "Comparison of Prediction Models (Optimized Kalman)"
)


# Zoom window, written day-first. The bounds are parsed with an explicit
# format: comparing a datetime column against a raw string lets pandas guess
# the order, and its month-first default would misread e.g. '05/09/2023'.
start_date = '09/09/2023 00:00'
end_date = '13/09/2023 00:00'
date_format = '%d/%m/%Y %H:%M'
window_start = pd.to_datetime(start_date, format=date_format)
window_end = pd.to_datetime(end_date, format=date_format)
date_mask = (
    (predictions_df['Datetime'] >= window_start)
    & (predictions_df['Datetime'] <= window_end)
)
# .copy() gives an independent frame, so the column assignment below does not
# write into a slice of predictions_df (chained assignment).
filtered_df = predictions_df[date_mask].copy()

filtered_df['Datetime'] = pd.to_datetime(filtered_df['Datetime'])

def plot_individual_prediction(df, actual_col, predicted_col, title):
    """Plot one predicted column against the actual column over time.

    Args:
        df: DataFrame with a ``Datetime`` column used for the x-axis.
        actual_col: Name of the column holding the observed values
            (solid blue line, labelled "Actual Price").
        predicted_col: Name of the column holding the predicted values
            (dashed orange line, labelled with the column name).
        title: Title shown above the plot.
    """
    timestamps = df['Datetime']
    _, ax = plt.subplots(figsize=(12, 6))
    ax.plot(timestamps, df[actual_col], color="blue", label="Actual Price")
    ax.plot(
        timestamps,
        df[predicted_col],
        color="orange",
        linestyle="--",
        label=predicted_col,
    )
    ax.legend()
    ax.set_xlabel("Datetime")
    ax.set_ylabel("Price")
    ax.set_title(title)
    ax.grid()
    plt.show()

# Plot the filtered date window: actual prices against the Kalman output.
plot_individual_prediction(
    df=filtered_df,
    actual_col='Actual_Price',
    predicted_col='Kalman_Predicted',
    title="Optimized Kalman Filter vs Actual Prices",
)