In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from pykalman import KalmanFilter
import warnings
import optuna
from sklearn.metrics import mean_squared_error

# Suppress every warning category for the remainder of the run.
warnings.filterwarnings("ignore")

# Read the raw CSV, parse its 'Datetime' column (day/month/year hour:minute)
# and use the parsed timestamps as the row index.
file_path = r"D:\Downloads\Germany_Simplified_Data_Short.csv"
germany_data = pd.read_csv(file_path)
germany_data = germany_data.assign(
    Datetime=pd.to_datetime(germany_data['Datetime'], format='%d/%m/%Y %H:%M')
).set_index('Datetime')

def feature_engineering(df, window=7):
    """Add lagged and rolling price features without leaking the target.

    Every engineered feature for a row is derived only from prices of
    earlier rows, so a model that predicts 'market_price' from these
    features never sees the value it is asked to predict.

    Args:
        df: DataFrame with a 'market_price' column. Any other columns are
            passed through unchanged. The input frame is not modified.
        window: Number of consecutive rows used for the rolling mean and
            rolling standard deviation. Defaults to 7.

    Returns:
        A new DataFrame containing the original columns plus
        'lag_price_1', 'lag_price_2', '7d_moving_avg' and 'volatility',
        with every row that contains a NaN removed.
    """
    # Work on a copy so the caller's frame does not silently gain columns.
    out = df.copy()
    price = out['market_price']

    # Prices shifted by one row: the most recent value that is known
    # before the current row's price is observed.
    previous_price = price.shift(1)

    out['lag_price_1'] = previous_price
    out['lag_price_2'] = price.shift(2)

    # Rolling statistics are taken over the shifted series; rolling over the
    # raw series would include the current row's price (the target) in its
    # own features. The column name '7d_moving_avg' is kept for
    # compatibility; the window is counted in rows.
    trailing = previous_price.rolling(window=window)
    out['7d_moving_avg'] = trailing.mean()
    out['volatility'] = trailing.std()

    # dropna removes the warm-up rows (and any row with a NaN in any column).
    return out.dropna()

# Keep only the price (the prediction target) and the temperature, under
# shorter column names, then add the engineered features.
data = feature_engineering(
    germany_data[['Price (EUR/MWHE)', 'Temperature (Celcius)']].rename(
        columns={
            'Price (EUR/MWHE)': 'market_price',
            'Temperature (Celcius)': 'temperature',
        }
    )
)

# Unshuffled split: the leading rows form the training set and the trailing
# rows the test set.
# NOTE(review): test_size=0.66 leaves only about a third of the rows for
# training - confirm this ratio is intended.
train_data, test_data = train_test_split(data, test_size=0.66, shuffle=False)

# Separate the target column from the feature columns for each split.
y_train = train_data['market_price']
y_test = test_data['market_price']
x_train = train_data.drop(columns='market_price')
x_test = test_data.drop(columns='market_price')

def objective(trial):
    """Optuna objective: validation MSE of an XGBRegressor for one trial.

    Hyperparameters are sampled from the trial, a model is fitted on the
    leading part of the module-level training split (x_train / y_train) and
    scored on the trailing part of that same split. The test split is never
    touched here, so the test-set metrics computed after tuning are not
    biased by the hyperparameter search.

    Args:
        trial: The optuna trial used to sample hyperparameters.

    Returns:
        Mean squared error on the validation slice (lower is better).
    """
    # suggest_float replaces the deprecated suggest_uniform /
    # suggest_loguniform; log=True samples the learning rate on a log scale.
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 200),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
        'gamma': trial.suggest_float('gamma', 0.0, 1.0),
    }

    # Hold out the last 20% of the training rows for validation.
    # shuffle=False keeps the row order, so validation rows always come
    # after the rows the model was fitted on.
    x_fit, x_val, y_fit, y_val = train_test_split(
        x_train, y_train, test_size=0.2, shuffle=False
    )

    model = XGBRegressor(**params)
    model.fit(x_fit, y_fit)

    return mean_squared_error(y_val, model.predict(x_val))

# Run the hyperparameter search; the study keeps the trial with the lowest
# objective value.
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)

best_params, best_score = study.best_params, study.best_value
print(f"Best hyperparameters found: {best_params}")
print(f"Best MSE (mean squared error): {best_score}")

# Refit on the training split using the best parameters found.
best_model = XGBRegressor(**best_params)
best_model.fit(x_train, y_train)

# Score the refitted model on the test split.
xgb_predictions = best_model.predict(x_test)
mse = mean_squared_error(y_test, xgb_predictions)
rmse = mse ** 0.5
print(f"Mean Squared Error on test set: {mse}")
print(f"Root Mean Squared Error on test set: {rmse}")

# Put timestamps, actual prices and predictions side by side and write them
# to disk.
predictions_df = pd.DataFrame(
    {
        'Datetime': test_data.index,
        'Actual_Price': test_data['market_price'].to_numpy(),
        'XGBoost_Predicted': xgb_predictions,
    }
)
output_path = r"D:\Downloads\Prediction_Data_XGB_Optuna.csv"
predictions_df.to_csv(output_path, index=False)
print(f"Predictions saved to '{output_path}'")

def plot_predictions(actual, predictions, title):
    """Draw actual and predicted prices on one 12x6 figure and show it.

    Args:
        actual: Sequence of observed prices (drawn as a solid line).
        predictions: Sequence of predicted prices (drawn as a dashed line).
        title: Text used as the figure title.
    """
    _, axis = plt.subplots(figsize=(12, 6))

    # Each series is passed on its own, so matplotlib supplies the x values.
    axis.plot(actual, label="Actual Price")
    axis.plot(predictions, label="Predicted Price", linestyle="--")

    axis.set_xlabel("Time")
    axis.set_ylabel("Price")
    axis.set_title(title)
    axis.legend()
    axis.grid()

    plt.show()

# Plot the complete test period.
plot_predictions(
    test_data['market_price'].values,
    xgb_predictions,
    "XGBoost vs Actual Prices (Optuna Tuning)"
)

# Select a short window of the predictions; both bounds are inclusive.
start_date = '2023-02-28 05:00:00'
end_date = '2023-03-03 05:00:00'
date_mask = (predictions_df['Datetime'] >= start_date) & (predictions_df['Datetime'] <= end_date)

# Take an explicit copy of the selection: the column assignment below then
# writes to an independent frame rather than to a slice of predictions_df,
# which is what triggers pandas' SettingWithCopyWarning.
filtered_df = predictions_df.loc[date_mask].copy()

# Make sure the column is datetime-typed before it is used as the plot axis.
filtered_df['Datetime'] = pd.to_datetime(filtered_df['Datetime'])

def plot_individual_prediction(df, actual_col, predicted_col, title):
    """Plot one actual and one predicted column of df against 'Datetime'.

    Args:
        df: DataFrame with a 'Datetime' column plus the two value columns.
        actual_col: Name of the column holding the observed prices.
        predicted_col: Name of the column holding the predictions; it is
            also used as that line's legend label.
        title: Text used as the figure title.
    """
    _, axis = plt.subplots(figsize=(12, 6))
    timestamps = df['Datetime']

    axis.plot(timestamps, df[actual_col], label="Actual Price", color="blue")
    axis.plot(
        timestamps,
        df[predicted_col],
        label=predicted_col,
        linestyle="--",
        color="orange",
    )

    axis.set_xlabel("Datetime")
    axis.set_ylabel("Price")
    axis.set_title(title)
    axis.legend()
    axis.grid()

    plt.show()


# Plot actual vs. predicted prices for the selected date window only.
plot_individual_prediction(
    df=filtered_df,
    actual_col='Actual_Price',
    predicted_col='XGBoost_Predicted',
    title="XGBoost vs Actual Prices (Optuna Tuning)",
)