In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from xgboost import XGBRegressor
import warnings
import time
from scipy import stats

# Install a blanket "ignore" filter: every warning raised after this point
# (all categories, all modules) is suppressed.
warnings.simplefilter("ignore")

def feature_engineering(df):
    """Return a copy of *df* extended with lagged and rolling features.

    The input frame is left untouched; all derived columns are added to a
    copy, so callers can safely reuse the original frame.

    Args:
        df: DataFrame containing at least the columns ``'market_price'``
            and ``'temperature'``.

    Returns:
        A new DataFrame with the original columns plus
        ``lag_price_1``, ``lag_price_2``, ``7d_moving_avg``, ``volatility``,
        ``lag_temp_1``, ``lag_temp_2`` and ``7d_moving_temp_avg``. Rows that
        contain a NaN in any column are dropped, which removes the first six
        rows (the 7-observation rolling windows are undefined there) as well
        as any row that already had missing values.

    Raises:
        KeyError: if ``'market_price'`` or ``'temperature'`` is missing.
    """
    # Work on a copy so the caller's frame does not silently gain columns.
    features = df.copy()
    price = features['market_price']
    temperature = features['temperature']

    # Price-derived features: previous two observations, then the mean and
    # sample standard deviation over a 7-observation window.
    features['lag_price_1'] = price.shift(1)
    features['lag_price_2'] = price.shift(2)
    features['7d_moving_avg'] = price.rolling(window=7).mean()
    features['volatility'] = price.rolling(window=7).std()

    # Temperature-derived features, mirroring the price lags and rolling mean.
    features['lag_temp_1'] = temperature.shift(1)
    features['lag_temp_2'] = temperature.shift(2)
    features['7d_moving_temp_avg'] = temperature.rolling(window=7).mean()

    return features.dropna()

# Load the raw CSV exports and turn each one into a model-ready frame
_DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'


def _load_market_frame(csv_path):
    """Read one CSV file and return its engineered price/temperature frame.

    The 'Datetime' column is parsed and installed as the index, the price and
    temperature columns are copied into a two-column frame under the names
    'market_price' and 'temperature', and feature_engineering is applied.
    """
    raw = pd.read_csv(csv_path)
    raw.index = pd.to_datetime(raw['Datetime'], format=_DATETIME_FORMAT)
    selected = pd.DataFrame({
        'market_price': raw['Price (EUR/MWHE)'],
        'temperature': raw['Temperature (Celcius)'],
    })
    return feature_engineering(selected)


train_data = _load_market_frame(r"D:\Downloads\Germany_Volatile_Train_Data_Small.csv")
test_data = _load_market_frame(r"D:\Downloads\Germany_Volatile_Test_Data.csv")

# SARIMAX Grid Search
start_time = time.time()
def sarimax_grid_search(df, p_vals, d_vals, q_vals, seasonal_order):
    """Fit SARIMAX for every (p, d, q) combination and keep the lowest-AIC fit.

    Each candidate models ``df['market_price']`` with ``df[['temperature']]``
    as the exogenous regressor and the given fixed ``seasonal_order``.

    Args:
        df: DataFrame with ``'market_price'`` and ``'temperature'`` columns.
        p_vals: Iterable of autoregressive orders to try.
        d_vals: Iterable of differencing orders to try.
        q_vals: Iterable of moving-average orders to try.
        seasonal_order: Seasonal ``(P, D, Q, s)`` tuple passed to SARIMAX
            unchanged for every candidate.

    Returns:
        ``(best_model_fit, best_params)`` where ``best_params`` is the winning
        ``(p, d, q)`` tuple. Both are ``None`` when no candidate could be
        fitted (or no fit produced a comparable AIC).

    NOTE(review): candidates with different ``d`` are ranked by the same AIC;
    common guidance is to compare AIC only among models with equal
    differencing -- confirm this ranking is intended.
    """
    from itertools import product

    best_aic = np.inf
    best_params = None
    best_model_fit = None
    last_error = None

    # Loop-invariant inputs: every candidate uses the same series.
    endog = df['market_price']
    exog = df[['temperature']]

    for p, d, q in product(p_vals, d_vals, q_vals):
        try:
            model = SARIMAX(endog, exog=exog, order=(p, d, q), seasonal_order=seasonal_order)
            model_fit = model.fit(disp=False)
        except Exception as exc:
            # A failed candidate must not abort the search, but unlike a bare
            # ``except`` this still lets KeyboardInterrupt/SystemExit through.
            last_error = exc
            continue

        # A NaN AIC compares False here, so such fits are never selected.
        if model_fit.aic < best_aic:
            best_aic = model_fit.aic
            best_params = (p, d, q)
            best_model_fit = model_fit

    if best_model_fit is None and last_error is not None:
        # Surface why nothing was selected instead of failing silently.
        warnings.warn(
            f"SARIMAX grid search selected no model; last fit error: {last_error!r}",
            RuntimeWarning,
        )

    return best_model_fit, best_params
# Search the non-seasonal orders with a fixed seasonal component whose period
# is 24 observations (presumably one day of hourly data -- confirm).
best_model, best_order = sarimax_grid_search(
    train_data,
    p_vals=[0, 1, 2],
    d_vals=[0, 1],
    q_vals=[0, 1, 2],
    seasonal_order=(1, 1, 1, 24)
)

end_time = time.time()
elapsed_time = end_time - start_time

# The search returns (None, None) when no candidate could be fitted; fail
# with a clear message instead of an AttributeError on None below.
if best_model is None:
    raise RuntimeError("SARIMAX grid search failed: no candidate order could be fitted.")

start_time_2 = time.time()
sarimax_predictions = best_model.forecast(steps=len(test_data), exog=test_data[['temperature']])
end_time_2 = time.time()
elapsed_time_2 = end_time_2 - start_time_2

# Compare by position, not by index label: the forecast carries its own index
# (continuing from the training data), which need not match test_data.index,
# and pandas arithmetic on mismatched labels yields NaN.
# NOTE(review): test_data has lost its leading rows to the rolling-window
# dropna, so forecast step k is paired with the k-th *remaining* test row --
# confirm this pairing is the intended one.
actual_prices = test_data['market_price'].to_numpy(dtype=float)
predicted_prices = np.asarray(sarimax_predictions, dtype=float)

# Rows usable for ratio/correlation metrics: both values finite and a
# non-zero actual price (a zero price would make the MAPE term infinite).
valid_rows = np.isfinite(actual_prices) & np.isfinite(predicted_prices) & (actual_prices != 0)
filtered_test_data = test_data[valid_rows]
filtered_predictions = predicted_prices[valid_rows]
filtered_actual = filtered_test_data['market_price'].to_numpy(dtype=float)

mape = np.mean(np.abs((filtered_actual - filtered_predictions) / filtered_actual)) * 100
mae = np.mean(np.abs(actual_prices - predicted_prices))
correlation_coefficient, p_value = stats.pearsonr(filtered_actual, filtered_predictions)

print(f"Best SARIMAX Order: {best_order}")
print(f"MAPE: {mape:.6f}%")
print(f"MAE: {mae:.6f}")
print(f"R Value: {correlation_coefficient:.6f}")
print(f"Elapsed Time for training: {elapsed_time:.6f} seconds")
print(f"Elapsed Time for predictions: {elapsed_time_2:.6f} seconds")

# Actual vs. predicted prices over the test period.
plt.figure(figsize=(12, 6))
plt.plot(test_data.index, actual_prices, label='Actual Price', color='blue')
plt.plot(test_data.index, predicted_prices, label='Predicted Price', color='orange', linestyle='--')
plt.legend()
plt.xlabel('Datetime')
plt.ylabel('Price (EUR/MWHE)')
plt.title('SARIMAX Predicted vs. Actual Prices')
plt.grid()
plt.show()