# XGBoost Energy Price Prediction (Time Series with Lag Features)
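This notebook demonstrates how to use XGBoost for time series forecasting of energy prices, using synthetic (dummy) hourly data and lag features at 1, 2, 3, and 24 hours. Note that the lag features for the held-out final 24 hours are built from the actual observed values, so the evaluation measures one-step-ahead accuracy rather than a true multi-step 24-hour forecast.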

In [None]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

In [None]:
# 1. Create dummy historical data for 7 days (168 hours)
# The global NumPy seed is fixed so the three noise draws below (temp, load,
# price — in that order) are reproducible on a fresh kernel.
N_HOURS = 7 * 24  # 168 hourly observations
np.random.seed(42)
# Lowercase 'h' is the hourly frequency alias; the uppercase 'H' spelling is
# deprecated since pandas 2.2 and emits a FutureWarning there.
hours = pd.date_range(start='2024-01-01', periods=N_HOURS, freq='h')

df = pd.DataFrame({
    'datetime': hours,
    'hour': hours.hour,  # hour of day, 0-23
    # Sine-shaped temperature around 10 plus standard-normal noise
    'temp': 10 + 10*np.sin(np.linspace(0, 3*np.pi, N_HOURS)) + np.random.randn(N_HOURS),
    # Cosine-shaped load around 100 plus normal noise scaled by 5
    'load': 100 + 20*np.cos(np.linspace(0, 4*np.pi, N_HOURS)) + np.random.randn(N_HOURS)*5,
})

# Simulated price: linear in temp and load, plus normal noise scaled by 2
df['price'] = 50 + 0.5 * df['temp'] + 0.8 * df['load'] + np.random.randn(N_HOURS)*2

In [None]:
# 2. Create lag features
# For each look-back horizon, add the shifted price, load and temp columns
# (in that order) so every row carries the values seen `back` rows earlier.
for back in (1, 2, 3, 24):
    for base in ('price', 'load', 'temp'):
        df[f'{base}_lag{back}'] = df[base].shift(back)

# Shifting leaves NaN in the leading rows that lack enough history; remove
# those rows in place. Re-running this cell on the already-trimmed frame would
# trim it again, so re-run from the data-creation cell instead.
df.dropna(inplace=True)

In [None]:
# 3. Train/Test split — the final 24 rows (hours) are held out as the test
# set; every row before them is used for training
test_df = df.iloc[-24:]
train_df = df.iloc[:-24]

In [None]:
# 4. Features and target
# Model inputs: the hour-of-day column plus every column whose name contains
# 'lag', kept in the frame's column order
target = 'price'
features = [name for name in df.columns if name == 'hour' or 'lag' in name]

# Slice the same feature/target columns out of both partitions
X_train, y_train = train_df[features], train_df[target]
X_test, y_test = test_df[features], test_df[target]

In [None]:
# 5. Train XGBoost
# Hyper-parameters are collected in one place, then unpacked into the regressor
xgb_params = dict(n_estimators=100, learning_rate=0.1, max_depth=4)
model = xgb.XGBRegressor(**xgb_params)
model.fit(X_train, y_train)

In [None]:
# 6. Predict the held-out final 24 hours
# X_test holds lag features that were computed from the actual series before
# the split, so each prediction relies on observed values from the preceding
# hours (one-step-ahead), not on the model's own earlier predictions.
y_pred = model.predict(X_test)

In [None]:
# 7. Evaluate
# RMSE = sqrt(MSE). The root is taken explicitly because the `squared=False`
# argument of mean_squared_error was deprecated in scikit-learn 1.4 and
# removed in 1.6; this form works on both old and new releases.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"24-hour RMSE: {rmse:.2f}")

In [None]:
# 8. Plot
# Overlay actual and predicted prices for the test window on a single axes,
# using the explicit figure/axes interface
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(test_df['datetime'], y_test, label='Actual Price')
ax.plot(test_df['datetime'], y_pred, label='Predicted Price')
ax.set_title('Energy Price Prediction - Next 24 Hours (with 4 Lags)')
ax.set_xlabel('Datetime')
ax.set_ylabel('Price')
ax.grid(True)
ax.legend()
plt.show()