In [28]:
import pandas as pd
from statsmodels.tsa.statespace.sarimax import SARIMAX

# --- Data preparation ---------------------------------------------------
# Read the CSV, parsing the "timestamp" column as datetimes and using it
# as the row index.
df = pd.read_csv("energy.csv", index_col="timestamp", parse_dates=["timestamp"])

# Re-index onto a regular hourly grid, then forward-fill any missing values
# (including rows the re-indexing introduced) from the previous observation.
df = df.asfreq("h")
df = df.ffill()

# Target variable, trimmed to the most recent 200 observations to keep the
# fit fast while experimenting.
series = df["load"]
series = series.iloc[-200:]

# --- Train / test split -------------------------------------------------
# Hold out the final HORIZON observations; everything before them is used
# for fitting. NOTE: `test` is not used further in the visible code.
HORIZON = 3
train, test = series.iloc[:-HORIZON], series.iloc[-HORIZON:]

# --- Model --------------------------------------------------------------
# Non-seasonal (p, d, q) and seasonal (P, D, Q, s) orders; s=24 is one day
# at the hourly frequency set above.
order = (1, 1, 1)
seasonal_order = (0, 1, 1, 24)

model = SARIMAX(endog=train, order=order, seasonal_order=seasonal_order)
# disp=False keeps the optimizer from printing convergence messages.
results = model.fit(disp=False)

# --- Forecast -----------------------------------------------------------
# Predict HORIZON steps past the end of the training data.
forecast = results.forecast(steps=HORIZON)

# Print the header and the forecast series on separate lines.
print("Forecast:", forecast, sep="\n")

Forecast:
2014-12-31 21:00:00    3615.858345
2014-12-31 22:00:00    3310.050372
2014-12-31 23:00:00    3063.156375
Freq: h, Name: predicted_mean, dtype: float64
