# SARIMA Model

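This notebook fits a SARIMA(1, 1, 1)(1, 1, 1, 12) model to the `Global_active_power` series of the preprocessed household electricity consumption data, forecasts the final 30 observations (held out as a test set), plots the forecast against the actual values, and reports the forecast RMSE.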

In [None]:

import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Load the preprocessed readings; the 'datetime' column is parsed as dates
# and used as the index.
data = pd.read_csv(
    '../data/preprocessed_household_power_consumption.csv',
    index_col='datetime',
    parse_dates=True,
)

# Hold out the final 30 observations of the target column for evaluation;
# everything before them is used for training.
power_series = data['Global_active_power']
train_data, test_data = power_series.iloc[:-30], power_series.iloc[-30:]

# Fit a SARIMA model on the training series.
#   order          = non-seasonal (p, d, q)
#   seasonal_order = seasonal (P, D, Q, s), with a seasonal period of s = 12
#                    observations.
sarima_model = SARIMAX(train_data, order=(1, 1, 1), seasonal_order=(1, 1, 1, 12))
sarima_result = sarima_model.fit()

# Forecast exactly as many steps as there are held-out observations, so the
# forecast always lines up one-to-one with test_data for plotting and
# scoring, even if the size of the hold-out split is changed.
sarima_forecast = sarima_result.forecast(steps=len(test_data))

# Draw the training history, the held-out actuals, and the forecast on one
# set of axes. The forecast is plotted against the test index so it overlays
# the actual values it is predicting.
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(train_data.index, train_data, label='Train')
ax.plot(test_data.index, test_data, label='Test')
ax.plot(test_data.index, sarima_forecast, label='SARIMA Forecast')
ax.set_xlabel('Datetime')
ax.set_ylabel('Global Active Power')
ax.set_title('SARIMA Model Forecast')
ax.legend()
plt.show()

# Score the forecast against the held-out values: RMSE is the square root
# of the mean squared error.
sarima_mse = mean_squared_error(test_data, sarima_forecast)
sarima_rmse = np.sqrt(sarima_mse)
print(f'SARIMA RMSE: {sarima_rmse}')