# ARIMA Model

This notebook fits an ARIMA model to household electricity consumption (`Global_active_power`), forecasts a held-out test window at the end of the series, and reports the forecast RMSE.

In [None]:

import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Number of trailing observations held out for evaluation. Defined once so
# the train and test slices cannot drift apart when the holdout size changes.
# Must be positive: 0 would make the train slice empty.
TEST_SIZE = 30

# Load the preprocessed data, using the 'datetime' column as a parsed index.
data = pd.read_csv(
    '../data/preprocessed_household_power_consumption.csv',
    index_col='datetime',
    parse_dates=True,
)

# Positional split with no shuffling: everything except the last TEST_SIZE
# rows is used for training, the last TEST_SIZE rows for testing.
# NOTE(review): assumes the rows are already in chronological order - confirm
# against the preprocessing step.
target = data['Global_active_power']
train_data = target.iloc[:-TEST_SIZE]
test_data = target.iloc[-TEST_SIZE:]

# Fit an ARIMA model with order (p=5, d=1, q=0) on the training series.
arima_model = ARIMA(train_data, order=(5, 1, 0))
arima_result = arima_model.fit()

# Forecast exactly as many steps as there are held-out observations, rather
# than repeating the holdout size as a literal, so the forecast always has the
# same length as test_data when the two are plotted and scored together.
arima_forecast = arima_result.forecast(steps=len(test_data))

# Draw the training series, the held-out series and the forecast on a single
# axes. The forecast is plotted against the test index so it overlays the
# observations it is predicting.
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(train_data.index, train_data, label='Train')
ax.plot(test_data.index, test_data, label='Test')
ax.plot(test_data.index, arima_forecast, label='ARIMA Forecast')
ax.set(
    xlabel='Datetime',
    ylabel='Global Active Power',
    title='ARIMA Model Forecast',
)
ax.legend()
plt.show()

# Score the forecast against the held-out observations: compute the mean
# squared error first, then take its square root to get the RMSE.
arima_mse = mean_squared_error(test_data, arima_forecast)
arima_rmse = np.sqrt(arima_mse)
print(f'ARIMA RMSE: {arima_rmse}')