In [None]:
!pip install --upgrade pip
!pip install pystan~=2.19.1.1
!pip install prophet


Collecting pip
  Using cached pip-25.1.1-py3-none-any.whl.metadata (3.6 kB)
Using cached pip-25.1.1-py3-none-any.whl (1.8 MB)



[notice] A new release of pip is available: 24.2 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip
ERROR: To modify pip, please run the following command:
C:\Users\Lenovo\AppData\Local\Programs\Python\Python312\python.exe -m pip install --upgrade pip


In [4]:
# Step 1: Import Libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_absolute_error, mean_squared_error
from statsmodels.tsa.arima.model import ARIMA
from prophet import Prophet
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split

ModuleNotFoundError: No module named 'prophet'

In [None]:
# Step 2: Load and Preprocess Data
# The file is ';'-separated and marks missing readings with '?', which are
# parsed as NaN here.
df = pd.read_csv('household_power_consumption.txt', sep=';', low_memory=False, na_values='?')

# Combine Date and Time into a single timestamp and use it as the index.
# Assigning the result (instead of inplace=True) keeps the step explicit.
df['Datetime'] = pd.to_datetime(df['Date'] + ' ' + df['Time'], format="%d/%m/%Y %H:%M:%S")
df = df.set_index('Datetime')

# Keep only the target column, force it to numeric and drop missing readings.
df['Global_active_power'] = pd.to_numeric(df['Global_active_power'], errors='coerce')
df = df[['Global_active_power']].dropna()

# Resample to hourly mean. The lowercase 'h' alias replaces 'H', which is
# deprecated since pandas 2.2. Hours without any remaining reading come out of
# the resample as NaN rows, so the hourly index stays contiguous.
data_hourly = df.resample('h').mean()

In [None]:
# Step 3: Visualize Time Series
# Draw the hourly series on an explicitly created axes object.
fig, ax = plt.subplots(figsize=(12, 5))
data_hourly['Global_active_power'].plot(ax=ax)
ax.set_title('Hourly Global Active Power Usage')
ax.set_ylabel('kW')
ax.grid()
plt.show()

In [None]:
# Step 4: Feature Engineering for XGBoost
# Calendar features come from the hourly index; lag features are the target
# shifted by 1 and 24 rows (one hour and one day on the hourly grid).
# Rows with any missing value (gaps, or lags reaching before the start) are dropped.
df_feat = data_hourly.assign(
    hour=lambda frame: frame.index.hour,
    dayofweek=lambda frame: frame.index.dayofweek,
    month=lambda frame: frame.index.month,
    lag1=lambda frame: frame['Global_active_power'].shift(1),
    lag24=lambda frame: frame['Global_active_power'].shift(24),
).dropna()

# Split into train/test: everything through 2009-10-31 trains, the rest tests.
train = df_feat.loc[:'2009-10-31']
test = df_feat.loc['2009-11-01':]

# Separate the target column from the predictors for both partitions.
X_train, y_train = train.drop(columns='Global_active_power'), train['Global_active_power']
X_test, y_test = test.drop(columns='Global_active_power'), test['Global_active_power']

In [None]:
# Step 5: ARIMA Model
# Train on every hour through 2009-10-31 (a date-string slice on the
# DatetimeIndex covers the whole day), the same window the feature frame uses.
arima_train = data_hourly['Global_active_power'][:'2009-10-31']
arima_model = ARIMA(arima_train, order=(5,1,0))
arima_fit = arima_model.fit()

# Forecast every hour that follows the training window, then keep only the
# timestamps that are actually evaluated. `test` has had rows with missing
# values dropped, so a positional forecast of len(test) steps could drift away
# from the timestamps in y_test; reindexing by timestamp keeps them aligned.
# The former typ="levels" argument is omitted: it is not a parameter of this
# ARIMA implementation, whose forecasts are already on the original scale.
arima_horizon = len(data_hourly) - len(arima_train)
arima_pred = arima_fit.forecast(steps=arima_horizon).reindex(test.index)

In [None]:
# Step 6: Prophet Model
# Prophet expects the timestamp column to be named 'ds' and the target 'y'.
df_prophet = data_hourly.reset_index().rename(columns={'Datetime': 'ds', 'Global_active_power': 'y'})

# Split on the first instant of the test period. Comparing 'ds' with the bare
# string '2009-10-31' means midnight of that day, which would leave the hours
# 01:00-23:00 of Oct 31 out of training and shift the forecast horizon.
test_start = pd.Timestamp('2009-11-01')
train_prophet = df_prophet[df_prophet['ds'] < test_start]
test_prophet = df_prophet[df_prophet['ds'] >= test_start]

prophet_model = Prophet()
prophet_model.fit(train_prophet)

# Extend the training timeline by one step per hour in the test period
# (lowercase 'h': the 'H' alias is deprecated since pandas 2.2).
future = prophet_model.make_future_dataframe(periods=len(test_prophet), freq='h')
forecast = prophet_model.predict(future)

# Keep exactly the timestamps that are evaluated, so the prediction has the
# same length and order as y_test even when `test` skips hours with gaps.
prophet_pred = forecast.set_index('ds')['yhat'].reindex(test.index)

In [None]:
# Step 7: XGBoost Model
# Build a 100-tree regressor and fit it on the training features in one
# expression (fit returns the fitted estimator), then predict the test rows.
xgb_model = XGBRegressor(n_estimators=100).fit(X_train, y_train)
xgb_pred = xgb_model.predict(X_test)

In [None]:
# Step 8: Evaluation
def evaluate(true, pred, model_name):
    """Print the MAE and RMSE of a set of predictions.

    Parameters
    ----------
    true : array-like
        Observed target values.
    pred : array-like
        Predicted values, in the same order and of the same length as `true`.
    model_name : str
        Label placed in front of the printed metrics.
    """
    # RMSE is the square root of the mean squared error.
    rmse = np.sqrt(mean_squared_error(true, pred))
    mae = mean_absolute_error(true, pred)
    print(f'{model_name} -> MAE: {mae:.3f}, RMSE: {rmse:.3f}')

# Score every model against the same held-out targets, in a fixed order.
for model_label, model_pred in (
    ('ARIMA', arima_pred),
    ('Prophet', prophet_pred.values),
    ('XGBoost', xgb_pred),
):
    evaluate(y_test, model_pred, model_label)

In [None]:
# Step 9: Plot Actual vs Predicted
# All curves share the test timestamps on the x-axis; insertion order of the
# mapping fixes both the drawing order and the legend order.
fig, ax = plt.subplots(figsize=(15, 5))
curves = {
    'Actual': y_test.values,
    'ARIMA': arima_pred,
    'Prophet': prophet_pred.values,
    'XGBoost': xgb_pred,
}
for curve_label, curve_values in curves.items():
    ax.plot(y_test.index, curve_values, label=curve_label)
ax.legend()
ax.set_title("Actual vs Forecasted Energy Usage")
ax.set_xlabel("Time")
ax.set_ylabel("Global Active Power (kW)")
ax.grid()
plt.show()