# In [None]:
# =========================
# Task 3: Energy Consumption Forecasting (Simple Version)
# =========================

# -------------------------
# 1️⃣ Import Libraries
# -------------------------
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error

# -------------------------
# 2️⃣ Load Dataset
# -------------------------
# Read the raw semicolon-separated file; '?' marks missing readings.
# 'Date' and 'Time' are loaded as plain strings and combined explicitly below
# instead of using the nested `parse_dates` dict and `infer_datetime_format`,
# both of which are deprecated in pandas 2.x.
df = pd.read_csv(
    "household_power_consumption.txt",
    sep=';',
    na_values='?',
    dtype={'Date': str, 'Time': str},
    low_memory=False
)

# NOTE(review): the format assumes day-first dates (dd/mm/yyyy) followed by
# HH:MM:SS — confirm against the data file. An explicit format avoids
# per-row inference and any day/month ambiguity.
df['Datetime'] = pd.to_datetime(
    df['Date'] + ' ' + df['Time'],
    format='%d/%m/%Y %H:%M:%S'
)

# Keep only the timestamp and target columns, drop rows with missing values,
# then index by timestamp in chronological order.
df = df[['Datetime', 'Global_active_power']].dropna()
df = df.set_index('Datetime').sort_index()

# -------------------------
# 3️⃣ Simple Feature Engineering
# -------------------------
# Derive calendar features from the index: each new column is named after the
# index attribute it is copied from.
for calendar_attr in ('hour', 'dayofweek', 'month'):
    df[calendar_attr] = getattr(df.index, calendar_attr)

# -------------------------
# 4️⃣ Train-Test Split
# -------------------------
# Positional 80/20 split with no shuffling: the first 80% of rows become the
# training set and the remaining rows the test set.
train_size = int(0.8 * len(df))
train, test = df.iloc[:train_size], df.iloc[train_size:]

# -------------------------
# 5️⃣ ARIMA Forecast
# -------------------------
# Fit ARIMA(1,1,1) on the training target and forecast one step per test row.
# The raw values are passed rather than the Series: rows are removed by
# dropna() earlier in this script, so the index presumably carries no
# frequency, and statsmodels ignores (and warns about) such an index.
arima_model = ARIMA(train['Global_active_power'].to_numpy(), order=(1, 1, 1))
arima_fit = arima_model.fit()

# Re-attach the test timestamps so the forecast lines up with `test` for
# evaluation and plotting.
arima_forecast = pd.Series(
    np.asarray(arima_fit.forecast(steps=len(test))),
    index=test.index,
    name='arima_forecast',
)

# -------------------------
# 6️⃣ Random Forest Forecast
# -------------------------
# Random forest regressor trained on the calendar feature columns only.
features = ['hour', 'dayofweek', 'month']
X_train, y_train = train[features], train['Global_active_power']
X_test, y_test = test[features], test['Global_active_power']

# fit() returns the estimator itself, so construction and training are chained.
# random_state is fixed at 42 for the 100-tree forest.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)
rf_pred = rf_model.predict(X_test)

# -------------------------
# 7️⃣ Evaluation
# -------------------------
def evaluate(y_true, y_pred, model_name):
    """Print and return the MAE and RMSE of a forecast.

    Args:
        y_true: Observed target values.
        y_pred: Predicted values, in the same order as ``y_true``.
        model_name: Label shown at the start of the printed line.

    Returns:
        A ``(mae, rmse)`` tuple of floats, so callers can compare models
        programmatically instead of reading the printed output.
    """
    mae = float(mean_absolute_error(y_true, y_pred))
    # RMSE is the square root of the mean squared error.
    rmse = float(np.sqrt(mean_squared_error(y_true, y_pred)))
    print(f"{model_name} -> MAE: {mae:.3f}, RMSE: {rmse:.3f}")
    return mae, rmse

# Score both forecasts against the same held-out target values, ARIMA first.
for model_label, predictions in (("ARIMA", arima_forecast), ("Random Forest", rf_pred)):
    evaluate(test['Global_active_power'], predictions, model_label)

# -------------------------
# 8️⃣ Visualization
# -------------------------
# Overlay the actual test series and both forecasts on a single axes, all
# plotted against the test timestamps.
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(test.index, test['Global_active_power'], label='Actual', color='black')
ax.plot(test.index, arima_forecast, label='ARIMA Forecast', alpha=0.7)
ax.plot(test.index, rf_pred, label='Random Forest Forecast', alpha=0.7)
ax.set_xlabel("Datetime")
ax.set_ylabel("Global Active Power (kilowatts)")
ax.set_title("Energy Consumption Forecast vs Actual")
ax.legend()
plt.show()


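# Notebook output: warning traces truncated by the export. Presumably pandas
# date-parsing warnings and statsmodels date-index warnings — TODO confirm.
#   df = pd.read_csv(
#   df = pd.read_csv(
#   df = pd.read_csv(
#   self._init_dates(dates, freq)
#   self._init_dates(dates, freq)
#   self._init_dates(dates, freq)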
