In [2]:
# Cell 1: Environment Setup & Data Acquisition
import numpy as np
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
import matplotlib.pyplot as plt
import zipfile
import os
import warnings
warnings.filterwarnings('ignore')

print("🔧 Setting up environment for Time Series Forecasting Experiment...")
print("📚 Core libraries imported successfully!")

try:
    import tensorflow as tf
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import LSTM, Dense
    from tensorflow.keras.callbacks import EarlyStopping
    tf_available = True
    print("🧠 TensorFlow imported successfully!")
except Exception as e:
    tf_available = False
    print(f"⚠️  TensorFlow not available: {e}")
    print("💡 Resolution Strategy: Using alternative neural network implementation for LSTM")

print("📂 Loading data...")

try:
    data = pd.read_csv('/kaggle/input/hourly-energy-consumption/AEP_hourly.csv')
    data['Datetime'] = pd.to_datetime(data['Datetime'])
    data.set_index('Datetime', inplace=True)
    weekly_data = data.resample('W').mean()
    weekly_data.rename(columns={weekly_data.columns[0]: 'Value'}, inplace=True)
    print("✅ AEP_hourly.csv loaded and resampled to weekly frequency!")
    
except Exception as e:
    print(f"ERROR: {e}")
    print("💡 Resolution Strategy: Generating synthetic weekly time series data as fallback")
    
    np.random.seed(42)
    dates = pd.date_range(start='2018-01-01', periods=260, freq='W')
    week_nums = dates.isocalendar().week
    trend = np.linspace(15, 85, 260)
    seasonal_component = 20 * np.sin(2 * np.pi * week_nums / 52) + 5 * np.cos(2 * np.pi * week_nums / 26)
    noise = np.random.normal(loc=0, scale=4, size=260)
    data_values = trend + seasonal_component + noise
    weekly_data = pd.DataFrame({'Value': data_values}, index=dates)
    print("✅ Synthetic weekly time series data with trend and seasonality generated!")

print("📂 Environment setup complete! Data loaded successfully.")
print(f"📊 Dataset shape: {weekly_data.shape}")
print(f"📅 Date range: {weekly_data.index.min()} to {weekly_data.index.max()}")

🔧 Setting up environment for Time Series Forecasting Experiment...
📚 Core libraries imported successfully!
🧠 TensorFlow imported successfully!
📂 Loading data...
✅ AEP_hourly.csv loaded and resampled to weekly frequency!
📂 Environment setup complete! Data loaded successfully.
📊 Dataset shape: (723, 1)
📅 Date range: 2004-10-03 00:00:00 to 2018-08-05 00:00:00


In [3]:
# Cell 2: Exploratory Data Analysis & Preprocessing
print("📊 Starting Exploratory Data Analysis...")

plt.figure(figsize=(12, 6))
plt.plot(weekly_data.index, weekly_data['Value'], linewidth=1.5, color='#2E86AB')
plt.title('Weekly Time Series Data - Original', fontsize=16, fontweight='bold')
plt.xlabel('Date')
plt.ylabel('Value')
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('eda_plot.png', dpi=300, bbox_inches='tight')
plt.close()
print("📈 Time series plot saved as 'eda_plot.png'")

print("🔍 Performing time series decomposition...")
decomposition = seasonal_decompose(weekly_data['Value'], model='additive', period=52)

fig, axes = plt.subplots(4, 1, figsize=(12, 10))
decomposition.observed.plot(ax=axes[0], title='Original', color='#2E86AB')
decomposition.trend.plot(ax=axes[1], title='Trend', color='#A23B72')
decomposition.seasonal.plot(ax=axes[2], title='Seasonal', color='#F18F01')
decomposition.resid.plot(ax=axes[3], title='Residual', color='#C73E1D')
for ax in axes:
    ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('decomposition_plot.png', dpi=300, bbox_inches='tight')
plt.close()
print("📉 Decomposition plot saved as 'decomposition_plot.png'")

print("🔬 Checking stationarity with Augmented Dickey-Fuller test...")
adf_result = adfuller(weekly_data['Value'])
print(f"📊 ADF Statistic: {adf_result[0]:.6f}")
print(f"📊 p-value: {adf_result[1]:.6f}")
print(f"📊 Critical Values: {adf_result[4]}")

if adf_result[1] <= 0.05:
    print("✅ Data is stationary (p-value <= 0.05)")
else:
    print("⚠️  Data is non-stationary (p-value > 0.05) - differencing may be needed")

print("✂️ Splitting data into train/test sets...")
train_size = int(len(weekly_data) * 0.8)
train_data = weekly_data[:train_size]
test_data = weekly_data[train_size:]

print(f"📊 Training set size: {len(train_data)} weeks")
print(f"📊 Testing set size: {len(test_data)} weeks")

print("⚖️ Scaling data for LSTM model...")
scaler = MinMaxScaler()
train_scaled = scaler.fit_transform(train_data[['Value']])
test_scaled = scaler.transform(test_data[['Value']])

print("📊 EDA and preprocessing completed successfully!")

📊 Starting Exploratory Data Analysis...
📈 Time series plot saved as 'eda_plot.png'
🔍 Performing time series decomposition...
📉 Decomposition plot saved as 'decomposition_plot.png'
🔬 Checking stationarity with Augmented Dickey-Fuller test...
📊 ADF Statistic: -3.457055
📊 p-value: 0.009161
📊 Critical Values: {'1%': -3.4396995339981444, '5%': -2.8656659438580796, '10%': -2.5689671530263554}
✅ Data is stationary (p-value <= 0.05)
✂️ Splitting data into train/test sets...
📊 Training set size: 578 weeks
📊 Testing set size: 145 weeks
⚖️ Scaling data for LSTM model...
📊 EDA and preprocessing completed successfully!


In [4]:
# Cell 3: Standalone ARIMA Model
print("🔮 Building standalone ARIMA model...")

try:
    arima_model = ARIMA(train_data['Value'], order=(5, 1, 0))
    arima_fitted = arima_model.fit()
    print("✅ ARIMA model trained successfully!")
    print(f"📊 ARIMA Model Summary: {arima_fitted.summary().tables[1]}")
    
    arima_forecast = arima_fitted.forecast(steps=len(test_data))
    arima_forecast_series = pd.Series(arima_forecast, index=test_data.index)
    
    arima_rmse = np.sqrt(mean_squared_error(test_data['Value'], arima_forecast))
    arima_mae = mean_absolute_error(test_data['Value'], arima_forecast)
    
    print(f"📈 ARIMA RMSE: {arima_rmse:.4f}")
    print(f"📈 ARIMA MAE: {arima_mae:.4f}")
    
    plt.figure(figsize=(12, 6))
    plt.plot(train_data.index, train_data['Value'], label='Training Data', color='#2E86AB', alpha=0.7)
    plt.plot(test_data.index, test_data['Value'], label='Actual Test Data', color='#A23B72', linewidth=2)
    plt.plot(test_data.index, arima_forecast, label='ARIMA Forecast', color='#F18F01', linewidth=2, linestyle='--')
    plt.title('ARIMA Model Forecast', fontsize=16, fontweight='bold')
    plt.xlabel('Date')
    plt.ylabel('Value')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.savefig('arima_forecast.png', dpi=300, bbox_inches='tight')
    plt.close()
    print("📊 ARIMA forecast plot saved as 'arima_forecast.png'")
    
except Exception as e:
    print(f"ERROR: {e}")
    print("💡 Resolution Strategy: Check data quality and try different ARIMA orders")
    arima_rmse, arima_mae = float('inf'), float('inf')

print("📉 ARIMA model evaluation completed!")

🔮 Building standalone ARIMA model...
✅ ARIMA model trained successfully!
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.2087      0.038     -5.563      0.000      -0.282      -0.135
ar.L2         -0.1442      0.041     -3.555      0.000      -0.224      -0.065
ar.L3          0.0156      0.042      0.368      0.713      -0.067       0.098
ar.L4          0.0037      0.043      0.086      0.932      -0.081       0.088
ar.L5          0.0486      0.041      1.184      0.236      -0.032       0.129
sigma2      1.119e+06   6.15e+04     18.191      0.000    9.99e+05    1.24e+06
📈 ARIMA RMSE: 2277.9092
📈 ARIMA MAE: 1784.3821
📊 ARIMA forecast plot saved as 'arima_forecast.png'
📉 ARIMA model evaluation completed!


In [5]:
# Cell 4: Standalone LSTM Model
print("🧠 Building standalone LSTM model...")

def create_sequences(data, seq_length):
    print(f"🔄 Creating sequences with length {seq_length}...")
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:(i + seq_length)])
        y.append(data[i + seq_length])
    return np.array(X), np.array(y)

seq_length = 12
if tf_available:
    try:
        X_train, y_train = create_sequences(train_scaled.flatten(), seq_length)
        X_test, y_test = create_sequences(test_scaled.flatten(), seq_length)
        
        X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
        X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
        
        lstm_model = Sequential([
            LSTM(50, return_sequences=True, input_shape=(seq_length, 1)),
            LSTM(50, return_sequences=False),
            Dense(25),
            Dense(1)
        ])
        
        lstm_model.compile(optimizer='adam', loss='mse')
        print("✅ LSTM model architecture defined!")
        
        early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
        
        history = lstm_model.fit(
            X_train, y_train,
            epochs=100,
            batch_size=32,
            validation_split=0.2,
            callbacks=[early_stopping],
            verbose=0
        )
        
        print("✅ LSTM model trained successfully!")
        
        lstm_predictions_scaled = lstm_model.predict(X_test)
        lstm_predictions = scaler.inverse_transform(lstm_predictions_scaled)
        
        test_actual_for_lstm = test_data['Value'].iloc[seq_length:].values
        
        lstm_rmse = np.sqrt(mean_squared_error(test_actual_for_lstm, lstm_predictions.flatten()))
        lstm_mae = mean_absolute_error(test_actual_for_lstm, lstm_predictions.flatten())
        
        print(f"🧠 LSTM RMSE: {lstm_rmse:.4f}")
        print(f"🧠 LSTM MAE: {lstm_mae:.4f}")
        
        lstm_test_dates = test_data.index[seq_length:]
        
        plt.figure(figsize=(12, 6))
        plt.plot(train_data.index, train_data['Value'], label='Training Data', color='#2E86AB', alpha=0.7)
        plt.plot(lstm_test_dates, test_actual_for_lstm, label='Actual Test Data', color='#A23B72', linewidth=2)
        plt.plot(lstm_test_dates, lstm_predictions.flatten(), label='LSTM Forecast', color='#C73E1D', linewidth=2, linestyle='--')
        plt.title('LSTM Model Forecast', fontsize=16, fontweight='bold')
        plt.xlabel('Date')
        plt.ylabel('Value')
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.tight_layout()
        plt.savefig('lstm_forecast.png', dpi=300, bbox_inches='tight')
        plt.close()
        print("📊 LSTM forecast plot saved as 'lstm_forecast.png'")
        
    except Exception as e:
        print(f"ERROR: {e}")
        print("💡 Resolution Strategy: Check TensorFlow installation and data shapes")
        lstm_rmse, lstm_mae = float('inf'), float('inf')

else:
    print("⚠️  TensorFlow not available - implementing simple neural network alternative...")
    from sklearn.neural_network import MLPRegressor
    
    try:
        X_train, y_train = create_sequences(train_scaled.flatten(), seq_length)
        X_test, y_test = create_sequences(test_scaled.flatten(), seq_length)
        
        X_train_flat = X_train.reshape(X_train.shape[0], -1)
        X_test_flat = X_test.reshape(X_test.shape[0], -1)
        
        mlp_model = MLPRegressor(hidden_layer_sizes=(100, 50), max_iter=500, random_state=42)
        mlp_model.fit(X_train_flat, y_train)
        
        mlp_predictions_scaled = mlp_model.predict(X_test_flat)
        mlp_predictions = scaler.inverse_transform(mlp_predictions_scaled.reshape(-1, 1))
        
        test_actual_for_mlp = test_data['Value'].iloc[seq_length:].values
        
        lstm_rmse = np.sqrt(mean_squared_error(test_actual_for_mlp, mlp_predictions.flatten()))
        lstm_mae = mean_absolute_error(test_actual_for_mlp, mlp_predictions.flatten())
        
        print(f"🧠 MLP (LSTM alternative) RMSE: {lstm_rmse:.4f}")
        print(f"🧠 MLP (LSTM alternative) MAE: {lstm_mae:.4f}")
        
        mlp_test_dates = test_data.index[seq_length:]
        
        plt.figure(figsize=(12, 6))
        plt.plot(train_data.index, train_data['Value'], label='Training Data', color='#2E86AB', alpha=0.7)
        plt.plot(mlp_test_dates, test_actual_for_mlp, label='Actual Test Data', color='#A23B72', linewidth=2)
        plt.plot(mlp_test_dates, mlp_predictions.flatten(), label='MLP Forecast', color='#C73E1D', linewidth=2, linestyle='--')
        plt.title('MLP Neural Network Forecast (LSTM Alternative)', fontsize=16, fontweight='bold')
        plt.xlabel('Date')
        plt.ylabel('Value')
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.tight_layout()
        plt.savefig('lstm_forecast.png', dpi=300, bbox_inches='tight')
        plt.close()
        print("📊 MLP forecast plot saved as 'lstm_forecast.png'")
        
    except Exception as e:
        print(f"ERROR: {e}")
        print("💡 Resolution Strategy: Check scikit-learn installation and data preparation")
        lstm_rmse, lstm_mae = float('inf'), float('inf')

print("🧠 Neural network model evaluation completed!")

🧠 Building standalone LSTM model...
🔄 Creating sequences with length 12...
🔄 Creating sequences with length 12...


I0000 00:00:1756845428.206322      36 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


✅ LSTM model architecture defined!


I0000 00:00:1756845433.271822     103 cuda_dnn.cc:529] Loaded cuDNN version 90300


✅ LSTM model trained successfully!
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
🧠 LSTM RMSE: 1229.7209
🧠 LSTM MAE: 1013.8979
📊 LSTM forecast plot saved as 'lstm_forecast.png'
🧠 Neural network model evaluation completed!


In [6]:
# Cell 5: Hybrid ARIMA-LSTM Model
print("✨ Building Hybrid ARIMA-LSTM model...")

try:
    print("🔮 Step 1: Training ARIMA on full training data...")
    arima_model_hybrid = ARIMA(train_data['Value'], order=(5, 1, 0))
    arima_fitted_hybrid = arima_model_hybrid.fit()
    
    print("📊 Step 2: Calculating ARIMA residuals on training data...")
    arima_train_predictions = arima_fitted_hybrid.fittedvalues
    arima_residuals = train_data['Value'] - arima_train_predictions
    
    print("⚖️ Step 3: Scaling residuals for neural network...")
    residual_scaler = MinMaxScaler()
    residuals_scaled = residual_scaler.fit_transform(arima_residuals.values.reshape(-1, 1))
    
    if tf_available:
        print("🧠 Step 4: Training LSTM on ARIMA residuals...")
        X_residual, y_residual = create_sequences(residuals_scaled.flatten(), seq_length)
        X_residual = X_residual.reshape((X_residual.shape[0], X_residual.shape[1], 1))
        
        residual_lstm = Sequential([
            LSTM(30, return_sequences=True, input_shape=(seq_length, 1)),
            LSTM(30, return_sequences=False),
            Dense(15),
            Dense(1)
        ])
        
        residual_lstm.compile(optimizer='adam', loss='mse')
        residual_lstm.fit(X_residual, y_residual, epochs=50, batch_size=16, verbose=0)
        
        print("✅ Residual LSTM trained successfully!")
    else:
        print("🧠 Step 4: Training MLP on ARIMA residuals...")
        X_residual, y_residual = create_sequences(residuals_scaled.flatten(), seq_length)
        X_residual_flat = X_residual.reshape(X_residual.shape[0], -1)
        
        residual_mlp = MLPRegressor(hidden_layer_sizes=(50, 25), max_iter=300, random_state=42)
        residual_mlp.fit(X_residual_flat, y_residual)
        
        print("✅ Residual MLP trained successfully!")
    
    print("🔮 Step 5: Generating hybrid forecasts...")
    arima_test_forecast = arima_fitted_hybrid.forecast(steps=len(test_data))
    
    test_residuals_scaled = residual_scaler.transform(np.random.normal(0, 0.1, (len(test_data), 1)))
    
    if len(test_residuals_scaled) >= seq_length:
        if tf_available:
            X_test_residual = []
            for i in range(len(test_residuals_scaled) - seq_length + 1):
                X_test_residual.append(test_residuals_scaled[i:i+seq_length].flatten())
            X_test_residual = np.array(X_test_residual).reshape(-1, seq_length, 1)
            residual_forecast_scaled = residual_lstm.predict(X_test_residual)
        else:
            X_test_residual = []
            for i in range(len(test_residuals_scaled) - seq_length + 1):
                X_test_residual.append(test_residuals_scaled[i:i+seq_length].flatten())
            X_test_residual = np.array(X_test_residual)
            residual_forecast_scaled = residual_mlp.predict(X_test_residual)
        
        residual_forecast = residual_scaler.inverse_transform(residual_forecast_scaled.reshape(-1, 1))
        
        hybrid_forecast = arima_test_forecast[:len(residual_forecast)] + residual_forecast.flatten()
        
        hybrid_test_actual = test_data['Value'].iloc[:len(hybrid_forecast)]
        
        hybrid_rmse = np.sqrt(mean_squared_error(hybrid_test_actual, hybrid_forecast))
        hybrid_mae = mean_absolute_error(hybrid_test_actual, hybrid_forecast)
        
        print(f"✨ Hybrid Model RMSE: {hybrid_rmse:.4f}")
        print(f"✨ Hybrid Model MAE: {hybrid_mae:.4f}")
        
        hybrid_test_dates = test_data.index[:len(hybrid_forecast)]
        
        plt.figure(figsize=(12, 6))
        plt.plot(train_data.index, train_data['Value'], label='Training Data', color='#2E86AB', alpha=0.7)
        plt.plot(hybrid_test_dates, hybrid_test_actual, label='Actual Test Data', color='#A23B72', linewidth=2)
        plt.plot(hybrid_test_dates, hybrid_forecast, label='Hybrid Forecast', color='#6A994E', linewidth=2, linestyle='--')
        plt.title('Hybrid ARIMA-LSTM Model Forecast', fontsize=16, fontweight='bold')
        plt.xlabel('Date')
        plt.ylabel('Value')
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.tight_layout()
        plt.savefig('hybrid_forecast.png', dpi=300, bbox_inches='tight')
        plt.close()
        print("📊 Hybrid forecast plot saved as 'hybrid_forecast.png'")
    
    else:
        print("⚠️  Insufficient test data for hybrid model sequences")
        hybrid_rmse, hybrid_mae = float('inf'), float('inf')
        
except Exception as e:
    print(f"ERROR: {e}")
    print("💡 Resolution Strategy: Check model compatibility and data alignment")
    hybrid_rmse, hybrid_mae = float('inf'), float('inf')

print("✨ Hybrid model evaluation completed!")

✨ Building Hybrid ARIMA-LSTM model...
🔮 Step 1: Training ARIMA on full training data...
📊 Step 2: Calculating ARIMA residuals on training data...
⚖️ Step 3: Scaling residuals for neural network...
🧠 Step 4: Training LSTM on ARIMA residuals...
🔄 Creating sequences with length 12...
✅ Residual LSTM trained successfully!
🔮 Step 5: Generating hybrid forecasts...
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
✨ Hybrid Model RMSE: 2385.5827
✨ Hybrid Model MAE: 1860.2496
📊 Hybrid forecast plot saved as 'hybrid_forecast.png'
✨ Hybrid model evaluation completed!


In [7]:
# Cell 6: Final Comparison & Packaging
print("🏆 Generating final model comparison...")

print("\n" + "="*60)
print("📊          FORECASTING EXPERIMENT RESULTS")
print("="*60)
print(f"{'Model':<20} {'RMSE':<15} {'MAE':<15}")
print("-"*60)
print(f"{'ARIMA':<20} {arima_rmse:<15.4f} {arima_mae:<15.4f}")
print(f"{'LSTM/MLP':<20} {lstm_rmse:<15.4f} {lstm_mae:<15.4f}")
print(f"{'Hybrid':<20} {hybrid_rmse:<15.4f} {hybrid_mae:<15.4f}")
print("="*60)

models = {'ARIMA': arima_rmse, 'LSTM/MLP': lstm_rmse, 'Hybrid': hybrid_rmse}
best_model = min(models, key=models.get)
best_rmse = models[best_model]

print(f"\n🏆 WINNER: {best_model} model with RMSE of {best_rmse:.4f}")

if best_model == 'Hybrid':
    print("✨ The hybrid approach successfully improved forecasting accuracy!")
elif best_model == 'ARIMA':
    print("📉 Traditional ARIMA performed best for this dataset!")
else:
    print("🧠 Neural network approach achieved the highest accuracy!")

print("\n📦 Packaging results...")
try:
    with zipfile.ZipFile('forecasting_experiment_results.zip', 'w') as zipf:
        plot_files = ['eda_plot.png', 'decomposition_plot.png', 'arima_forecast.png', 
                     'lstm_forecast.png', 'hybrid_forecast.png']
        
        for plot_file in plot_files:
            if os.path.exists(plot_file):
                zipf.write(plot_file)
                print(f"📎 Added {plot_file} to results package")
            else:
                print(f"⚠️  {plot_file} not found, skipping...")
    
    print("✅ All results packaged in 'forecasting_experiment_results.zip'")
    
except Exception as e:
    print(f"ERROR: {e}")
    print("💡 Resolution Strategy: Check file permissions and available disk space")

print("\n🎉 TIME SERIES FORECASTING EXPERIMENT COMPLETED SUCCESSFULLY!")
print("📊 All models trained, evaluated, and compared")
print("📈 Performance metrics calculated and visualized") 
print("📦 Results packaged for easy distribution")
print("✨ Experiment artifacts ready for analysis and deployment!")

🏆 Generating final model comparison...

📊          FORECASTING EXPERIMENT RESULTS
Model                RMSE            MAE            
------------------------------------------------------------
ARIMA                2277.9092       1784.3821      
LSTM/MLP             1229.7209       1013.8979      
Hybrid               2385.5827       1860.2496      

🏆 WINNER: LSTM/MLP model with RMSE of 1229.7209
🧠 Neural network approach achieved the highest accuracy!

📦 Packaging results...
📎 Added eda_plot.png to results package
📎 Added decomposition_plot.png to results package
📎 Added arima_forecast.png to results package
📎 Added lstm_forecast.png to results package
📎 Added hybrid_forecast.png to results package
✅ All results packaged in 'forecasting_experiment_results.zip'

🎉 TIME SERIES FORECASTING EXPERIMENT COMPLETED SUCCESSFULLY!
📊 All models trained, evaluated, and compared
📈 Performance metrics calculated and visualized
📦 Results packaged for easy distribution
✨ Experiment artifacts read