In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from statsmodels.tsa.arima.model import ARIMA
import os
import warnings
import time

# Suppress every warning category for the rest of the run. This also hides
# any warnings raised while the models are being fitted.
# NOTE(review): confirm that silencing fit-time warnings is intended.
warnings.filterwarnings('ignore')

# Banner marking the start of this run's output.
print("--- Training Classical Model: ARIMA (with Timing) ---")

# --- Custom Functions ---
def calculate_mape(y_true, y_pred):
    """Return the mean absolute percentage error, in percent.

    Both inputs are converted to NumPy arrays. Positions where the actual
    value is exactly zero are left out of the average, since the relative
    error is undefined there.

    Args:
        y_true: Array-like of observed values.
        y_pred: Array-like of predicted values, aligned with ``y_true``.

    Returns:
        The mean of ``|actual - predicted| / |actual|`` over the retained
        positions, multiplied by 100.
    """
    actual = np.array(y_true)
    forecast = np.array(y_pred)
    keep = actual != 0
    actual_kept = actual[keep]
    relative_error = (actual_kept - forecast[keep]) / actual_kept
    return np.mean(np.abs(relative_error)) * 100

def calculate_forecast_bias(y_true, y_pred):
    """Return the mean signed error ``actual - predicted``.

    A positive result means the predictions were, on average, below the
    actual values; a negative result means they were above.

    Args:
        y_true: Array-like of observed values.
        y_pred: Array-like of predicted values, aligned with ``y_true``.

    Returns:
        The arithmetic mean of the element-wise differences.
    """
    residuals = np.array(y_true) - np.array(y_pred)
    return np.mean(residuals)

# --- Load Data ---
# The dataset is located relative to the parent of the current working
# directory: <parent of cwd>/data/processed/featured_dataset.csv.
try:
    current_dir = os.getcwd()
    base_path = os.path.dirname(current_dir)
    data_path = os.path.join(base_path, 'data', 'processed', 'featured_dataset.csv')
    # Ask pandas to parse 'date' while reading, then sort rows by 'date' ascending.
    df = pd.read_csv(data_path, parse_dates=['date'])
    df = df.sort_values('date')
    print("Dataset loaded successfully.")
except Exception as e:
    print(f"Error loading data: {e}")
    # Nothing below can run without the data, so stop here. Raise SystemExit
    # directly instead of calling exit(): exit() is a helper injected by the
    # `site` module for interactive sessions (it is not guaranteed to exist)
    # and, called without arguments, reports success (status 0) despite the
    # failure.
    raise SystemExit(1) from e

# Collapse the frame to a single series: one value per distinct 'date',
# obtained by summing 'usage_cpu' over all rows sharing that date.
ts_data = df.groupby('date')['usage_cpu'].sum()

# --- Data Split ---
# Positional 70/30 split: the first 70% of entries (count truncated to an
# integer) form the training series, the remainder forms the test series.
train_size = int(0.7 * len(ts_data))
train_series = ts_data.iloc[:train_size]
test_series = ts_data.iloc[train_size:]

# --- Train and Evaluate ---
# Walk-forward evaluation: for every test observation an ARIMA(5,1,0) model is
# refit on all values seen so far, its forecast for the next step is recorded,
# and the true observation is then appended to the history.
print("\nTraining ARIMA model...")
history = list(train_series)
predictions = []

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time.time() follows the wall clock and can jump if the
# system time is adjusted mid-run.
start_time = time.perf_counter()  # Start timing
for actual in test_series:
    model = ARIMA(history, order=(5, 1, 0))
    model_fit = model.fit()
    output = model_fit.forecast()
    yhat = output[0]
    predictions.append(yhat)
    # Reveal the true value only after forecasting it, so each prediction is
    # made without access to the observation it is scored against.
    history.append(actual)
end_time = time.perf_counter()  # End timing

# The timed span covers refitting as well as forecasting, so the per-step
# figure below is the average cost of one refit-and-forecast iteration.
training_time = end_time - start_time
inference_speed = training_time / len(test_series)  # Avg time per prediction

mae = mean_absolute_error(test_series, predictions)
rmse = np.sqrt(mean_squared_error(test_series, predictions))
mape = calculate_mape(test_series, predictions)
bias = calculate_forecast_bias(test_series, predictions)
print("ARIMA Training Complete.")

# --- Display Results ---
# Emit the whole report with one print call; sep="\n" puts each entry on its
# own line. inference_speed is in seconds and is shown in milliseconds.
print(
    "\n--- ARIMA Model Performance ---",
    f"  - MAE: {mae:.2f}",
    f"  - RMSE: {rmse:.2f}",
    f"  - MAPE: {mape:.2f}%",
    f"  - Forecast Bias: {bias:.2f}",
    f"  - Total Training/Prediction Time: {training_time:.2f} seconds",
    f"  - Average Inference Speed: {inference_speed * 1000:.2f} ms/prediction",
    sep="\n",
)



--- Training Classical Model: ARIMA (with Timing) ---
Dataset loaded successfully.

Training ARIMA model...
ARIMA Training Complete.

--- ARIMA Model Performance ---
  - MAE: 45.58
  - RMSE: 55.45
  - MAPE: 5.11%
  - Forecast Bias: 1.88
  - Total Training/Prediction Time: 2.64 seconds
  - Average Inference Speed: 94.37 ms/prediction
