In [2]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import os
import time
import warnings

warnings.filterwarnings('ignore')

print("--- Training Deep Learning Model: LSTM (with Timing) ---")

# --- Custom Functions ---
def calculate_mape(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    non_zero_mask = y_true != 0
    return np.mean(np.abs((y_true[non_zero_mask] - y_pred[non_zero_mask]) / y_true[non_zero_mask])) * 100

def calculate_forecast_bias(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(y_true - y_pred)
    
def create_dataset(dataset, look_back=7):
    dataX, dataY = [], []
    for i in range(len(dataset) - look_back):
        a = dataset[i:(i + look_back), 0]
        dataX.append(a)
        dataY.append(dataset[i + look_back, 0])
    return np.array(dataX), np.array(dataY)

# --- 1. Load Data ---
try:
    current_dir = os.getcwd()
    base_path = os.path.dirname(current_dir)
    data_path = os.path.join(base_path, 'data', 'processed', 'featured_dataset.csv')
    df = pd.read_csv(data_path, parse_dates=['date'])
    df = df.sort_values('date')
    print("Dataset loaded successfully.")
except Exception as e:
    print(f"Error loading data: {e}")
    exit()

ts_data = df.groupby('date')['usage_cpu'].sum().values.reshape(-1, 1)

# --- 2. Prepare and Split Data ---
scaler = MinMaxScaler(feature_range=(0, 1))
ts_scaled = scaler.fit_transform(ts_data)
look_back = 7
X, y = create_dataset(ts_scaled, look_back)
X = np.reshape(X, (X.shape[0], X.shape[1], 1))

train_size = int(len(X) * 0.7)
X_train, X_test = X[0:train_size], X[train_size:len(X)]
y_train, y_test = y[0:train_size], y[train_size:len(y)]
print(f"Data prepared with {len(X_train)} training sequences.")

# --- 3. Build and Train LSTM Model ---
print("\nTraining LSTM model... (This will take several minutes)")
lstm_model = Sequential([ LSTM(50, input_shape=(look_back, 1)), Dense(1) ])
lstm_model.compile(loss='mean_squared_error', optimizer='adam')

start_time = time.time()
lstm_model.fit(X_train, y_train, epochs=50, batch_size=1, verbose=0)
end_time = time.time()
training_time = end_time - start_time
print("LSTM Training Complete.")

# --- 4. Evaluate Model and Measure Inference Speed ---
inference_start_time = time.time()
predictions_scaled = lstm_model.predict(X_test)
inference_end_time = time.time()
inference_speed = (inference_end_time - inference_start_time) / len(X_test)

predictions = scaler.inverse_transform(predictions_scaled)
y_true_unscaled = scaler.inverse_transform(y_test.reshape(-1, 1))

predictions_flat = predictions.flatten()
y_true_flat = y_true_unscaled.flatten()

mae = mean_absolute_error(y_true_flat, predictions_flat)
rmse = np.sqrt(mean_squared_error(y_true_flat, predictions_flat))
mape = calculate_mape(y_true_flat, predictions_flat)
bias = calculate_forecast_bias(y_true_flat, predictions_flat)

# --- 5. Display Results ---
print("\n--- LSTM Model Performance ---")
print(f"  - MAE: {mae:.2f}")
print(f"  - RMSE: {rmse:.2f}")
print(f"  - MAPE: {mape:.2f}%")
print(f"  - Forecast Bias: {bias:.2f}")
print(f"  - Training Time: {training_time:.2f} seconds")
print(f"  - Average Inference Speed: {inference_speed * 1000:.2f} ms/prediction")



--- Training Deep Learning Model: LSTM (with Timing) ---
Dataset loaded successfully.
Data prepared with 58 training sequences.

Training LSTM model... (This will take several minutes)
LSTM Training Complete.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 132ms/step

--- LSTM Model Performance ---
  - MAE: 45.56
  - RMSE: 53.49
  - MAPE: 5.14%
  - Forecast Bias: -14.50
  - Training Time: 8.27 seconds
  - Average Inference Speed: 6.68 ms/prediction
