In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

import tensorflow as tf
import keras_tuner as kt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import MinMaxScaler

import joblib

In [None]:
# Root folder; the dataset CSV and the 'Multi-Variable' output paths are built from it.
PROJECT_PATH = '../'
# Passed to KerasTuner as the tuning project name.
PROJECT_NAME = 'LSTM_Multi-Variable_CPU_Request'

# Number of past rows in each input window (history_size of the windowing function).
WINDOW_SIZE = 30
# Offset used to pick each window's label (target_size of the windowing function).
FORECAST_HORIZON = 5

In [None]:
# Read the prepared dataset CSV located under the project root.
dataset_csv = PROJECT_PATH+'/LSTM_Ready_Dataset_Old.csv'
df = pd.read_csv(dataset_csv)

In [None]:
# =====================================================================
# SETUP & DATA LOADING
# =====================================================================

# Each cluster-level series is the element-wise sum of the two worker-node columns.
cpu_req_sum = df['node_cpu_req_aj-aung-k8s-worker1'] + df['node_cpu_req_aj-aung-k8s-worker2']
cpu_cap_sum = df['node_cpu_cap_aj-aung-k8s-worker1'] + df['node_cpu_cap_aj-aung-k8s-worker2']
mem_req_sum = df['node_mem_req_aj-aung-k8s-worker1'] + df['node_mem_req_aj-aung-k8s-worker2']
mem_cap_sum = df['node_mem_cap_aj-aung-k8s-worker1'] + df['node_mem_cap_aj-aung-k8s-worker2']

# Wrap every sum in its own single-column frame so the column names are explicit.
cluster_cpu_req = pd.DataFrame({'cluster_cpu_req': cpu_req_sum})
cluster_cpu_cap = pd.DataFrame({'cluster_cpu_cap': cpu_cap_sum})
cluster_mem_req = pd.DataFrame({'cluster_mem_req': mem_req_sum})
cluster_mem_cap = pd.DataFrame({'cluster_mem_cap': mem_cap_sum})

# Model inputs: the four cluster aggregates plus the 'cluster_pods_pending' column.
feature_parts = [
    cluster_cpu_req,
    cluster_cpu_cap,
    cluster_mem_req,
    cluster_mem_cap,
    df['cluster_pods_pending'],
]
features = pd.concat(feature_parts, axis=1)

# The series to forecast is the cluster CPU request (same object as the first feature frame).
target = cluster_cpu_req

# NumPy views used by the split and scaling steps; the target is forced to one column.
raw_data = features.to_numpy()
target_data = target.to_numpy().reshape(-1, 1)

In [None]:
# =====================================================================
# CHRONOLOGICAL SPLIT (70/15/15)
# =====================================================================

# Boundaries are row positions; rows keep their original order (no shuffling).
n = len(raw_data)
train_idx, val_idx = int(n * 0.7), int(n * 0.85)

train_rows = slice(None, train_idx)
val_rows = slice(train_idx, val_idx)
test_rows = slice(val_idx, None)

# Split features (for X)
X_train_raw, X_val_raw, X_test_raw = (
    raw_data[rows] for rows in (train_rows, val_rows, test_rows)
)

# Split target (for y) with the very same row ranges so X and y stay aligned
y_train_raw, y_val_raw, y_test_raw = (
    target_data[rows] for rows in (train_rows, val_rows, test_rows)
)

In [None]:
# =====================================================================
# DUAL SCALING (CRITICAL STEP)
# =====================================================================

# Scaler 1: For Inputs (all feature columns together).
# Fitted on the training split only, then applied unchanged to validation and test.
scaler_inputs = MinMaxScaler(feature_range=(0, 1))
X_train_scaled = scaler_inputs.fit_transform(X_train_raw)
X_val_scaled = scaler_inputs.transform(X_val_raw)
X_test_scaled = scaler_inputs.transform(X_test_raw)

# Scaler 2: For Target (the single target column only).
# Kept separate so predictions can be inverse-transformed without the other features.
scaler_target = MinMaxScaler(feature_range=(0, 1))
y_train_scaled = scaler_target.fit_transform(y_train_raw)
y_val_scaled = scaler_target.transform(y_val_raw)
y_test_scaled = scaler_target.transform(y_test_raw)

# SAVE BOTH SCALERS
scaler_inputs_path = PROJECT_PATH+'/Multi-Variable/Multi-Var_Scaler_Inputs.pkl'
scaler_target_path = PROJECT_PATH+'/Multi-Variable/Multi-Var_Scaler_Target.pkl'

# This is the first cell that writes into the output folder; create it up front so
# saving does not fail with FileNotFoundError when the folder does not exist yet.
for scaler_path in (scaler_inputs_path, scaler_target_path):
    os.makedirs(os.path.dirname(scaler_path), exist_ok=True)

joblib.dump(scaler_inputs, scaler_inputs_path)
joblib.dump(scaler_target, scaler_target_path)
print("Success: Both scalers saved.")

In [None]:
# =====================================================================
# SLIDING WINDOW (MULTIVARIATE)
# =====================================================================

# Window Size 30 Minutes # Forecast 5 Minutes
def multivariate_data(dataset, target, start_index, end_index, history_size, target_size):
    """Cut a multivariate series into fixed-length input windows with one label each.

    For every position ``i`` in ``[start_index + history_size, end_index)`` the
    sample is ``dataset[i - history_size:i]`` and its label is
    ``target[i + target_size]``.

    NOTE(review): the last row of a window is ``i - 1``, so the label sits
    ``target_size + 1`` rows after it. Confirm this matches the intended horizon.

    Args:
        dataset: Array-like of input rows, indexed along its first axis.
        target: Array-like of label rows, indexed with the same positions as
            ``dataset``.
        start_index: First row a window may start at (assumed >= 0 — TODO confirm).
        end_index: Exclusive upper bound for ``i``. ``None`` means "as far as the
            data allows". A value that would push a label lookup past the end of
            the data is clamped instead of raising ``IndexError``.
        history_size: Number of rows in each input window.
        target_size: Offset added to ``i`` to pick the label row.

    Returns:
        Tuple ``(data, labels)`` of NumPy arrays. ``data`` has shape
        ``(n_samples, history_size, ...)`` and ``labels`` has shape
        ``(n_samples, ...)``, where ``...`` are the trailing dimensions of
        ``dataset`` / ``target``. When no complete window fits, both arrays are
        empty but keep those trailing dimensions so later ``.shape`` lookups
        still work.
    """
    dataset = np.asarray(dataset)
    target = np.asarray(target)

    first = start_index + history_size
    # Largest exclusive bound for which target[i + target_size] is still in range.
    last_valid = min(len(dataset), len(target)) - target_size
    stop = last_valid if end_index is None else min(end_index, last_valid)

    if first >= stop:
        # Not enough rows for a single window: return correctly shaped empties.
        empty_data = np.empty((0, history_size) + dataset.shape[1:], dtype=dataset.dtype)
        empty_labels = np.empty((0,) + target.shape[1:], dtype=target.dtype)
        return empty_data, empty_labels

    positions = range(first, stop)
    # Plain slices give the same rows as indexing with a range, without fancy indexing.
    data = [dataset[i - history_size:i] for i in positions]
    labels = [target[i + target_size] for i in positions]

    return np.array(data), np.array(labels)

# Create the 3D arrays
# Note: the SCALED splits are windowed, each one on its own, with identical settings:
# start at row 0, no explicit end, WINDOW_SIZE rows of history, FORECAST_HORIZON offset.
window_args = (0, None, WINDOW_SIZE, FORECAST_HORIZON)

X_train, y_train = multivariate_data(X_train_scaled, y_train_scaled, *window_args)
X_val, y_val = multivariate_data(X_val_scaled, y_val_scaled, *window_args)
X_test, y_test = multivariate_data(X_test_scaled, y_test_scaled, *window_args)

print(f"Train Shape: {X_train.shape}")
print(f"Target Shape: {y_train.shape}")

In [None]:
# =====================================================================
# KERAS TUNER
# =====================================================================

def build_model(hp):
    """Build and compile an LSTM regressor from a KerasTuner hyperparameter set.

    Search space sampled from ``hp``:
      * ``units_layer_1``: units of the first LSTM layer, 16-128 in steps of 16.
      * ``num_layers``: number of LSTM layers, 1 or 2.
      * ``dropout_rate``: rate shared by every Dropout layer, 0.1-0.4 in steps of 0.1.
      * ``units_layer_2``: units of the second LSTM layer, 16-64 in steps of 16
        (only sampled when two layers are used).
      * ``learning_rate``: Adam learning rate, one of 1e-2, 1e-3, 1e-4.

    The input shape is read from the global ``X_train`` (its second and third
    dimensions), so ``X_train`` must exist before this function is called.

    Args:
        hp: KerasTuner hyperparameter container used to sample the values above.

    Returns:
        A compiled Sequential model ending in a single-unit Dense layer,
        trained with MSE loss and reporting MAE.
    """
    net = Sequential()

    first_units = hp.Int('units_layer_1', min_value=16, max_value=128, step=16)
    layer_count = hp.Int('num_layers', 1, 2)
    # The first LSTM must emit the full sequence only when a second LSTM consumes it.
    is_stacked = layer_count == 2

    net.add(
        LSTM(
            units=first_units,
            return_sequences=is_stacked,
            input_shape=(X_train.shape[1], X_train.shape[2]),
        )
    )

    drop_rate = hp.Float('dropout_rate', min_value=0.1, max_value=0.4, step=0.1)
    net.add(Dropout(drop_rate))

    if is_stacked:
        second_units = hp.Int('units_layer_2', min_value=16, max_value=64, step=16)
        net.add(LSTM(units=second_units, return_sequences=False))
        net.add(Dropout(drop_rate))

    # One output value per window.
    net.add(Dense(1))

    step_size = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    net.compile(optimizer=Adam(learning_rate=step_size), loss='mse', metrics=['mae'])

    return net

# Random search over the build_model space: 10 trials, one execution each,
# with validation loss as the objective.
tuning_dir = PROJECT_PATH+'/Multi-Variable/Tuning'
tuner = kt.RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=10,
    executions_per_trial=1,
    directory=tuning_dir,
    project_name=PROJECT_NAME,
)

# Stop a run after 5 epochs without val_loss improvement (restore_best_weights is on).
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

print("\nStarting the hyperparameter search...")
search_fit_args = dict(
    epochs=100,
    validation_data=(X_val, y_val),
    callbacks=[early_stop],
)
tuner.search(X_train, y_train, **search_fit_args)

In [None]:
# =====================================================================
# BUILD, TRAIN, LOG, AND SAVE THE BEST MODEL
# =====================================================================
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# Log the Hyperparameters: a header line followed by one "name: value" line per entry.
hp_log_lines = ["--- Optimal KerasTuner Hyperparameters ---\n"]
hp_log_lines.extend(f"{param}: {value}\n" for param, value in best_hps.values.items())
with open(PROJECT_PATH + "/Multi-Variable/Best_Hyperparameters.log", "w") as log_file:
    log_file.writelines(hp_log_lines)
print("\nSuccess: Hyperparameters logged to 'Best_Hyperparameters.log'")

# Build and train ONE final time, from a freshly built model with the best hyperparameters
print("Training the final optimal model...")
best_model = tuner.hypermodel.build(best_hps)
history = best_model.fit(
    X_train,
    y_train,
    epochs=100,
    validation_data=(X_val, y_val),
    callbacks=[early_stop],
    verbose=2,
)

model_path = PROJECT_PATH+"/Multi-Variable/Best_Multi_Var_Model.keras"
best_model.save(model_path)
print("Success: Model saved to 'Best_Multi_Var_Model.keras'")

In [None]:
# =====================================================================
# EVALUATION & PLOTTING
# =====================================================================

# Predict on the test windows, which were not passed to the tuner or the final fit.
print("\nRunning final evaluation on unseen test data...")
scaled_predictions = best_model.predict(X_test)

# Undo the target scaling so predictions and labels are back in the original units.
to_real_units = scaler_target.inverse_transform
real_predictions = to_real_units(scaled_predictions)
real_actuals = to_real_units(y_test.reshape(-1, 1))

# Official metrics (RMSE is the square root of the MSE)
test_mse = mean_squared_error(real_actuals, real_predictions)
rmse = np.sqrt(test_mse)
mae = mean_absolute_error(real_actuals, real_predictions)

report_lines = (
    "\n--- OFFICIAL AUTOSCALER PERFORMANCE ---",
    f"Test RMSE: {rmse:.2f} vCores",
    f"Test MAE:  {mae:.2f} vCores",
    f"On average, the model's 5-minute forecast is off by {mae:.2f} vCores.",
)
for report_line in report_lines:
    print(report_line)

In [None]:
# Learning curve of the final fit: per-epoch training loss against validation loss.
loss_fig, loss_ax = plt.subplots(figsize=(25, 10))
loss_ax.plot(history.history['loss'], label='Training Loss (MSE)', color='blue')
loss_ax.plot(history.history['val_loss'], label='Validation Loss (MSE)', color='orange')
loss_ax.set_title('LSTM Learning Curve: Training vs. Validation Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss (Mean Squared Error)')
loss_ax.legend(loc='upper right')
loss_ax.grid(True, linestyle='--', alpha=0.7)
loss_fig.tight_layout()
plt.show()

In [None]:
# Overlay the inverse-transformed test labels and the model predictions.
pred_fig, pred_ax = plt.subplots(figsize=(25, 10))
pred_ax.plot(real_actuals, label='Actual vCores', color='blue', alpha=0.6)
pred_ax.plot(
    real_predictions,
    label='Predicted vCores (5m ahead)',
    color='red',
    linestyle='--',
    alpha=0.8,
)
pred_ax.set_title('Test Data: Actual vs Predicted CPU Requests')
pred_ax.set_xlabel('Time Steps')
pred_ax.set_ylabel('vCores')
pred_ax.legend(loc='upper right')
pred_ax.grid(True, linestyle='--', alpha=0.5)
pred_fig.tight_layout()
plt.show()