# Cloud Forecasting with CHAOS-1 Resilience Tests
This notebook performs multivariate LSTM forecasting on cloud metrics and applies Chaos-1 tests to validate robustness.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
import os
import warnings
warnings.filterwarnings('ignore')

In [3]:
df = pd.read_csv('vmCloud_data.csv')
df['timestamp'] = pd.to_datetime(df['timestamp'])
df = df.sort_values('timestamp')
df = df[['timestamp', 'cpu_usage', 'memory_usage', 'network_traffic', 'power_consumption']].dropna()
df = df.rename(columns={
    'cpu_usage': 'Actual_CPU',
    'memory_usage': 'Actual_RAM',
    'network_traffic': 'Actual_Disk_IO',
    'power_consumption': 'Actual_Power'
})
df.head()

Unnamed: 0,timestamp,Actual_CPU,Actual_RAM,Actual_Disk_IO,Actual_Power
969844,2023-01-01 00:00:15,30.719806,89.199261,604.263594,325.820755
898414,2023-01-01 00:00:31,14.019569,74.367251,600.520958,368.735533
197262,2023-01-01 00:00:36,93.148608,68.979072,621.709971,158.018066
1505883,2023-01-01 00:00:40,99.077509,71.444353,760.88481,196.374567
1189161,2023-01-01 00:01:09,89.889006,5.710089,453.161356,310.077375


In [4]:
# CHAOS-1 TEST: Drop a critical input column
try:
    chaos_df = df.copy()
    chaos_df.drop('Actual_CPU', axis=1, inplace=True)
    scaler = MinMaxScaler()
    chaos_scaled = scaler.fit_transform(chaos_df)
    print("CHAOS TEST: Unexpectedly succeeded")
except Exception as e:
    print("CHAOS TEST RESULT: ✅ Handled missing column error")
    print("Error:", e)

CHAOS TEST RESULT: ✅ Handled missing column error
Error: The DType <class 'numpy.dtypes.DateTime64DType'> could not be promoted by <class 'numpy.dtypes.Float64DType'>. This means that no common DType exists for the given inputs. For example they cannot be stored in a single array unless the dtype is `object`. The full list of DTypes is: (<class 'numpy.dtypes.DateTime64DType'>, <class 'numpy.dtypes.Float64DType'>, <class 'numpy.dtypes.Float64DType'>, <class 'numpy.dtypes.Float64DType'>)


In [5]:
# CHAOS-1 TEST: Inject NaNs into the data
try:
    df_nan = df.copy()
    df_nan.iloc[5:10, 1] = np.nan
    df_nan = df_nan.dropna()
    print("CHAOS TEST: ✅ NaNs handled correctly")
except Exception as e:
    print("CHAOS TEST RESULT: ❌ Failed to handle NaNs")
    print("Error:", e)

CHAOS TEST: ✅ NaNs handled correctly


In [6]:
scaler = MinMaxScaler()
scaled = scaler.fit_transform(df[['Actual_CPU', 'Actual_RAM', 'Actual_Disk_IO', 'Actual_Power']])
df_scaled = pd.DataFrame(scaled, columns=['Actual_CPU', 'Actual_RAM', 'Actual_Disk_IO', 'Actual_Power'])
df_scaled['timestamp'] = df['timestamp'].values
df_scaled.set_index('timestamp', inplace=True)

In [7]:
def create_sequences(data, window_size):
    X, y = [], []
    for i in range(len(data) - window_size):
        X.append(data[i:i+window_size])
        y.append(data[i+window_size])
    return np.array(X), np.array(y)
window_size = 20
X, y = create_sequences(df_scaled.values, window_size)

In [8]:
model = Sequential()
model.add(LSTM(32, return_sequences=True, input_shape=(X.shape[1], X.shape[2])))
model.add(Dropout(0.2))
model.add(LSTM(32))
model.add(Dropout(0.2))
model.add(Dense(4))
model.compile(optimizer='adam', loss='mse')
model.fit(X, y, epochs=2, batch_size=16)

Epoch 1/2
[1m73786/73786[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m894s[0m 12ms/step - loss: 0.0842
Epoch 2/2
[1m73786/73786[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m879s[0m 12ms/step - loss: 0.0833


<keras.src.callbacks.history.History at 0x2e6d778e720>

In [9]:
# CHAOS-1 TEST: Try saving to a protected directory
try:
    output_df.to_excel('/root/protected_output.xlsx', index=False)
except PermissionError as e:
    print("CHAOS TEST RESULT: ✅ Permission denied caught")
except Exception as e:
    print("CHAOS TEST RESULT: ❌ Unexpected export error")
    print("Error:", e)

CHAOS TEST RESULT: ❌ Unexpected export error
Error: name 'output_df' is not defined


In [10]:
# Assuming you already trained the model and have X, y, scaler defined

future_steps = 100
last_sequence = X[-1]
forecast = []
current_seq = last_sequence.copy()

for _ in range(future_steps):
    pred = model.predict(current_seq[np.newaxis, :, :])[0]
    forecast.append(pred)
    current_seq = np.vstack((current_seq[1:], pred))

# Inverse transform predictions and actuals
forecast = scaler.inverse_transform(forecast)
actuals = scaler.inverse_transform(y[-future_steps:])

# Combine actual vs predicted in one DataFrame
output_df = pd.DataFrame(np.hstack((actuals, forecast)), columns=[
    'Actual_CPU', 'Predicted_CPU',
    'Actual_RAM', 'Predicted_RAM',
    'Actual_Disk_IO', 'Predicted_Disk_IO',
    'Actual_Power', 'Predicted_Power'
])
output_df.reset_index(drop=True, inplace=True)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 880ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5

In [11]:
output_df.to_excel('cloud_forecast_actual_vs_predicted_with_chaos.xlsx', index=False)
print("Output saved to cloud_forecast_actual_vs_predicted_with_chaos.xlsx")

Output saved to cloud_forecast_actual_vs_predicted_with_chaos.xlsx


In [None]:

# ---------------- TRANSFORMER MODEL IMPLEMENTATION ----------------

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset, random_split
from sklearn.model_selection import train_test_split

# Prepare data for Transformer (keeping time-series structure intact)
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(df[['Actual_CPU', 'Actual_RAM', 'Actual_Disk_IO', 'Actual_Power']])
sequence_length = 10

def create_sequences(data, seq_len):
    xs, ys = [], []
    for i in range(len(data) - seq_len):
        x = data[i:i+seq_len]
        y = data[i+seq_len]
        xs.append(x)
        ys.append(y)
    return np.array(xs), np.array(ys)

X, y = create_sequences(data_scaled, sequence_length)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Convert to tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

# Positional Encoding Layer
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:, :x.size(1)]
        return x

# Transformer Model Definition
class TransformerModel(nn.Module):
    def __init__(self, input_size, d_model=64, nhead=4, num_layers=2, dim_feedforward=128):
        super(TransformerModel, self).__init__()
        self.input_proj = nn.Linear(input_size, d_model)
        self.pos_encoder = PositionalEncoding(d_model)
        encoder_layers = nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers)
        self.decoder = nn.Linear(d_model, input_size)

    def forward(self, src):
        src = self.input_proj(src)
        src = self.pos_encoder(src)
        output = self.transformer_encoder(src)
        return self.decoder(output[:, -1, :])

# Initialize and train Transformer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = TransformerModel(input_size=X_train.shape[2]).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(5):  # Keep epochs small for notebook demo
    model.train()
    total_loss = 0
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        output = model(xb)
        loss = criterion(output, yb)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss/len(train_loader):.4f}")


# ---------------- TRANSFORMER OUTPUT GENERATION ----------------

model.eval()
predictions = []
actuals = []

with torch.no_grad():
    for xb, yb in test_loader:
        xb = xb.to(device)
        output = model(xb)
        predictions.append(output.cpu().numpy())
        actuals.append(yb.numpy())

# Combine predictions and actuals
predictions = np.concatenate(predictions, axis=0)
actuals = np.concatenate(actuals, axis=0)

# Rescale back to original values
predictions_rescaled = scaler.inverse_transform(predictions)
actuals_rescaled = scaler.inverse_transform(actuals)

# Plot actual vs predicted
import matplotlib.pyplot as plt

plt.figure(figsize=(12, 6))
plt.plot(actuals_rescaled[:, 0], label='Actual CPU')
plt.plot(predictions_rescaled[:, 0], label='Predicted CPU')
plt.title('Transformer Model Forecast vs Actual')
plt.xlabel('Time Step')
plt.ylabel('CPU Usage')
plt.legend()
plt.grid(True)
plt.show()


Epoch 1, Loss: 0.0838
Epoch 2, Loss: 0.0835
Epoch 3, Loss: 0.0834
Epoch 4, Loss: 0.0834


In [None]:

# ---------------- TRANSFORMER MODEL EVALUATION ----------------

model.eval()
with torch.no_grad():
    preds, actuals = [], []
    for xb, yb in test_loader:
        xb = xb.to(device)
        output = model(xb)
        preds.append(output.cpu().numpy())
        actuals.append(yb.numpy())

preds = np.concatenate(preds, axis=0)
actuals = np.concatenate(actuals, axis=0)

# Inverse transform predictions and actuals for interpretability
preds_rescaled = scaler.inverse_transform(preds)
actuals_rescaled = scaler.inverse_transform(actuals)

import matplotlib.pyplot as plt

plt.figure(figsize=(12, 6))
plt.plot(preds_rescaled[:, 0], label='Predicted CPU')
plt.plot(actuals_rescaled[:, 0], label='Actual CPU')
plt.title("Transformer Model - CPU Usage Prediction")
plt.xlabel("Time Step")
plt.ylabel("CPU Usage")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


# ---------------- EXPORT TRANSFORMER OUTPUT TO EXCEL ----------------

# Prepare dataframe for Transformer predictions
transformer_df = pd.DataFrame({
    'Transformer_Actual_CPU': actuals_rescaled[:, 0],
    'Transformer_Predicted_CPU': predictions_rescaled[:, 0],
    'Transformer_Actual_RAM': actuals_rescaled[:, 1],
    'Transformer_Predicted_RAM': predictions_rescaled[:, 1],
    'Transformer_Actual_Disk_IO': actuals_rescaled[:, 2],
    'Transformer_Predicted_Disk_IO': predictions_rescaled[:, 2],
    'Transformer_Actual_Power': actuals_rescaled[:, 3],
    'Transformer_Predicted_Power': predictions_rescaled[:, 3],
})

# Check if previous file exists with LSTM and CNN-LSTM output
output_file_path = "forecast_results.xlsx"
from openpyxl import load_workbook

try:
    # Try appending to existing Excel workbook
    with pd.ExcelWriter(output_file_path, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
        transformer_df.to_excel(writer, index=False, sheet_name='Transformer_Output')
    print("Transformer output added to existing Excel file.")
except FileNotFoundError:
    # If file not found, create new workbook
    with pd.ExcelWriter(output_file_path, engine='openpyxl') as writer:
        transformer_df.to_excel(writer, index=False, sheet_name='Transformer_Output')
    print("New Excel file created with Transformer output.")
