In [4]:
import pandas as pd
import numpy as np
import glob
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import uuid

# Set random seed for reproducibility
torch.manual_seed(42)

# Optimize for CPU: Use all available CPU cores
torch.set_num_threads(torch.get_num_threads())

# Step 1: Load and Merge All CSV Files
file_list = glob.glob("./fastStorage/2013-8/*")
df = pd.concat((pd.read_csv(f, delimiter=";", skipinitialspace=True) for f in file_list[:10]), ignore_index=True)
df.columns = df.columns.str.strip()

print(df.shape)

# Step 2: Convert Timestamp to Datetime & Sort
df["Timestamp [ms]"] = pd.to_datetime(df["Timestamp [ms]"], unit='ms')
df = df.sort_values("Timestamp [ms]").reset_index(drop=True)

# Optional: Resample to 1-second intervals (helps if timestamps are irregular)
df.set_index("Timestamp [ms]", inplace=True)
df = df.resample("1s").mean().interpolate()
df.reset_index(inplace=True)

print(df.shape)

# Step 3: Feature Engineering
df["CPU Utilization Per Core"] = df["CPU usage [MHZ]"] / df["CPU capacity provisioned [MHZ]"]
df["Memory Utilization [%]"] = df["Memory usage [KB]"] / df["Memory capacity provisioned [KB]"]
df["Disk Total Throughput [KB/s]"] = df["Disk read throughput [KB/s]"] + df["Disk write throughput [KB/s]"]
df["Network Total Throughput [KB/s]"] = df["Network received throughput [KB/s]"] + df["Network transmitted throughput [KB/s]"]

# Step 4: Select Features
features = [
    "CPU cores", "CPU capacity provisioned [MHZ]", "CPU usage [MHZ]", "CPU usage [%]",
    "Memory capacity provisioned [KB]", "Memory usage [KB]", "Disk read throughput [KB/s]",
    "Disk write throughput [KB/s]", "Network received throughput [KB/s]", "Network transmitted throughput [KB/s]",
    "CPU Utilization Per Core", "Memory Utilization [%]", "Disk Total Throughput [KB/s]", "Network Total Throughput [KB/s]"
]
df.replace([np.inf, -np.inf], np.nan, inplace=True)
df.dropna(inplace=True)

# Step 5: Normalize Data
scaler = MinMaxScaler()
df[features] = scaler.fit_transform(df[features])

print(df.shape)

# Step 6: Create Sequences
def create_sequences(data, seq_length=5):
    sequences, targets = [], []
    for i in range(len(data) - seq_length):
        sequences.append(data[i:i+seq_length])
        targets.append(data[i+seq_length])
    return np.array(sequences), np.array(targets)

seq_length = 5
X, y = create_sequences(df[features].values, seq_length)

print(X.shape, y.shape)

# Step 7: Temporal Train-Test-Validation Split
split_index = int(len(X) * 0.8)
val_index = int(len(X) * 0.64)
X_train, X_val, X_test = X[:val_index], X[val_index:split_index], X[split_index:]
y_train, y_val, y_test = y[:val_index], y[val_index:split_index], y[split_index:]

print(X_train.shape, X_val.shape, X_test.shape)
print(y_train.shape, y_val.shape, y_test.shape)

# Step 8: Define PyTorch Dataset
class TimeSeriesDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# Create DataLoaders with multiple workers for CPU efficiency
train_dataset = TimeSeriesDataset(X_train, y_train)
val_dataset = TimeSeriesDataset(X_val, y_val)
test_dataset = TimeSeriesDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False, num_workers=4, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=4, pin_memory=True)

# Step 9: Define LSTM Model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_sizes=[128, 64, 32], output_size=14, dropout=0.3):
        super(LSTMModel, self).__init__()
        self.lstm1 = nn.LSTM(input_size, hidden_sizes[0], batch_first=True)
        self.dropout1 = nn.Dropout(dropout)
        self.lstm2 = nn.LSTM(hidden_sizes[0], hidden_sizes[1], batch_first=True)
        self.dropout2 = nn.Dropout(dropout)
        self.lstm3 = nn.LSTM(hidden_sizes[1], hidden_sizes[2], batch_first=True)
        self.fc = nn.Linear(hidden_sizes[2], output_size)
    
    def forward(self, x):
        out, _ = self.lstm1(x)
        out = self.dropout1(out)
        out, _ = self.lstm2(out)
        out = self.dropout2(out)
        out, _ = self.lstm3(out)
        out = self.fc(out[:, -1, :])
        return out

# Initialize model, loss, and optimizer
device = torch.device("cpu")  # Explicitly use CPU
model = LSTMModel(input_size=X.shape[2], output_size=y.shape[1]).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0005)

# Step 10: Training Loop
epochs = 150
train_losses = []
val_losses = []

for epoch in range(epochs):
    model.train()
    train_loss = 0.0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * X_batch.size(0)
    train_loss /= len(train_loader.dataset)
    train_losses.append(train_loss)
    
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            val_loss += loss.item() * X_batch.size(0)
    val_loss /= len(val_loader.dataset)
    val_losses.append(val_loss)
    
    print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.6f}, Val Loss: {val_loss:.6f}")

# Save model
torch.save(model.state_dict(), "enhanced_lstm_model_full_dataset.pth")

# Step 11: Predictions
model.eval()
y_pred = []
with torch.no_grad():
    for X_batch, _ in test_loader:
        X_batch = X_batch.to(device)
        outputs = model(X_batch)
        y_pred.append(outputs.cpu().numpy())
y_pred = np.concatenate(y_pred, axis=0)

# --- LOSS CURVES ---
plt.figure(figsize=(10, 5))
plt.plot(train_losses, label='Training Loss')
plt.plot(val_losses, label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Model Loss Over Epochs')
plt.legend()
plt.grid()
plt.savefig('loss_curves.png')
plt.close()

# --- ACTUAL vs PREDICTED VISUALIZATION ---
plt.figure(figsize=(18, 14))
for i in range(len(features)):
    plt.subplot(5, 3, i+1)
    plt.plot(y_test[:, i][::10], label=f"Actual {features[i]}")
    plt.plot(y_pred[:, i][::10], label=f"Predicted {features[i]}", linestyle="dashed")
    plt.xlabel("Time")
    plt.ylabel(features[i])
    plt.legend()
    plt.title(f"LSTM Prediction vs Actual - {features[i]}")
plt.tight_layout()
plt.savefig('actual_vs_predicted.png')
plt.close()

# --- ERROR DISTRIBUTION ---
errors = y_test - y_pred
plt.figure(figsize=(15, 6))
sns.boxplot(data=errors)
plt.xticks(range(len(features)), features, rotation=45, ha="right")
plt.title("Prediction Error Distribution Across Features")
plt.ylabel("Prediction Error")
plt.grid()
plt.savefig('error_distribution.png')
plt.close()

# --- CPU Usage [%] Focused Plot ---
plt.figure(figsize=(10, 5))
plt.plot(y_test[:, 3], label="Actual CPU Usage [%]")
plt.plot(y_pred[:, 3], label="Predicted CPU Usage [%]", linestyle="dashed")
plt.xlabel("Time")
plt.ylabel("CPU Usage [%]")
plt.legend()
plt.title("LSTM Prediction vs Actual (CPU Usage [%])")
plt.savefig('cpu_usage_focused.png')
plt.close()

(97575, 11)
(2593, 11)
(2593, 15)
(2588, 5, 14) (2588, 14)
(1656, 5, 14) (414, 5, 14) (518, 5, 14)
(1656, 14) (414, 14) (518, 14)
Epoch 1/150, Train Loss: 0.069421, Val Loss: 0.027100
Epoch 2/150, Train Loss: 0.019208, Val Loss: 0.012914
Epoch 3/150, Train Loss: 0.014434, Val Loss: 0.011255
Epoch 4/150, Train Loss: 0.012506, Val Loss: 0.009658
Epoch 5/150, Train Loss: 0.011601, Val Loss: 0.009119
Epoch 6/150, Train Loss: 0.010786, Val Loss: 0.008532
Epoch 7/150, Train Loss: 0.010511, Val Loss: 0.008608
Epoch 8/150, Train Loss: 0.009481, Val Loss: 0.007564
Epoch 9/150, Train Loss: 0.009172, Val Loss: 0.007517
Epoch 10/150, Train Loss: 0.008568, Val Loss: 0.007242
Epoch 11/150, Train Loss: 0.008363, Val Loss: 0.007143
Epoch 12/150, Train Loss: 0.008142, Val Loss: 0.006898
Epoch 13/150, Train Loss: 0.007841, Val Loss: 0.006747
Epoch 14/150, Train Loss: 0.007678, Val Loss: 0.006950
Epoch 15/150, Train Loss: 0.007699, Val Loss: 0.007024
Epoch 16/150, Train Loss: 0.007532, Val Loss: 0.007023