# Import necessary libraries

In [10]:

import random
import shutil
from collections import deque

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

1. Preprocessing

In [11]:
# Load the dataset
data_path = 'merged_cloud_metrics.csv'
df = pd.read_csv(data_path)

# Metric columns that get_state() reads by name (calculate_reward() uses the
# two EC2 ones). Checking them here turns a KeyError deep inside the training
# loop into an immediate, readable failure.
# NOTE(review): RDS_FreeableMemory is presumably an absolute amount rather than
# a percentage like the other metrics -- confirm, and consider normalizing.
required_columns = [
    'EC2_CPUUtilization',
    'EC2_MemoryUtilization',
    'RDS_CPUUtilization',
    'RDS_FreeableMemory',
    'ECS_CPUUtilization',
    'ECS_MemoryUtilization',
]
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    raise ValueError(f"{data_path} is missing required columns: {missing_columns}")

# Define warmup steps (initial steps to skip)
warmup_steps = 24
current_step = warmup_steps

# One transition needs rows `warmup_steps` and `warmup_steps + 1` to exist.
if len(df) < warmup_steps + 2:
    raise ValueError(
        f"{data_path} has {len(df)} rows; at least {warmup_steps + 2} are required"
    )

# Define action and observation spaces
n_actions_per_resource = 3  # per-resource choices, decoded to -1 / 0 / +1
n_resources = 3
action_space_size = n_actions_per_resource ** n_resources
observation_space_shape = (len(required_columns),)  # 6 metrics, one per required column

2. Environment Setup

In [12]:
# Function to get the current state
def get_state(data, step):
    """Build the observation vector for one row of the metrics table.

    Args:
        data: DataFrame holding the utilization metric columns.
        step: Positional row index (used with ``.iloc``).

    Returns:
        A 1-D ``float32`` NumPy array with six entries, in the order
        EC2 CPU, EC2 memory, RDS CPU, RDS freeable memory, ECS CPU, ECS memory.
    """
    metric_columns = (
        'EC2_CPUUtilization',
        'EC2_MemoryUtilization',
        'RDS_CPUUtilization',
        'RDS_FreeableMemory',
        'ECS_CPUUtilization',
        'ECS_MemoryUtilization',
    )
    row = data.iloc[step]
    # Look each metric up by name so the vector order is fixed regardless of
    # the column order in the CSV.
    return np.array([row[column] for column in metric_columns], dtype=np.float32)

# Function to decode action index into scaling actions
def decode_action(action_idx):
    """Unpack a flat action index into one scaling decision per resource.

    The index is read as a base-``n_actions_per_resource`` number with
    ``n_resources`` digits, least-significant digit first. Each digit is
    shifted down by one, so with three actions per resource the entries are
    -1, 0 or +1.

    Args:
        action_idx: Integer index of the combined action.

    Returns:
        A ``float32`` NumPy array of length ``n_resources``.
    """
    remaining = action_idx
    decoded = []
    while len(decoded) < n_resources:
        # Peel off the lowest digit; the quotient carries the higher digits.
        remaining, digit = divmod(remaining, n_actions_per_resource)
        decoded.append(digit - 1)
    return np.array(decoded, dtype=np.float32)

# Function to calculate reward
def calculate_reward(data, step, action):
    """Score one step from the EC2 metrics of that row and the chosen action.

    The reward is the sum of four terms:
      * minus the distance of EC2 CPU utilization from 70,
      * minus the distance of EC2 memory utilization from 70,
      * minus 10 for every unit of absolute scaling in ``action``,
      * +100 when EC2 CPU utilization lies in [30, 80], otherwise -50.

    Args:
        data: DataFrame holding the utilization metric columns.
        step: Positional row index (used with ``.iloc``).
        action: Array of per-resource scaling decisions.

    Returns:
        The scalar reward for the step.
    """
    row = data.iloc[step]
    ec2_cpu = row['EC2_CPUUtilization']
    ec2_memory = row['EC2_MemoryUtilization']

    if 30 <= ec2_cpu <= 80:
        performance_reward = 100
    else:
        performance_reward = -50

    cpu_penalty = -abs(ec2_cpu - 70)
    memory_penalty = -abs(ec2_memory - 70)
    scaling_penalty = -np.sum(np.abs(action)) * 10

    return cpu_penalty + memory_penalty + scaling_penalty + performance_reward

3. DQN Agent Setup

In [13]:
# Reproducibility: seed every RNG the agent draws from (replay sampling uses
# `random`, exploration uses NumPy, weight init and dropout use TensorFlow).
random_seed = 42
random.seed(random_seed)
np.random.seed(random_seed)
tf.random.set_seed(random_seed)

# Hyperparameters
# Sizes are derived from the spaces defined in the preprocessing cell so the
# network cannot silently drift out of sync with the environment.
state_size = observation_space_shape[0]  # 6 metrics
action_size = action_space_size  # 3 actions per resource, 3 resources -> 27
memory = deque(maxlen=2000)  # replay buffer; oldest transitions are evicted first
gamma = 0.95  # Discount rate
epsilon = 1.0  # Exploration rate
epsilon_min = 0.01
epsilon_decay = 0.995
learning_rate = 0.001
batch_size = 32
target_update_frequency = 10  # Update target model every 10 episodes


def build_q_network():
    """Create an uncompiled Q-network: state vector in, one Q-value per action out.

    Uses an explicit ``Input`` layer as the first element instead of passing
    ``input_dim`` to the first ``Dense`` layer, which Keras warns against for
    Sequential models.

    Returns:
        A ``Sequential`` model mapping ``(batch, state_size)`` inputs to
        ``(batch, action_size)`` linear outputs.
    """
    return Sequential([
        tf.keras.Input(shape=(state_size,)),
        Dense(64, activation='relu'),
        BatchNormalization(),
        Dropout(0.2),
        Dense(64, activation='relu'),
        BatchNormalization(),
        Dropout(0.2),
        Dense(action_size, activation='linear')
    ])


# Build the DQN model
model = build_q_network()
model.compile(loss='mse', optimizer=Adam(learning_rate=learning_rate))

# Target model (for stability): same architecture, starts as a copy of the
# online model and is only used for prediction, so it is left uncompiled.
target_model = build_q_network()
target_model.set_weights(model.get_weights())

# Learning rate scheduler
reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.2, patience=5, min_lr=0.0001)

# Early stopping
early_stopping = EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)

# Function to update the target model
def update_target_model():
    """Copy the online model's current weights into the target model."""
    online_weights = model.get_weights()
    target_model.set_weights(online_weights)

# Function to choose an action
def choose_action(state):
    """Pick an action index with an epsilon-greedy policy.

    Args:
        state: Observation passed straight to ``model.predict``; the first row
            of the prediction is used.

    Returns:
        A random index in ``[0, action_size)`` with probability ``epsilon``,
        otherwise the index of the largest predicted Q-value.
    """
    explore = np.random.rand() <= epsilon
    if not explore:
        q_values = model.predict(state, verbose=0)
        return int(np.argmax(q_values[0]))
    return np.random.randint(action_size)

# Function to replay a uniformly sampled minibatch of stored experiences
def replay_experiences():
    """Fit the online model on one random minibatch from the replay memory.

    Does nothing until ``memory`` holds at least ``batch_size`` transitions.
    Otherwise it samples ``batch_size`` transitions uniformly (no
    prioritization), builds Q-learning targets using ``target_model`` for the
    bootstrap value, runs a single ``fit`` pass on the online ``model``, and
    then decays the global ``epsilon`` towards ``epsilon_min``.

    Each transition is ``(state, action, reward, next_state, done)``, with the
    two states stored as ``(1, state_size)`` arrays.
    """
    global epsilon

    if len(memory) < batch_size:
        return

    minibatch = random.sample(memory, batch_size)

    # Stack the (1, state_size) rows into (batch_size, state_size) matrices so
    # each network is evaluated once per minibatch rather than once per sample.
    states = np.vstack([transition[0] for transition in minibatch])
    next_states = np.vstack([transition[3] for transition in minibatch])

    # Start from the online model's own predictions so that only the entry of
    # the action actually taken contributes to the loss. np.array() copies, so
    # the buffer is safe to modify in place.
    targets = np.array(model.predict(states, verbose=0))
    next_q_values = target_model.predict(next_states, verbose=0)

    for i, (_, action, reward, _, done) in enumerate(minibatch):
        if done:
            # Terminal transition: no future value to bootstrap from.
            targets[i, action] = reward
        else:
            targets[i, action] = reward + gamma * np.amax(next_q_values[i])

    # NOTE(review): with epochs=1 the patience-based callbacks likely never
    # trigger, since each fit() call is a fresh one-epoch run -- confirm
    # whether they are still wanted here.
    model.fit(states, targets, epochs=1, verbose=0, batch_size=batch_size, callbacks=[reduce_lr, early_stopping])

    # Decay epsilon
    if epsilon > epsilon_min:
        epsilon *= epsilon_decay

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


4. Training Loop

In [14]:
# Training loop
episodes = 1000
steps_per_episode = len(df) - warmup_steps - 1
for episode in range(episodes):
    # Rewind to the first post-warmup row at the start of every episode.
    # Without this, current_step is left at the end of the data after the
    # first episode and get_state(df, current_step + 1) indexes past the
    # last row from the second episode onward.
    current_step = warmup_steps
    state = get_state(df, current_step)
    state = np.reshape(state, [1, state_size])
    total_reward = 0

    for _ in range(steps_per_episode):
        action_idx = choose_action(state)
        action = decode_action(action_idx)
        next_state = get_state(df, current_step + 1)
        next_state = np.reshape(next_state, [1, state_size])
        reward = calculate_reward(df, current_step, action)
        # The transition into the last row of the data ends the episode.
        done = current_step >= len(df) - 2

        # Store the flat action index (not the decoded vector): it is what
        # replay_experiences() uses to address the Q-value to update.
        memory.append((state, action_idx, reward, next_state, done))
        state = next_state
        total_reward += reward
        current_step += 1

        if done:
            break

        replay_experiences()

    if episode % target_update_frequency == 0:
        update_target_model()

    print(f"Episode: {episode + 1}/{episodes}, Total Reward: {total_reward}")

KeyboardInterrupt: 

5. Save the Model

In [None]:
# Write the trained online model to disk (path is relative to the notebook's
# working directory).
model_filename = 'cloud_rl_model.h5'
model.save(model_filename)

# Optional follow-up, left disabled: relocate the saved file.
# shutil.move('cloud_rl_model.h5', '/content/drive/MyDrive/FYPDataset/')
# print("Model saved and moved to the desired directory.")

6. Test accuracy

In [None]:
# Load the trained model for inference only. compile=False skips restoring the
# training loss/optimizer, which evaluation does not need.
# NOTE(review): legacy .h5 files saved with a string loss such as 'mse' are
# reported to fail deserialization on some Keras 3 releases; compile=False
# sidesteps that. Re-run the agent setup cell before training again, since
# `model` is rebound to this uncompiled copy.
model = tf.keras.models.load_model('cloud_rl_model.h5', compile=False)

# Steps to evaluate: every post-warmup row that still has a successor row,
# i.e. the same steps the training loop visits.
eval_steps = range(warmup_steps, len(df) - 1)
if len(eval_steps) == 0:
    raise ValueError("Not enough rows after warmup to evaluate the model")

# The dataset is replayed as-is (actions do not change the next state), so all
# states are known up front and can be scored in one batched predict() call
# instead of one call per time step.
eval_states = np.stack([get_state(df, step) for step in eval_steps])
q_values = model.predict(eval_states, verbose=0)
predicted_actions = [int(idx) for idx in np.argmax(q_values, axis=1)]

# Baseline "ground truth" (replace with real labels if available): assume the
# optimal action is to do nothing. decode_action() reads the index as base-3
# digits minus one, so the all-zero action is the index whose digits are all 1
# (13 for 3 resources x 3 actions) -- index 1 would decode to [0, -1, -1].
no_op_action = sum(n_actions_per_resource ** i for i in range(n_resources))
actual_actions = [no_op_action] * len(predicted_actions)

# Reward obtained by the greedy policy at every evaluated step
rewards = [
    calculate_reward(df, step, decode_action(action_idx))
    for step, action_idx in zip(eval_steps, predicted_actions)
]
cpu_utilization = df['EC2_CPUUtilization'].iloc[warmup_steps:len(df) - 1].tolist()
memory_utilization = df['EC2_MemoryUtilization'].iloc[warmup_steps:len(df) - 1].tolist()

# Calculate accuracy (if ground truth is available)
if len(actual_actions) > 0:
    accuracy = np.mean(np.array(actual_actions) == np.array(predicted_actions))
    print(f"Accuracy: {accuracy * 100:.2f}%")

# Plot results
fig, (ax_actions, ax_cpu, ax_memory) = plt.subplots(3, 1, figsize=(14, 10))

# Plot Actual vs. Predicted Actions
ax_actions.plot(actual_actions, label='Actual Actions', marker='o')
ax_actions.plot(predicted_actions, label='Predicted Actions', marker='x')
ax_actions.set_title('Actual vs. Predicted Actions')
ax_actions.set_xlabel('Time Step')
ax_actions.set_ylabel('Action')
ax_actions.legend()

# Plot CPU Utilization
ax_cpu.plot(cpu_utilization, label='CPU Utilization', color='orange')
ax_cpu.set_title('CPU Utilization Over Time')
ax_cpu.set_xlabel('Time Step')
ax_cpu.set_ylabel('CPU Utilization (%)')
ax_cpu.legend()

# Plot Memory Utilization
ax_memory.plot(memory_utilization, label='Memory Utilization', color='green')
ax_memory.set_title('Memory Utilization Over Time')
ax_memory.set_xlabel('Time Step')
ax_memory.set_ylabel('Memory Utilization (%)')
ax_memory.legend()

fig.tight_layout()
plt.show()

# Print average reward
print(f"Average Reward: {np.mean(rewards):.2f}")