In [3]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import random
from collections import deque
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Define RL Environment
class ResourceScalingEnv:
    def __init__(self):
        self.state_size = 3  # EC2, RDS, ECS predicted usage
        self.action_size = 3  # Scale up, scale down, no action
        self.state = np.zeros(self.state_size)
        self.reward = 0
    
    def reset(self):
        self.state = np.random.rand(self.state_size)  # Start with random usage
        return self.state
    
    def step(self, action):
        # Simulate impact of action
        if action == 0:  # Scale Up
            self.state += np.random.uniform(0.01, 0.05, self.state_size)
            self.reward = -self.state.sum()  # More usage, more cost
        elif action == 1:  # Scale Down
            self.state -= np.random.uniform(0.01, 0.05, self.state_size)
            self.reward = self.state.sum()  # Less cost, but risk of under-scaling
        else:  # No Action
            self.reward = -abs(self.state.sum() - 0.5)  # Penalty for over/under allocation
        
        self.state = np.clip(self.state, 0, 1)  # Ensure valid range
        return self.state, self.reward

# Define Deep Q-Network
class DQN(nn.Module):
    def __init__(self, state_size, action_size):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(state_size, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, action_size)
    
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

# Train DQN Agent
class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95  # Discount factor
        self.epsilon = 1.0  # Exploration-exploitation balance
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = DQN(state_size, action_size)
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        self.criterion = nn.MSELoss()
    
    def remember(self, state, action, reward, next_state):
        self.memory.append((state, action, reward, next_state))
    
    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.choice([0, 1, 2])
        state_tensor = torch.FloatTensor(state).float()
        with torch.no_grad():
            action_values = self.model(state_tensor)
        return torch.argmax(action_values).item()
    
    def replay(self, batch_size):
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state in minibatch:
            target = reward + self.gamma * torch.max(self.model(torch.FloatTensor(next_state).float())).item()
            predicted_target = self.model(torch.FloatTensor(state))[action]
            loss = self.criterion(predicted_target, torch.tensor(target).float())
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

# Main Training Loop
def train_rl_agent(episodes=500):
    env = ResourceScalingEnv()
    agent = DQNAgent(env.state_size, env.action_size)
    for episode in range(episodes):
        state = env.reset()
        total_reward = 0
        for _ in range(10):  # Simulate 10 steps per episode
            action = agent.act(state)
            next_state, reward = env.step(action)
            agent.remember(state, action, reward, next_state)
            state = next_state
            total_reward += reward
        agent.replay(32)
        print(f"Episode {episode+1}/{episodes}, Total Reward: {total_reward:.2f}")
    torch.save(agent.model.state_dict(), "dqn_scaling_model.pth")
    print("DQN model saved!")

# Evaluate RL Model
def evaluate_rl_agent(episodes=100):
    env = ResourceScalingEnv()
    agent = DQNAgent(env.state_size, env.action_size)
    agent.model.load_state_dict(torch.load("dqn_scaling_model.pth"))
    agent.model.eval()
    
    total_rewards = []
    action_counts = {0: 0, 1: 0, 2: 0}
    
    for episode in range(episodes):
        state = env.reset()
        total_reward = 0
        for _ in range(10):
            action = agent.act(state)
            next_state, reward = env.step(action)
            total_reward += reward
            action_counts[action] += 1
            state = next_state
        total_rewards.append(total_reward)
    
    avg_reward = np.mean(total_rewards)
    action_distribution = {k: v / sum(action_counts.values()) for k, v in action_counts.items()}
    print(f"Average Reward: {avg_reward:.4f}")
    print(f"Action Distribution: {action_distribution}")
    return avg_reward, action_distribution

# Run training
train_rl_agent()

# Evaluate model
evaluate_rl_agent()


Episode 1/500, Total Reward: -7.55
Episode 2/500, Total Reward: 0.68
Episode 3/500, Total Reward: -5.28
Episode 4/500, Total Reward: -11.52
Episode 5/500, Total Reward: -9.99
Episode 6/500, Total Reward: 2.15
Episode 7/500, Total Reward: 1.08
Episode 8/500, Total Reward: -10.65
Episode 9/500, Total Reward: 0.75
Episode 10/500, Total Reward: -2.30
Episode 11/500, Total Reward: 5.30
Episode 12/500, Total Reward: -5.92
Episode 13/500, Total Reward: -3.04
Episode 14/500, Total Reward: -4.19
Episode 15/500, Total Reward: 6.88
Episode 16/500, Total Reward: -7.13
Episode 17/500, Total Reward: -1.49
Episode 18/500, Total Reward: 1.69
Episode 19/500, Total Reward: -3.39
Episode 20/500, Total Reward: -3.88
Episode 21/500, Total Reward: -6.12
Episode 22/500, Total Reward: -0.12
Episode 23/500, Total Reward: -0.82
Episode 24/500, Total Reward: -1.80
Episode 25/500, Total Reward: -8.95
Episode 26/500, Total Reward: -1.54
Episode 27/500, Total Reward: -1.15
Episode 28/500, Total Reward: -1.08
Episod

(-4.454542870408306, {0: 0.334, 1: 0.322, 2: 0.344})

In [None]:
def test_rl_agent(episodes=100):
    env = ResourceScalingEnv()
    agent = DQNAgent(env.state_size, env.action_size)
    agent.model.load_state_dict(torch.load("dqn_scaling_model.pth"))
    agent.model.eval()  # Set model to evaluation mode

    total_rewards = []
    action_counts = {0: 0, 1: 0, 2: 0}  # Track action distribution

    for episode in range(episodes):
        state = env.reset()
        total_reward = 0

        for _ in range(10):  # Simulate 10 steps per episode
            action = agent.act(state)
            next_state, reward = env.step(action)

            total_reward += reward
            action_counts[action] += 1  # Track action usage
            state = next_state  # Update state

        total_rewards.append(total_reward)
    
    avg_reward = np.mean(total_rewards)
    action_distribution = {k: v / sum(action_counts.values()) for k, v in action_counts.items()}

    print(f"Average Reward: {avg_reward:.4f}")
    print(f"Action Distribution: {action_distribution}")

    return avg_reward, action_distribution

# Run the test function
test_rl_agent()
