In [1]:
# %%
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
import gym
from gym import spaces
from collections import deque
import random

# %%
# Load dataset
df = pd.read_csv("metrics-with-output.csv")

# %%
# Preprocessing
df['cpu_usage'] = df['cpu_usage'].str.rstrip('%').astype(float) / 100.0
df['memory_usage'] = df['memory_usage'].str.rstrip('%').astype(float) / 100.0

def clean_bandwidth(value):
    if 'GB/s' in value:
        return float(value.replace('GB/s', ''))
    elif 'MB/s' in value:
        return float(value.replace('MB/s', '')) / 1024
    else:
        return float(value)

df['bandwidth_inbound'] = df['bandwidth_inbound'].apply(clean_bandwidth)
df['bandwidth_outbound'] = df['bandwidth_outbound'].apply(clean_bandwidth)
df['tps'] = df['tps'].str.rstrip(' req/s').astype(float)
df['tps_error'] = df['tps_error'].str.rstrip(' req/s').astype(float)
df['response_time'] = df['response_time'].replace({' ms': '*1e-3', ' s': '*1'}, regex=True).map(pd.eval).astype(float)

# Encode Status
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
df['Status'] = label_encoder.fit_transform(df['Status'])

# %%
# Pembersihan dan penghapusan data yang tidak dipakai
dataset = df.drop(df.columns[[0, 1]], axis=1)

# %%
# Split data into features and target
X = dataset.drop('Status', axis=1)
y = dataset['Status']

# Normalize the data
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Split into train and test
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# %%
# DQN Network
class QNetwork(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(QNetwork, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, output_dim)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

# %%
class DQNAgent:
    def __init__(self, state_dim, action_dim, lr=0.001, gamma=0.99, epsilon=1.0, epsilon_decay=0.995, epsilon_min=0.01, batch_size=64):
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.lr = lr
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        self.epsilon_min = epsilon_min
        self.batch_size = batch_size
        self.memory = deque(maxlen=10000)

        self.q_network = QNetwork(state_dim, action_dim).to(device)
        self.target_network = QNetwork(state_dim, action_dim).to(device)
        self.optimizer = optim.Adam(self.q_network.parameters(), lr=lr)
        self.loss_fn = nn.MSELoss()
        
        # Copy the weights to the target network
        self.target_network.load_state_dict(self.q_network.state_dict())

    def select_action(self, state):
        if np.random.rand() < self.epsilon:
            return random.choice(range(self.action_dim))
        state = torch.FloatTensor(state).unsqueeze(0).to(device)
        q_values = self.q_network(state)
        return torch.argmax(q_values).item()

    def store_transition(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def replay(self):
        if len(self.memory) < self.batch_size:
            return

        batch = random.sample(self.memory, self.batch_size)
        states, actions, rewards, next_states, dones = zip(*batch)

        states = torch.FloatTensor(states).to(device)
        actions = torch.LongTensor(actions).unsqueeze(1).to(device)
        rewards = torch.FloatTensor(rewards).to(device)
        next_states = torch.FloatTensor(next_states).to(device)
        dones = torch.FloatTensor(dones).to(device)

        q_values = self.q_network(states).gather(1, actions).squeeze()
        with torch.no_grad():
            max_next_q_values = self.target_network(next_states).max(1)[0]
            target_q_values = rewards + self.gamma * max_next_q_values * (1 - dones)

        loss = self.loss_fn(q_values, target_q_values)

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def update_target_network(self):
        self.target_network.load_state_dict(self.q_network.state_dict())

# %%
# Custom Environment
class CustomEnv(gym.Env):
    def __init__(self):
        super(CustomEnv, self).__init__()

        # Load data from the dataset
        self.df = dataset

        # Define action and observation space
        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Box(low=0, high=1, shape=(7,), dtype=np.float64)

        self.current_step = 0

    def reset(self):
        self.current_step = 0
        return self._get_observation()
    
    def step(self, action):
        self.current_step += 1
        done = self.current_step >= len(self.df) - 1

        reward = self._get_reward(action)
        next_state = self._get_observation()
        
        return next_state, reward, done, {}
    
    def _get_observation(self):
        return self.df.iloc[self.current_step][['cpu_usage', 'memory_usage', 'bandwidth_inbound', 'bandwidth_outbound', 'tps', 'tps_error', 'response_time']].values
    
    def _get_reward(self, action):
        if action == self.df.iloc[self.current_step]['Status']:
            return 1.0
        else:
            return -1.0

# %%
# Training DQN
env = CustomEnv()
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.n
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

dqn_agent = DQNAgent(state_dim, action_dim)

num_episodes = 1000
update_target_steps = 10
episode_rewards = []

for episode in range(num_episodes):
    state = env.reset()
    total_reward = 0
    done = False

    while not done:
        action = dqn_agent.select_action(state)
        next_state, reward, done, _ = env.step(action)
        dqn_agent.store_transition(state, action, reward, next_state, done)
        state = next_state
        total_reward += reward

        dqn_agent.replay()

    dqn_agent.update_target_network()

    episode_rewards.append(total_reward)
    if episode % update_target_steps == 0:
        print(f"Episode {episode}: Total Reward: {total_reward}")

# %%
# Plotting rewards
plt.plot(range(num_episodes), episode_rewards)
plt.xlabel('Episode')
plt.ylabel('Total Reward')
plt.title('Training DQN')
plt.show()

# %%
# Evaluating DQN
def evaluate_dqn(agent, env, n_eval_episodes=10):
    rewards = []
    for episode in range(n_eval_episodes):
        state = env.reset()
        total_reward = 0
        done = False
        while not done:
            action = agent.select_action(state)
            state, reward, done, _ = env.step(action)
            total_reward += reward
        rewards.append(total_reward)
    return np.mean(rewards), np.std(rewards)

mean_reward, std_reward = evaluate_dqn(dqn_agent, env)
print(f"Mean Reward: {mean_reward} +/- {std_reward}")


  states = torch.FloatTensor(states).to(device)


Episode 0: Total Reward: 5292.0
Episode 10: Total Reward: 6006.0
Episode 20: Total Reward: 6120.0
Episode 30: Total Reward: 6190.0
Episode 40: Total Reward: 6316.0
Episode 50: Total Reward: 6270.0
Episode 60: Total Reward: 6366.0
Episode 70: Total Reward: 6316.0
Episode 80: Total Reward: 6432.0
Episode 90: Total Reward: 6304.0
Episode 100: Total Reward: 6260.0
Episode 110: Total Reward: 6244.0
Episode 120: Total Reward: 6236.0
Episode 130: Total Reward: 6250.0
Episode 140: Total Reward: 6256.0
Episode 150: Total Reward: 6354.0
Episode 160: Total Reward: 6248.0
Episode 170: Total Reward: 6318.0
Episode 180: Total Reward: 6356.0
Episode 190: Total Reward: 6294.0
Episode 200: Total Reward: 6292.0
Episode 210: Total Reward: 6284.0
Episode 220: Total Reward: 6258.0
Episode 230: Total Reward: 6306.0
Episode 240: Total Reward: 6290.0
Episode 250: Total Reward: 6232.0
Episode 260: Total Reward: 6236.0
