In [1]:
import numpy as np
import random
from scipy.special import erfcinv

import tensorflow as tf
from tensorflow.keras import models, layers, optimizers
from collections import deque

import os
import matplotlib.pyplot as plt

In [13]:
# Edge Computing Environment
class EdgeComputingEnvironment:
    def __init__(self, M=15, area_size=100, D_m=1354, eta_m_range=(100, 300), F_max_ue=1.5, P_max=23, B=5e6, T_max=10e-3, F_max_es=30, S_max_es=60, epsilon=10e-7, E_max=3, theta=1e-26, L=8, phi=0.02e-3, N0=-174, f_es_dev=0.02, f_ue_dev=0.02):
        """
        Initialize the edge computing environment with given parameters.
        """
        self.M = M  # Number of users
        self.area_size = area_size  # Size of the area in which users are distributed
        self.D_m = D_m  # Task data size
        self.eta_m_range = eta_m_range  # Range of computation requirements
        self.F_max_ue = F_max_ue * 1e9  # Maximum frequency of user equipment
        self.P_max = 10 ** (P_max / 10)  # Convert maximum transmission power from dB to Watts
        self.B = B  # Bandwidth
        self.T_max = T_max  # Maximum tolerable delay
        self.F_max_es = F_max_es * 1e9  # Maximum frequency of edge server
        self.S_max_es = S_max_es * 1e3  # Maximum storage size of edge server
        self.epsilon = epsilon  # Error tolerance for rate calculation
        self.E_max = E_max * 1e-3  # Maximum energy consumption
        self.theta = theta  # Energy coefficient
        self.L = L  # Number of antennas
        self.phi = phi  # Transmission time interval
        self.R_min = 1e6  # Minimum data rate
        self.N0 = N0  # Noise power in dBm
        self.N0 = 10 ** ((N0 - 30) / 10)  # Convert noise power from dBm/Hz to Watts/Hz
        self.PL_d = lambda d: -35.3 - (37.6 * np.log10(d))  # Path loss model
        self.f_es_dev = f_es_dev  #The deviation between the estimated value and the actual value of the processing rate of the ES
        self.f_ue_dev = f_ue_dev  #The deviation between the estimated value and the actual value of the processing rate of the UE
        self.penalty = 0.25
        self.treshhold = -0.5
        self.penalties = [0,0,0,0,0]

        self.user_device_params = []  # List to store parameters for each user device
        self.initialize_user_device_params()  # Initialize user device parameters

        self.server_params = self.initialize_server_params()  # Initialize server parameters

        self.cache = []  # Cache to store tasks
        self.current_cache_size = 0  # Current size of the cache
        self.transmitting_tasks = []  # List to store transmitting tasks
        self.processing_tasks = []  # List to store processing tasks
        self.current_time = 0  # Current simulation time

        # Initialize bandwidth and computation attributes
        self.total_bandwidth = 0 # Initialize total bandwidth
        self.total_computation = 0 # Initialize total computation

    def initialize_user_device_params(self):
        """
        Initialize parameters for each user device.
        Randomly generates user-specific parameters such as path loss.
        """
        for device_id in range(self.M):
            d = np.random.uniform(1, self.area_size / 2)  # Distance to server
            PL_dB = self.PL_d(d)
            g_m = 10 ** (PL_dB / 10)  # Convert path loss from dB to linear scale
            h_bar = np.random.randn(1, self.L) + 1j * np.random.randn(1, self.L)  # Channel gain

            self.user_device_params.append({
                'device_id': device_id,  # Assign a unique ID to each device
                'd': d,
                'g_m': g_m,
                'h_bar': h_bar,
            })

    def initialize_server_params(self):
        """
        Initialize parameters for the edge server.
        """
        return {
            'S_max_es': self.S_max_es  # Maximum storage size
        }

    def calculate_gamma_m(self, b_m, p_m, user_id):
        """
        Calculate the signal-to-noise ratio (SNR) for a given user.

        Parameters:
        - b_m (float): Bandwidth allocation
        - p_m (float): Transmission power
        - user_id (int): ID of the user

        Returns:
        - gamma_m (array): SNR values for the user's communication channel
        """
        h_m = np.sqrt(self.user_device_params[user_id]['g_m']) * self.user_device_params[user_id]['h_bar']  # Channel gain
        gamma_m = (p_m * np.linalg.norm(h_m, axis=1) ** 2) / (b_m * self.B * self.N0)  # SNR
        
        return gamma_m

    def calculate_uplink_rate(self, b_m, p_m, user_id):
        """
        Calculate the uplink data rate for a given user.

        Parameters:
        - b_m (float): Bandwidth allocation
        - p_m (float): Transmission power
        - user_id (int): ID of the user

        Returns:
        - R_m (float): Uplink data rate in bits/second
        """
        gamma_m = self.calculate_gamma_m(b_m, p_m, user_id)  # Calculate the SINR for the m-th user
        V_m = 1 - (1 / (1 + gamma_m) ** 2)  # Intermediate variable for rate calculation
        Q_inv = np.sqrt(2) * erfcinv(2 * self.epsilon)  # Calculate the inverse of the Q-function for the outage probability
        R_m = (self.B / np.log(2)) * ((b_m * np.log(1 + gamma_m)) - ((np.sqrt((b_m * V_m) / (self.phi * self.B))) * Q_inv))  # Uplink data rate

        return R_m

    def calculate_delay(self, alpha_m, cache_hit, b_m, p_m, D_m, f_ue_m, f_es_m, f_ue_est, f_es_est, eta_m, user_id):
        """
        Calculate the end-to-end delay for a given task.

        Parameters:
        - alpha_m (float): Offloading decision
        - cache_hit (int): Split factor (0 or 1)
        - b_m (float): Bandwidth allocation
        - p_m (float): Transmission power
        - D_m (int): Data size
        - f_ue_m (float): Computation capability of the user device
        - f_es_m (float): Computation capability of the edge server
        - f_ue_est (float): Estimation error for the user device's computation capability
        - f_es_est (float): Estimation error for the edge server's computation capability
        - eta_m (float): Computational intensity
        - user_id (int): ID of the user

        Returns:
        - T_e2e (float): End-to-end delay in seconds
        """
        actual_f_ue_m = f_ue_m - f_ue_est  # Actual processing rate of the user device
        actual_f_es_m = f_es_m - f_es_est  # Actual processing rate of the Edge server

        if cache_hit == 1:
            T_es = (eta_m * D_m) / actual_f_es_m  # Only edge server processing delay
            T_e2e = T_es

        else:
            T_ue = (alpha_m * eta_m * D_m) / actual_f_ue_m  # User device processing delay
            R_m = self.calculate_uplink_rate(b_m, p_m, user_id)  # Uplink data rate
            T_tr = (D_m * 8) / R_m  # Transmission delay
            T_es = ((1 - alpha_m) * eta_m * D_m) / actual_f_es_m  # Edge server processing delay
            T_e2e = T_ue + T_tr + T_es  # Total end-to-end delay
        return T_e2e

    def calculate_transmission_delay(self, b_m, p_m, D_m, user_id):
        """
        Calculate the transmission delay for a given task.

        Parameters:
        - b_m (float): Bandwidth allocation
        - p_m (float): Transmission power
        - D_m (int): Data size
        - user_id (int): ID of the user

        Returns:
        - T_co (float): Transmission delay in seconds
        """
        R_m = self.calculate_uplink_rate(b_m, p_m, user_id)  # Calculate uplink data rate
        T_co =  (D_m * 8) / R_m   # Transmission delay calculation based on task size and uplink rate

        return T_co

    def calculate_server_processing_delay(self, alpha_m, cache_hit, D_m, f_es_m, f_es_est, eta_m):
        """
        Calculate the processing delay at the edge server for a given task.

        Parameters:
        - alpha_m (float): Offloading decision
        - D_m (int): Data size
        - cache_hit (0,1): 1 = Exist in cache and 0 not exist in cache
        - f_es_m (float): Computation capability of the edge server
        - f_es_est (float): Estimation error for the edge server's computation capability
        - eta_m (float): Computational intensity

        Returns:
        - T_es (float): Processing delay at the edge server in seconds
        """

        if cache_hit == 0:
            T_es = ((1 - alpha_m) * eta_m * D_m) / (f_es_m - f_es_est)  # Processing delay at the edge server

        else:
            T_es = (eta_m * D_m) / (f_es_m - f_es_est)

        return T_es

    def calculate_energy_consumption(self, s_m, b_m, alpha_m, p_m, D_m, f_ue_m, f_ue_est, eta_m, user_id):
        """
        Calculate the energy consumption for a given task.

        Parameters:
        - alpha_m (float): Offloading decision
        - s_m (int): Split factor (0 or 1)
        - f_ue_m (float): Computation capability of the user device
        - b_m (float): Bandwidth allocation
        - p_m (float): Transmission power
        - f_ue_est (float): Estimation error for the user device's computation capability
        - eta_m (float): Computational intensity

        Returns:
        - E_total (float): Total energy consumption in Joules
        """
        R_m = self.calculate_uplink_rate(b_m, p_m, user_id)  # Calculate uplink data rate

        actual_f_ue_m = f_ue_m - f_ue_est  # Calculate the actual processing rate of the UE

        E_ue = alpha_m * (self.theta / 2) * eta_m * D_m * (actual_f_ue_m ** 2)  # Energy consumption at the user device
        E_tx = ((1 - alpha_m) * (D_m * 8) * p_m) / R_m  # Transmission energy

        if s_m == 1:  # Task is in cache
            E_total = 0  # No energy consumed when task is in cache
        else:
            E_total = E_ue + E_tx  # Total energy consumption

        return E_total

    def manage_cache(self, task_info, task_delay):
        """
        Manage the cache for storing and retrieving tasks.

        Parameters:
        - task_info (tuple): Task parameters to identify the task
        - task_delay (float): Delay of the task

        Returns:
        - bool: True if the task is found in the cache, False otherwise
        """
        if task_delay == 0:
            for task in self.cache:
                if task_info == task[0]:  # Check if the task is already in cache
                    return True
            return False

        task_size = task_info['D_m'] * 8  # Task size
        Server_Max_Capacity = self.server_params['S_max_es']  # Server maximum capacity

        if (task_size + self.current_cache_size) <= Server_Max_Capacity:
            self.cache.append((task_info, task_delay))  # Add task to cache
            self.current_cache_size += task_size  # Update cache size
            return True
        else:
            sorted_cache = sorted(self.cache, key=lambda x: x[1], reverse=True)  # Sort tasks by delay in descending order

            while (task_size + self.current_cache_size) > Server_Max_Capacity:
                if not sorted_cache:
                    break  # Exit loop if sorted_cache is empty
                last_task = sorted_cache.pop()  # Remove the last task from sorted_cache
                self.cache.remove(last_task)  # Remove the task from the cache
                self.current_cache_size -= last_task[0]['D_m'] * 8  # Update current cache size

            self.cache.append((task_info, task_delay))  # Add task to cache
            self.current_cache_size += task_size  # Update cache size

            return True

    def step(self, actions, tasks, users_id):
        """
        Perform a simulation step for the given action.

        Parameters:
        - action (array): Array of action for each user
        - tasks (array): Array of task for each user

        Returns:
        - tuple: (task_rewards, next_state, done)
        """
        # Initialize cumulative metrics for the step
        task_rewards = []  # List to store reward for each task
        state_info = []  # List to store individual task and device state information
        done = False

        for action, task, user_id in zip(actions, tasks, users_id):
            # Determine if the task is a cache hit or miss
            cache_hit = 1 if self.manage_cache(task, 0) else 0

            alpha_m = action[0]
            b_m = action[1]
            p_m = action[2]
            f_ue_m = action[3]
            f_es_m = action[4]

            f_ue_est = f_ue_m * self.f_ue_dev  
            f_es_est = f_es_m * self.f_es_dev  

            # Calculate the end-to-end delay for the task
            delay = self.calculate_delay(
                alpha_m, cache_hit, b_m, p_m,
                task['D_m'], f_ue_m, f_es_m, f_ue_est,
                f_es_est, task['eta_m'], user_id
            )

            delay = delay[0] if isinstance(delay, np.ndarray) else delay

            # Calculate the energy consumption for the task
            energy = self.calculate_energy_consumption(
                cache_hit, b_m, alpha_m, p_m, task['D_m'], f_ue_m,
                f_es_est, task['eta_m'], user_id
            )

            energy = energy[0] if isinstance(energy, np.ndarray) else energy

            # Calculate the uplink data rate for the user
            R_m = self.calculate_uplink_rate(b_m, p_m, user_id)

            # Manage task transmission and processing times
            if cache_hit == 0:
                transmission_end_time = self.current_time + self.calculate_transmission_delay(b_m, p_m,task['D_m'], user_id)
                processing_end_time = transmission_end_time + self.calculate_server_processing_delay(alpha_m, cache_hit, task['D_m'], f_es_m, f_es_est, task['eta_m'])

                self.transmitting_tasks.append((self.current_time, transmission_end_time, b_m))
                self.processing_tasks.append((transmission_end_time, processing_end_time, f_es_m))

                # Update cache with the task if it becomes eligible
                self.manage_cache(task, delay)
            else:
                # For cache hit, only processing delay is considered
                processing_end_time = self.current_time + self.calculate_server_processing_delay(alpha_m, cache_hit, task['D_m'], f_es_m, f_es_est, task['eta_m'])
                process = f_es_m * (1 - alpha_m)
                self.processing_tasks.append((self.current_time, processing_end_time, process))
                
            # Calculate total bandwidth and computation resource usage at current time
            self.total_bandwidth = sum(b for _, end_time, b in self.transmitting_tasks if end_time > self.current_time)
            self.total_computation = sum(f for _, end_time, f in self.processing_tasks if end_time > self.current_time)

            # Free resources for tasks that have completed transmission or processing
            self.transmitting_tasks = [(start_time, end_time, b) for start_time, end_time, b in self.transmitting_tasks if end_time > self.current_time]
            self.processing_tasks = [(start_time, end_time, f) for start_time, end_time, f in self.processing_tasks if end_time > self.current_time]

            # Calculate reward
            task_reward  = -energy - delay

            # Apply penalties for exceeding resource limits
            if delay > task['T_max']:
                task_reward -= self.penalty/2
                self.penalties[0] += 1
            if energy > self.E_max:
                task_reward -= self.penalty*2
                done = True
                self.penalties[1] += 1
            if R_m < self.R_min:
                task_reward -= self.penalty*2
                done = True
                self.penalties[2] += 1
            if self.total_bandwidth > 1:
                task_reward -= self.penalty*2
                done = True
                self.penalties[3] += 1
            if self.total_computation > self.F_max_es:
                task_reward -= self.penalty*2
                done = True
                self.penalties[4] += 1

            if energy > self.E_max and R_m < self.R_min and self.total_bandwidth and self.total_computation > self.F_max_es :
                task_reward += self.penalty*2

            # Check if the cumulative reward is below a certain threshold
            # if task_reward < self.treshhold :
            #     done = True

            # Store metrics and state information for the task
            task_rewards.append(task_reward)

            state_info.append({
                'device_id': user_id,
                'task': task,  # Include task information directly
                'delay': delay,
                'energy': energy,
                'Occupied bandwidth': self.total_bandwidth,
                'Occupied computation': self.total_computation,
                #'cache_size': self.current_cache_size  # Add cache size
            })

        # Increment current simulation time
        self.current_time += 0.020 

        # Prepare the next state
        next_state = state_info
        
        return task_rewards, next_state, done

    def reset(self):
        """
        Reset the environment to its initial state.
        """
        self.cache = [] 
        self.current_cache_size = 0  
        self.transmitting_tasks = [] 
        self.processing_tasks = [] 
        self.current_time = 0  
        self.initialize_user_device_params()
        self.total_bandwidth = 0  
        self.total_computation = 0  
        self.server_params = self.initialize_server_params() 
        self.penalties = [0,0,0,0,0]
        device_state_info = {user_id: {
            'Occupied bandwidth':  self.total_bandwidth,
            'Occupied computation': self.total_computation,
            'energy': 0,
            'delay' : 0,
            'device_id' : None,
            'task' : {
            'eta_m': 0,
            'T_max': 0,
            'D_m': 0
        }
        } for user_id in range(self.M)}

        return device_state_info

    def render(self):
        print(self.penalties)
        # print(f"Total Bandwidth Used: {self.total_bandwidth}")
        # print(f"Total Computation Used: {self.total_computation}")
        # print(f"Current Cache Size: {self.current_cache_size}")
        # print(f"Number of Transmitting Tasks: {len(self.transmitting_tasks)}")
        # print(f"Number of Processing Tasks (Not Exist In Cache): {len(self.processing_tasks)}")

In [18]:
# Deep Q-Network (DQN) Agent
class DQNAgent:
    def __init__(self, env, num_users, alpha=0.001, gamma=0.95, epsilon=1.0, batch_size=64, max_steps_per_episode=20):
        """
        Set up the agent's hyper-parameters, replay memory and network.

        Parameters:
        - env: Environment the agent interacts with
        - num_users (int): Number of users/devices in the environment
        - alpha (float): Learning rate passed to the optimizer
        - gamma (float): Discount factor for future rewards
        - epsilon (float): Initial exploration rate
        - batch_size (int): Number of transitions sampled per replay call
        - max_steps_per_episode (int): Upper bound on steps in one training episode
        """
        # Environment and problem size
        self.env = env
        self.num_users = num_users
        self.num_tasks = 15  # Tasks generated per step (fixed)

        # Learning hyper-parameters
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.batch_size = batch_size
        self.max_steps_per_episode = max_steps_per_episode

        # Bookkeeping
        self.save_interval = 100  # Episodes between weight checkpoints
        self.num_candidates = 1000  # Candidate actions sampled when maximizing over actions

        # Network input sizes
        self.state_dim = 6
        self.action_dim = 5

        # Replay memory; the oldest transitions are dropped beyond 20000 entries
        self.memory = deque(maxlen=20000)

        # Must come last: build_model reads state_dim, action_dim and alpha
        self.model = self.build_model()

    def build_model(self):
        """
        Build and compile the network that scores a (state, action) pair.

        The state and action inputs are concatenated, passed through three
        64-unit ReLU layers and reduced to a single linear output. The model
        is compiled with MSE loss and Adam using self.alpha as learning rate.

        Returns:
        - The compiled Keras model taking [state, action] as inputs
        """
        state_in = layers.Input(shape=(self.state_dim,))
        action_in = layers.Input(shape=(self.action_dim,))

        hidden = layers.Concatenate()([state_in, action_in])
        for _ in range(3):
            hidden = layers.Dense(64, activation='relu')(hidden)

        q_value = layers.Dense(1, activation='linear')(hidden)

        network = models.Model(inputs=[state_in, action_in], outputs=q_value)
        network.compile(loss='mse', optimizer=optimizers.Adam(learning_rate=self.alpha))
        return network

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))
        
    def update_epsilon(self, episode, total_episodes):
        min_epsilon=0.1
        k = np.log(self.epsilon / min_epsilon) / total_episodes
        self.epsilon = min_epsilon + (self.epsilon - min_epsilon) * np.exp(-k * episode)  # Decay the exploration rate

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            # Choose a random action from the predefined set (exploration)
            action = self.sample_random_action()
        else:
            # Define the number of candidate actions to sample
            num_candidates = self.num_candidates

            # Generate candidate actions from the predefined set
            candidate_actions = np.array([self.sample_random_action() for _ in range(num_candidates)])

            # Prepare the state array for batch prediction
            state_batch = np.tile(state, (num_candidates, 1))

            # Predict the reward for each candidate action
            predicted_rewards = self.model.predict([state_batch, candidate_actions])

            # Select the action with the highest predicted reward
            best_action_index = np.argmax(predicted_rewards)
            action = candidate_actions[best_action_index]
        return action

    def sample_random_action(self):
        # Generate random values for the action parameters
        alpha = np.round(np.random.choice(np.linspace(0, 1, 10)),2) 
        b = np.round(np.random.choice(np.linspace(0.01, (2 / self.num_users), 10)),2) 
        p = np.round(np.random.choice(np.linspace(1, self.env.P_max, 50))) 
        f_ue = np.round(np.random.choice(np.linspace(1e6, self.env.F_max_ue, 10)))
        f_es = np.round(np.random.choice(np.linspace(1e6, ((2 * self.env.F_max_es) / self.num_users), 10)))
        # Actions Space = 2500000

        return np.array([alpha, b, p, f_ue, f_es])

    def replay(self):
        """
        Train the model on one minibatch sampled from the replay memory.

        Does nothing until the memory holds at least self.batch_size
        transitions. For each sampled transition the target is the reward
        (terminal) or the reward plus gamma times the best predicted value
        over self.num_candidates randomly sampled actions in the next state.
        The model is then fitted on that single (state, action) -> target
        pair.
        """
        if len(self.memory) < self.batch_size:
            return

        minibatch = random.sample(self.memory, self.batch_size)
        for state, action, reward, next_state, done in minibatch:
            state = np.array(state).reshape(1, -1)
            action = np.array(action).reshape(1, -1)
            next_state = np.array(next_state).reshape(1, -1)

            if done:
                target = reward
            else:
                # The action space is too large to enumerate, so approximate
                # the max over actions with a random sample of candidates
                num_samples = self.num_candidates
                candidate_actions = np.array([self.sample_random_action() for _ in range(num_samples)])

                # Repeat the next state once per candidate for a batched prediction
                state_batch = np.tile(next_state, (num_samples, 1))

                # verbose=0 keeps Keras from printing a progress bar per call
                predicted_q_values = self.model.predict([state_batch, candidate_actions], verbose=0)

                target = reward + self.gamma * np.max(predicted_q_values)

            target_scalar = target.item() if isinstance(target, np.ndarray) else target

            # The model has a single output unit, so the training target for
            # one sample is a (1, 1) array holding just the target value.
            # No prediction is needed to build it.
            target_f = np.array([[target_scalar]], dtype=np.float32)

            # Train the model with the state, action, and target
            self.model.fit([state, action], target_f, epochs=1, verbose=0)

    def load(self, name):
        """Load network weights into self.model from the file `name`."""
        self.model.load_weights(name)

    def save(self, name):
        """Write the weights of self.model to the file `name`."""
        # NOTE(review): newer Keras releases may require a `.weights.h5` suffix
        # for weight files - confirm against the installed version.
        self.model.save_weights(name)

    def create_task(self):
        # Create a new task with random parameters within specified ranges
        eta_m =  np.round(np.random.choice(np.linspace(self.env.eta_m_range[0], self.env.eta_m_range[1], 50)))
        T_max_task = 10e-3  # Static according to article
        task_info = {
            'eta_m': eta_m,
            'T_max': T_max_task,
            'D_m': 1354  # Task data size
        }
        return task_info

    def train(self, num_episodes):
        """
        Train the agent for `num_episodes` episodes and plot the results.

        Every episode resets the environment and runs up to
        self.max_steps_per_episode steps. In each step self.num_tasks tasks
        are assigned to randomly chosen users, an action is selected per
        task, the environment is stepped once, and every transition is
        stored in the replay memory. After the episode the averages are
        printed, epsilon is decayed, one replay update is performed, and
        the weights are saved to 'Model.h5' every self.save_interval episodes.

        Parameters:
        - num_episodes (int): Number of training episodes
        """
        # Per-episode averages, collected for the final plot
        delay_history = []
        energy_history = []
        reward_history = []

        for episode in range(num_episodes):
            # Fresh per-user state at the start of every episode
            device_state_info = self.env.reset()

            delay_sum = 0
            energy_sum = 0
            reward_sum = 0
            task_count = 0
            actual_steps = 0

            for step in range(self.max_steps_per_episode):
                tasks = []
                users_id = []

                task_count += self.num_tasks

                # Randomly distribute tasks to users (a user may get several)
                task_distribution = np.random.choice(range(self.num_users), self.num_tasks, replace=True)

                # One state row per task, built from the user's latest info
                states_choose_actions = []
                for user_id in task_distribution:
                    task = self.create_task()
                    tasks.append(task)
                    users_id.append(user_id)

                    user_state = device_state_info[user_id]
                    user_state['task'] = task
                    user_state['device_id'] = user_id

                    states_choose_actions.append([
                        user_state['delay'],
                        user_state['energy'],
                        user_id,
                        user_state['task']['eta_m'],
                        user_state['Occupied bandwidth'],
                        user_state['Occupied computation']
                    ])

                # Select one action per task with the epsilon-greedy policy
                actions = [
                    self.act(np.array(row).reshape(1, -1))
                    for row in np.array(states_choose_actions)
                ]

                # Execute the actions in the environment
                rewards, next_state_info, done = self.env.step(actions, tasks, users_id)

                # Walk the per-task results in the order the tasks were issued
                per_task = zip(task_distribution, states_choose_actions, next_state_info, actions, rewards)
                for user_id, device_info, next_device_info, action, reward in per_task:
                    delay = next_device_info.get('delay', 0)
                    energy = next_device_info.get('energy', 0)

                    next_state = np.array([
                        delay,
                        energy,
                        user_id,
                        next_device_info.get('task', {}).get('eta_m', 0),
                        next_device_info.get('Occupied bandwidth', 0),
                        next_device_info.get('Occupied computation', 0)
                    ]).reshape(1, -1)

                    self.remember(device_info, action, reward, next_state, done)

                    delay_sum += delay
                    energy_sum += energy
                    reward_sum += reward

                    # Carry the outcome over as the user's current state
                    device_state_info[user_id].update(next_device_info)

                actual_steps += 1

                # The environment flags a constraint violation: end the episode
                if done:
                    break

            self.env.render()

            # Averages are taken over all tasks issued in this episode
            avg_delay = (delay_sum / task_count) * 1000  # Convert to milliseconds
            avg_energy = energy_sum / task_count
            avg_reward = reward_sum / task_count
            delay_history.append(avg_delay)
            energy_history.append(avg_energy)
            reward_history.append(avg_reward)

            # Update epsilon for the epsilon-greedy strategy
            self.update_epsilon(episode, num_episodes)

            print(f"Train : Episode {episode + 1}/{num_episodes} - Steps Count {actual_steps} - Avg Delay: {avg_delay}, Avg Energy: {avg_energy}, Avg Reward: {avg_reward}")
            print("-" * 100)

            # One minibatch update per episode
            self.replay()

            if (episode + 1) % self.save_interval == 0:
                self.save('Model.h5')

        self.plot_results(delay_history, energy_history, reward_history)
    
    def test(self, num_test_steps):
        """
        Evaluate the current policy without exploration.

        The environment is reset and stepped up to `num_test_steps` times
        with epsilon forced to 0. The agent's exploration rate is restored
        afterwards, so a test run does not disturb later training.

        Parameters:
        - num_test_steps (int): Maximum number of environment steps to run

        Returns:
        - tuple: (avg_delay, avg_alpha), the average delay in milliseconds
          and the average offloading decision over all issued tasks
        """
        total_delay = 0
        total_alpha = 0
        total_energy = 0
        total_reward = 0

        num_all_tasks = 0
        actual_steps = 0

        # Disable exploration for the evaluation only; restored in `finally`
        saved_epsilon = self.epsilon
        self.epsilon = 0
        try:
            # Reset the environment and get initial state
            device_state_info = self.env.reset()

            for step in range(num_test_steps):
                tasks = []
                users_id = []

                num_all_tasks += self.num_tasks

                # Randomly distribute tasks to users
                task_distribution = np.random.choice(range(self.num_users), self.num_tasks, replace=True)

                # One state row per task, built from the user's latest info
                states_choose_actions = []

                for user_id in task_distribution:
                    task = self.create_task()
                    tasks.append(task)
                    users_id.append(user_id)

                    # Update device state information with the new task
                    device_state_info[user_id]['task'] = task
                    device_state_info[user_id]['device_id'] = user_id

                    state_choose_actions = [
                        device_state_info[user_id]['delay'],
                        device_state_info[user_id]['energy'],
                        user_id,
                        device_state_info[user_id]['task']['eta_m'],
                        device_state_info[user_id]['Occupied bandwidth'],
                        device_state_info[user_id]['Occupied computation']
                    ]

                    states_choose_actions.append(state_choose_actions)

                states = np.array(states_choose_actions)

                # Select actions for each task using the model
                actions = [self.act(np.array(state).reshape(1, -1)) for state in states]

                # Execute the actions in the environment and get the results
                rewards, next_state_info, done = self.env.step(actions, tasks, users_id)

                total_reward += sum(rewards)

                for user_id, next_device_info, action in zip(task_distribution, next_state_info, actions):
                    delay = next_device_info.get('delay', 0)
                    energy = next_device_info.get('energy', 0)

                    total_delay += delay
                    total_energy += energy
                    total_alpha += action[0]

                    # Carry the outcome over as the user's current state
                    device_state_info[user_id].update(next_device_info)

                actual_steps += 1

                if done:
                    # Exit the loop if the environment signals that the episode is done
                    break

            self.env.render()
        finally:
            self.epsilon = saved_epsilon

        # Averages are taken over all tasks issued during the test
        avg_delay = (total_delay / num_all_tasks) * 1000  # Convert to milliseconds
        avg_energy = total_energy / num_all_tasks
        avg_alpha = total_alpha / num_all_tasks
        avg_reward = total_reward / num_all_tasks

        print(f"Test : Steps Count {actual_steps} - Avg Delay: {avg_delay}, Avg Energy: {avg_energy}, Avg Reward: {avg_reward}, Avg Alpha: {avg_alpha}")
        print("-" * 100)

        return avg_delay, avg_alpha


    def plot_results(self, avg_delays, avg_energies, avg_rewards):
        """
        Plot the per-episode averages side by side and show the figure.

        Parameters:
        - avg_delays (sequence): Average delay of each episode
        - avg_energies (sequence): Average energy of each episode
        - avg_rewards (sequence): Average reward of each episode
        """
        # Episodes are numbered from 1 on the x-axis
        episode_axis = np.arange(1, len(avg_delays) + 1)

        # (series, legend label, y-axis label, title) for each panel
        panels = (
            (avg_delays, 'Avg Delay', 'Average Delay', 'Average Delay per Episode'),
            (avg_energies, 'Avg Energy', 'Average Energy', 'Average Energy per Episode'),
            (avg_rewards, 'Avg Reward', 'Average Reward', 'Average Reward per Episode'),
        )

        plt.figure(figsize=(12, 6))

        for position, (series, legend_label, y_label, heading) in enumerate(panels, start=1):
            plt.subplot(1, 3, position)
            plt.plot(episode_axis, series, label=legend_label)
            plt.xlabel('Episode')
            plt.ylabel(y_label)
            plt.title(heading)
            plt.legend()

        plt.tight_layout()
        plt.show()

# Build the environment with its default parameters
env = EdgeComputingEnvironment()

# The agent is sized to the number of users configured in the environment
num_users = env.M

# Initialize the DQN agent
agent = DQNAgent(env, num_users)

# Uncomment to resume from previously saved weights instead of starting fresh
# agent.load("dqn_model.h5")

# Train the agent; train() checkpoints to 'Model.h5' every agent.save_interval
# episodes and plots the per-episode averages when it finishes
num_episodes = 200  # Adjust the number of episodes as needed
agent.train(num_episodes)

# Uncomment to save the final weights under a separate name
#agent.save("dqn_model.h5")


[0, 0, 9, 206.0, 0, 0]
[0, 0, 8, 165.0, 0, 0]
[0, 0, 10, 178.0, 0, 0]
[0, 0, 12, 292.0, 0, 0]
[0, 0, 13, 165.0, 0, 0]
[0, 0, 2, 116.0, 0, 0]
[0, 0, 11, 255.0, 0, 0]
[0, 0, 0, 288.0, 0, 0]
[0, 0, 13, 292.0, 0, 0]
[0, 0, 2, 271.0, 0, 0]
[0, 0, 3, 169.0, 0, 0]
[0, 0, 4, 129.0, 0, 0]
[0, 0, 7, 153.0, 0, 0]
[0, 0, 2, 173.0, 0, 0]
[0, 0, 13, 202.0, 0, 0]
{'device_id': 9, 'task': {'eta_m': 206.0, 'T_max': 0.01, 'D_m': 1354}, 'delay': 0.0010088358767156736, 'energy': 0.022506438569268126, 'Occupied bandwidth': 0.11, 'Occupied computation': 1334000000.0}
0.0010088358767156736 0.022506438569268126
{'device_id': 9, 'task': {'eta_m': 206.0, 'T_max': 0.01, 'D_m': 1354}, 'delay': 0.0010088358767156736, 'energy': 0.022506438569268126, 'Occupied bandwidth': 0.11, 'Occupied computation': 1334000000.0}
[[1.00883588e-03 2.25064386e-02 9.00000000e+00 2.06000000e+02
  1.10000000e-01 1.33400000e+09]]
{0: {'Occupied bandwidth': 0, 'Occupied computation': 0, 'energy': 0, 'delay': 0, 'device_id': 0, 'task': {'

KeyboardInterrupt: 

In [17]:
# Quick check: call the agent's test routine with a 5-step budget.
# Uses the `agent` object created in an earlier cell.
num_test_steps = 5
for i in range(1):  # single pass; widen the range to repeat the test
    agent.test(num_test_steps)

{'device_id': 14, 'task': {'eta_m': 173.0, 'T_max': 0.01, 'D_m': 1354}, 'delay': 0.0006606604256183288, 'energy': 0.03160063946898692, 'Occupied bandwidth': 0.12, 'Occupied computation': 2222666667.0}
{'device_id': 14, 'task': {'eta_m': 173.0, 'T_max': 0.01, 'D_m': 1354}, 'delay': 0.0006606604256183288, 'energy': 0.03160063946898692, 'Occupied bandwidth': 0.12, 'Occupied computation': 2222666667.0}
{0: {'Occupied bandwidth': 0, 'Occupied computation': 0, 'energy': 0, 'delay': 0, 'device_id': 0, 'task': {'eta_m': 263.0, 'T_max': 0.01, 'D_m': 1354}}, 1: {'Occupied bandwidth': 0, 'Occupied computation': 0, 'energy': 0, 'delay': 0, 'device_id': 1, 'task': {'eta_m': 137.0, 'T_max': 0.01, 'D_m': 1354}}, 2: {'Occupied bandwidth': 0, 'Occupied computation': 0, 'energy': 0, 'delay': 0, 'device_id': 2, 'task': {'eta_m': 108.0, 'T_max': 0.01, 'D_m': 1354}}, 3: {'Occupied bandwidth': 0, 'Occupied computation': 0, 'energy': 0, 'delay': 0, 'device_id': None, 'task': {'eta_m': 0, 'T_max': 0, 'D_m': 0

In [8]:
# Experiment: average delay versus the UE energy budget (E_max), with one
# curve per edge-server cache capacity (S_max_es).
# Relies on EdgeComputingEnvironment and DQNAgent defined earlier in this file.

# Parameter grids to sweep
S_max_es_values = [0, 20, 40, 60]  # in KB
E_max_values = [1.5, 2, 2.5, 3, 3.5, 4]  # in mJ

# Budgets for each training run and each test run
num_train_episodes = 100
num_test_steps = 20

# A single environment/agent pair is shared by every configuration
env = EdgeComputingEnvironment()
num_users = env.M
agent = DQNAgent(env, num_users)

# Directory that receives one saved model per configuration
model_folder = 'DQN_Models_Test1'
os.makedirs(model_folder, exist_ok=True)

# Every (cache capacity, energy budget, model path) combination, ordered with
# the cache capacity as the outer dimension.
sweep = [
    (cache_kb, energy_mj,
     os.path.join(model_folder, f'DQN_model_S{cache_kb}_E{energy_mj}.h5'))
    for cache_kb in S_max_es_values
    for energy_mj in E_max_values
]

# Collected test metrics, one dict per configuration
results = []

# Training phase
# NOTE(review): the same agent instance is trained for every configuration in
# turn, so each saved model may also carry what was learned on the earlier
# configurations -- confirm this carry-over is intended.
for S_max_es, E_max, model_filename in sweep:
    env.S_max_es = S_max_es * 1e3  # KB -> bytes
    env.E_max = E_max * 1e-3  # mJ -> J
    agent.train(num_train_episodes)
    agent.save(model_filename)

# Testing phase: reload each configuration's model, restore its environment
# settings and record the resulting average delay.
for S_max_es, E_max, model_filename in sweep:
    agent.load(model_filename)
    env.S_max_es = S_max_es * 1e3  # KB -> bytes
    env.E_max = E_max * 1e-3  # mJ -> J
    avg_delay, avg_alpha = agent.test(num_test_steps)
    results.append(dict(S_max_es=S_max_es, E_max=E_max, avg_delay=avg_delay))

# Plot one delay-vs-E_max curve per cache capacity
plt.figure(figsize=(10, 6))

for S_max_es in S_max_es_values:
    delays = [entry['avg_delay'] for entry in results if entry['S_max_es'] == S_max_es]
    plt.plot(E_max_values, delays, marker='o', label=f'S_max_es = {S_max_es} KB')

plt.xlabel('Maximum energy requirement, $E_{max}$ (mJ)')
plt.ylabel('Average delay (ms)')
plt.title("The impact of edge caching capacity ($S_{max}^{es}$) and UE's energy consumption budget ($E_{max}$) on average delay")
plt.legend()
plt.grid(True)
plt.show()


[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

KeyboardInterrupt: 

In [None]:
# Experiment: latency versus the edge server's maximum processing rate
# (F_max_es) for several processing-rate deviation values, with the average
# offloading portion overlaid for the zero-deviation case.
# Relies on EdgeComputingEnvironment and DQNAgent defined earlier in this file.
env = EdgeComputingEnvironment()

# Define the number of users/devices
num_users = env.M

# Define the different F_max_es values (GHz) and deviation values
F_max_es_values = [30, 32, 34, 36, 38]
deviation_values = [0.0, 0.02]

# Initialize the DQN agent
agent = DQNAgent(env, num_users)

# Define the number of episodes for training and testing steps
num_train_episodes = 100  # or any suitable number for training
num_test_steps = 20  # or any suitable number for testing

# Create the folder for DQN model files if it doesn't exist
model_folder = 'DQN_Models_Test2'
os.makedirs(model_folder, exist_ok=True)

# Storage for results
results = []

# Train and save DQN models
# NOTE(review): the same agent instance is trained for every configuration in
# turn, so each saved model may also carry what was learned on the earlier
# configurations -- confirm this carry-over is intended.
for deviation in deviation_values:
    env.f_es_dev = deviation
    env.f_ue_dev = deviation

    for F_max_es in F_max_es_values:
        env.F_max_es = F_max_es * 1e9  # Convert GHz to Hz

        # Train the agent
        agent.train(num_train_episodes)

        # Save the model for this configuration
        model_filename = os.path.join(model_folder, f'DQN_model_F{F_max_es}_Dev{deviation}.h5')
        agent.save(model_filename)

# Test and collect metrics
for deviation in deviation_values:
    for F_max_es in F_max_es_values:
        # Load the model for this configuration
        model_filename = os.path.join(model_folder, f'DQN_model_F{F_max_es}_Dev{deviation}.h5')
        agent.load(model_filename)

        env.f_es_dev = deviation
        env.f_ue_dev = deviation
        env.F_max_es = F_max_es * 1e9  # Convert GHz to Hz

        # Test the agent and get the average delay and alpha
        avg_delay, avg_alpha = agent.test(num_test_steps)

        # Store the results
        results.append({
            'F_max_es': F_max_es,
            'f_dev': deviation,
            'avg_delay': avg_delay,
            'avg_alpha': avg_alpha
        })

# Index the results by (F_max_es, deviation) so each configuration is a
# direct lookup; setdefault keeps the first entry if a key ever repeats.
results_by_config = {}
for result in results:
    results_by_config.setdefault((result['F_max_es'], result['f_dev']), result)

# Prepare data for plotting: one list per deviation, ordered like F_max_es_values
delays_data = {deviation: [] for deviation in deviation_values}
alphas_data = {deviation: [] for deviation in deviation_values}

for F_max_es in F_max_es_values:
    for deviation in deviation_values:
        result = results_by_config.get((F_max_es, deviation))
        if result is None:
            # Missing configuration: fall back to 0 so the lists stay aligned
            delay_value, alpha_value = 0, 0
        else:
            delay_value = result['avg_delay']
            # The delay may come back wrapped in a NumPy array; use its first element
            if isinstance(delay_value, np.ndarray):
                delay_value = delay_value[0]
            alpha_value = result['avg_alpha']

        delays_data[deviation].append(delay_value)
        # Offloading portions are only plotted for the zero-deviation case
        if deviation == 0:
            alphas_data[deviation].append(alpha_value)

# Plotting the results
fig, ax1 = plt.subplots(figsize=(10, 6))

# Grouped bars for delay: one bar per deviation value at each F_max_es
bar_width = 0.35
index = np.arange(len(F_max_es_values))

for i, deviation in enumerate(deviation_values):
    # Raw f-string: keeps the LaTeX "\hat" from being parsed as an invalid
    # "\h" escape sequence; the rendered label text is unchanged.
    ax1.bar(index + i * bar_width, delays_data[deviation], bar_width, label=rf'Latency, $\hat{{f}} = {deviation}$', alpha=0.6)

ax1.set_xlabel("Maximum ES's processing rate (GHz)")
ax1.set_ylabel("Total latency (ms)")
ax1.set_title("The impact of ES's processing rate, deviation values, and offloading behavior")
ax1.set_xticks(index + bar_width / 2)
ax1.set_xticklabels(F_max_es_values)
ax1.legend(loc='upper left')
ax1.grid(True)

# Line plot for average alphas on a secondary y axis
ax2 = ax1.twinx()
alphas = alphas_data[0]
# Raw string for the same reason as the bar labels above.
ax2.plot(index + bar_width / 2, alphas, marker='o', color='red', label=r'Average offloading portions, $\hat{f} = 0$')

ax2.set_ylabel("Average UEs offloading portions")
ax2.legend(loc='upper right')

plt.show()
