# In [1]:
import numpy as np
from scipy.special import erfcinv
import json
import matplotlib.pyplot as plt
import os
import copy

# Edge Computing Environment

class EdgeComputingEnvironment:
    def __init__(self, M=15, area_size=100, D_m=1354, eta_m_range=(100, 300), F_max_ue=1.5, P_max=23, B=5e6, T_max=10e-3, F_max_es=30, S_max_es=60, epsilon=10e-7, E_max=3, theta=1e-26, L=8, phi=0.02e-3, N0=-174, f_es_dev=0.02, f_ue_dev=0.02):
        """
        Initialize the edge computing environment with given parameters.
        """
        self.M = M  # Number of users
        self.area_size = area_size  # Size of the area in which users are distributed
        self.D_m = D_m  # Task data size
        self.eta_m_range = eta_m_range  # Range of computation requirements
        self.F_max_ue = F_max_ue * 1e9  # Maximum frequency of user equipment
        self.P_max = 10 ** (P_max / 10)  # Convert maximum transmission power from dB to Watts
        self.B = B  # Bandwidth
        self.T_max = T_max  # Maximum tolerable delay
        self.F_max_es = F_max_es * 1e9  # Maximum frequency of edge server
        self.S_max_es = S_max_es * 1e3  # Maximum storage size of edge server
        self.epsilon = epsilon  # Error tolerance for rate calculation
        self.E_max = E_max * 1e-3  # Maximum energy consumption
        self.theta = theta  # Energy coefficient
        self.L = L  # Number of antennas
        self.phi = phi  # Transmission time interval
        self.R_min = 1e6  # Minimum data rate
        self.N0 = N0  # Noise power in dBm
        self.N0 = 10 ** ((N0 - 30) / 10)  # Convert noise power from dBm/Hz to Watts/Hz
        self.PL_d = lambda d: 10 ** ((10 ** ((-35.3 - (37.6 * np.log10(d))) / 10)) / 10)  # Path loss model
        self.f_es_dev = f_es_dev  #The deviation between the estimated value and the actual value of the processing rate of the ES
        self.f_ue_dev = f_ue_dev  #The deviation between the estimated value and the actual value of the processing rate of the UE
        self.penalty = 0.25
        self.treshhold = -0.6 

        self.user_device_params = []  # List to store parameters for each user device
        self.initialize_user_device_params()  # Initialize user device parameters

        self.server_params = self.initialize_server_params()  # Initialize server parameters

        self.cache = []  # Cache to store tasks
        self.current_cache_size = 0  # Current size of the cache
        self.transmitting_tasks = []  # List to store transmitting tasks
        self.processing_tasks = []  # List to store processing tasks
        self.current_time = 0  # Current simulation time

        # Initialize bandwidth and computation attributes
        self.total_bandwidth = 0 # Initialize total bandwidth
        self.total_computation = 0 # Initialize total computation

    def initialize_user_device_params(self):
        """
        Initialize parameters for each user device.
        Randomly generates user-specific parameters such as path loss.
        """
        for device_id in range(self.M):
            d = np.random.uniform(1, self.area_size / 2)  # Distance to server
            g_m = np.array([self.PL_d(d)])  # Path loss
            h_bar = np.random.randn(1, self.L) + 1j * np.random.randn(1, self.L)  # Channel gain

            self.user_device_params.append({
                'device_id': device_id,  # Assign a unique ID to each device
                'd': d,
                'g_m': g_m,
                'h_bar': h_bar,
            })

    def initialize_server_params(self):
        """
        Initialize parameters for the edge server.
        """
        return {
            'S_max_es': self.S_max_es  # Maximum storage size
        }

    def calculate_gamma_m(self, b_m, p_m, user_id):
        """
        Calculate the signal-to-noise ratio (SNR) for a given user.

        Parameters:
        - b_m (float): Bandwidth allocation
        - p_m (float): Transmission power
        - user_id (int): ID of the user

        Returns:
        - gamma_m (array): SNR values for the user's communication channel
        """
        h_m = np.sqrt(self.user_device_params[user_id]['g_m'])[:, None] * self.user_device_params[user_id]['h_bar']  # Channel gain
        gamma_m = (p_m * np.linalg.norm(h_m, axis=1) ** 2) / (b_m * self.B * self.N0)  # SNR

        return gamma_m

    def calculate_uplink_rate(self, b_m, p_m, user_id):
        """
        Calculate the uplink data rate for a given user.

        Parameters:
        - b_m (float): Bandwidth allocation
        - p_m (float): Transmission power
        - user_id (int): ID of the user

        Returns:
        - R_m (float): Uplink data rate in bits/second
        """
        gamma_m = self.calculate_gamma_m(b_m, p_m, user_id)  # Calculate the SINR for the m-th user
        V_m = 1 - (1 / (1 + gamma_m) ** 2)  # Intermediate variable for rate calculation
        Q_inv = np.sqrt(2) * erfcinv(2 * self.epsilon)  # Calculate the inverse of the Q-function for the outage probability
        R_m = (self.B / np.log(2)) * ((b_m * np.log(1 + gamma_m)) - ((np.sqrt((b_m * V_m) / (self.phi * self.B))) * Q_inv))  # Uplink data rate

        return R_m

    def calculate_delay(self, alpha_m, cache_hit, b_m, p_m, D_m, f_ue_m, f_es_m, f_ue_est, f_es_est, eta_m, user_id):
        """
        Calculate the end-to-end delay for a given task.

        Parameters:
        - alpha_m (float): Offloading decision
        - cache_hit (int): Split factor (0 or 1)
        - b_m (float): Bandwidth allocation
        - p_m (float): Transmission power
        - D_m (int): Data size
        - f_ue_m (float): Computation capability of the user device
        - f_es_m (float): Computation capability of the edge server
        - f_ue_est (float): Estimation error for the user device's computation capability
        - f_es_est (float): Estimation error for the edge server's computation capability
        - eta_m (float): Computational intensity
        - user_id (int): ID of the user

        Returns:
        - T_e2e (float): End-to-end delay in seconds
        """
        actual_f_ue_m = f_ue_m - f_ue_est  # Actual processing rate of the user device
        actual_f_es_m = f_es_m - f_es_est  # Actual processing rate of the Edge server

        if cache_hit == 1:
            T_es = (eta_m * D_m) / actual_f_es_m  # Only edge server processing delay
            T_e2e = T_es

        else:
            T_ue = (alpha_m * eta_m * D_m) / actual_f_ue_m  # User device processing delay
            R_m = self.calculate_uplink_rate(b_m, p_m, user_id)  # Uplink data rate
            T_tr = (D_m * 8) / R_m  # Transmission delay
            T_es = ((1 - alpha_m) * eta_m * D_m) / actual_f_es_m  # Edge server processing delay
            T_e2e = T_ue + T_tr + T_es  # Total end-to-end delay
        return T_e2e

    def calculate_transmission_delay(self, b_m, p_m, D_m, user_id):
        """
        Calculate the transmission delay for a given task.

        Parameters:
        - b_m (float): Bandwidth allocation
        - p_m (float): Transmission power
        - D_m (int): Data size
        - user_id (int): ID of the user

        Returns:
        - T_co (float): Transmission delay in seconds
        """
        R_m = self.calculate_uplink_rate(b_m, p_m, user_id)  # Calculate uplink data rate
        T_co =  (D_m * 8) / R_m   # Transmission delay calculation based on task size and uplink rate

        return T_co

    def calculate_server_processing_delay(self, alpha_m, cache_hit, D_m, f_es_m, f_es_est, eta_m):
        """
        Compute the processing delay of a task at the edge server.

        Parameters:
        - alpha_m (float): Offloading decision
        - cache_hit (0,1): 1 = exists in cache, 0 = does not exist in cache
        - D_m (int): Data size
        - f_es_m (float): Computation capability of the edge server
        - f_es_est (float): Estimation error for the edge server's computation capability
        - eta_m (float): Computational intensity

        Returns:
        - T_es (float): Processing delay at the edge server in seconds
        """
        effective_rate = f_es_m - f_es_est  # Actual server processing rate

        if cache_hit != 0:
            # Cached task: the whole workload is charged to the server
            return (eta_m * D_m) / effective_rate

        # Cache miss: only the offloaded share (1 - alpha_m) runs on the server
        return ((1 - alpha_m) * eta_m * D_m) / effective_rate

    def calculate_energy_consumption(self, s_m, b_m, alpha_m, p_m, D_m, f_ue_m, f_ue_est, eta_m, user_id):
        """
        Calculate the energy consumption for a given task.

        Parameters:
        - alpha_m (float): Offloading decision
        - s_m (int): Split factor (0 or 1)
        - f_ue_m (float): Computation capability of the user device
        - b_m (float): Bandwidth allocation
        - p_m (float): Transmission power
        - f_ue_est (float): Estimation error for the user device's computation capability
        - eta_m (float): Computational intensity

        Returns:
        - E_total (float): Total energy consumption in Joules
        """
        R_m = self.calculate_uplink_rate(b_m, p_m, user_id)  # Calculate uplink data rate

        actual_f_ue_m = f_ue_m - f_ue_est  # Calculate the actual processing rate of the UE

        E_ue = alpha_m * (self.theta / 2) * eta_m * D_m * (actual_f_ue_m ** 2)  # Energy consumption at the user device
        E_tx = ((1 - alpha_m) * (D_m * 8) * p_m) / R_m  # Transmission energy

        if s_m == 1:  # Task is in cache
            E_total = 0  # No energy consumed when task is in cache
        else:
            E_total = E_ue + E_tx  # Total energy consumption

        return E_total

    def manage_cache(self, task_info, task_delay):
        """
        Manage the cache for storing and retrieving tasks.

        Parameters:
        - task_info (tuple): Task parameters to identify the task
        - task_delay (float): Delay of the task

        Returns:
        - bool: True if the task is found in the cache, False otherwise
        """
        if task_delay == 0:
            for task in self.cache:
                if task_info == task[0]:  # Check if the task is already in cache
                    return True
            return False

        task_size = task_info['D_m'] * 8  # Task size
        Server_Max_Capacity = self.server_params['S_max_es']  # Server maximum capacity

        if (task_size + self.current_cache_size) <= Server_Max_Capacity:
            self.cache.append((task_info, task_delay))  # Add task to cache
            self.current_cache_size += task_size  # Update cache size
            return True
        else:
            sorted_cache = sorted(self.cache, key=lambda x: x[1], reverse=True)  # Sort tasks by delay in descending order

            while (task_size + self.current_cache_size) > Server_Max_Capacity:
                if not sorted_cache:
                    break  # Exit loop if sorted_cache is empty
                last_task = sorted_cache.pop()  # Remove the last task from sorted_cache
                self.cache.remove(last_task)  # Remove the task from the cache
                self.current_cache_size -= last_task[0]['D_m'] * 8  # Update current cache size

            self.cache.append((task_info, task_delay))  # Add task to cache
            self.current_cache_size += task_size  # Update cache size

            return True

    def step(self, actions, tasks, users_id):
        """
        Perform a simulation step for the given action.

        Parameters:
        - action (array): Array of action for each user
        - tasks (array): Array of task for each user

        Returns:
        - tuple: (task_rewards, next_state, done)
        """
        # Initialize cumulative metrics for the step
        task_rewards = []  # List to store reward for each task
        state_info = []  # List to store individual task and device state information
        done = False

        for action, task, user_id in zip(actions, tasks, users_id):

            # Determine if the task is a cache hit or miss
            cache_hit = 1 if self.manage_cache(task, 0) else 0

            f_es_est = action['f_es_m'] * self.f_es_dev  # initialize f_es_est
            f_ue_est = action['f_ue_m'] * self.f_ue_dev  # initialize f_ue_est

            # Calculate the end-to-end delay for the task
            delay = self.calculate_delay(
                action['alpha_m'], cache_hit, action['b_m'], action['p_m'],
                task['D_m'], action['f_ue_m'], action['f_es_m'], f_ue_est,
                f_es_est, task['eta_m'], user_id
            )

            # Calculate the energy consumption for the task
            energy = self.calculate_energy_consumption(
                cache_hit, action['b_m'], action['alpha_m'], action['p_m'], task['D_m'], action['f_ue_m'],
                f_es_est, task['eta_m'], user_id
            )

            # Calculate the uplink data rate for the user
            R_m = self.calculate_uplink_rate(action['b_m'], action['p_m'], user_id)
            print(R_m)
            # Manage task transmission and processing times
            if cache_hit == 0:
                transmission_end_time = self.current_time + self.calculate_transmission_delay(action['b_m'], action['p_m'],task['D_m'], user_id)
                processing_end_time = transmission_end_time + self.calculate_server_processing_delay(action['alpha_m'], cache_hit, task['D_m'], action['f_es_m'], f_es_est, task['eta_m'])

                self.transmitting_tasks.append((self.current_time, transmission_end_time, action['b_m']))
                self.processing_tasks.append((transmission_end_time, processing_end_time, action['f_es_m']))

                # Update cache with the task if it becomes eligible
                self.manage_cache(task, delay)
            else:
                # For cache hit, only processing delay is considered
                processing_end_time = self.current_time + self.calculate_server_processing_delay(action['alpha_m'], cache_hit, task['D_m'], action['f_es_m'], f_es_est, task['eta_m'])
                process = action['f_es_m'] * (1 - action['alpha_m'])
                self.processing_tasks.append((self.current_time, processing_end_time, process))

            # Calculate total bandwidth and computation resource usage at current time
            self.total_bandwidth = sum(b for _, end_time, b in self.transmitting_tasks if end_time > self.current_time)
            self.total_computation = sum(f for _, end_time, f in self.processing_tasks if end_time > self.current_time)

            # Free resources for tasks that have completed transmission or processing
            self.transmitting_tasks = [(start_time, end_time, b) for start_time, end_time, b in self.transmitting_tasks if end_time > self.current_time]
            self.processing_tasks = [(start_time, end_time, f) for start_time, end_time, f in self.processing_tasks if end_time > self.current_time]

            # Calculate reward
            task_reward  = -delay - energy

            # Apply penalties for exceeding resource limits
            if delay > task['T_max']:
                task_reward -= self.penalty
            if energy > self.E_max:
                task_reward -= self.penalty
            if R_m < self.R_min:
                task_reward -= self.penalty
            if self.total_bandwidth > 1:
                task_reward -= self.penalty
            if self.total_computation > self.F_max_es:
                task_reward -= self.penalty

            # Check if the cumulative reward is below a certain threshold
            if task_reward < self.treshhold:
                done = True

            # Store metrics and state information for the task
            task_rewards.append(task_reward)

            state_info.append({
                'device_id': user_id,
                'task': task,  # Include task information directly
                'delay': delay,
                'energy': energy,
                'Occupied bandwidth': self.total_bandwidth,
                'Occupied computation': self.total_computation,
                'cache_size': self.current_cache_size  # Add cache size
            })

        # Increment current simulation time
        self.current_time += 0.060 

        # Prepare the next state
        next_state = state_info

        return task_rewards, next_state, done

    def reset(self):
        """
        Reset the environment to its initial state.
        """
        self.cache = []  # Clear cache
        self.current_cache_size = 0  # Reset cache size
        self.transmitting_tasks = []  # Clear transmitting tasks
        self.processing_tasks = []  # Clear processing tasks
        self.current_time = 0  # Reset current time
        self.initialize_user_device_params()  # Reinitialize user device parameters
        self.total_bandwidth = 0  # Reinitialize total bandwidth
        self.total_computation = 0  # Reinitialize total computation
        self.server_params = self.initialize_server_params()  # Reinitialize server parameters

        device_state_info = {user_id: {
            'Occupied bandwidth':  self.total_bandwidth,
            'Occupied computation': self.total_computation,
            'cache_size': self.current_cache_size , # Add cache size
            'device_id' : None,
            'task' : None
        } for user_id in range(self.M)}

        return device_state_info

    def render(self):
        print(f"Total Bandwidth Used: {self.total_bandwidth}")
        print(f"Total Computation Used: {self.total_computation}")
        print(f"Current Cache Size: {self.current_cache_size}")
        print(f"Number of Transmitting Tasks: {len(self.transmitting_tasks)}")
        print(f"Number of Processing Tasks (Not Exist In Cache): {len(self.processing_tasks)}")


# In [10]:
# Q-Learning Algorithm

class QLearningAgent:
    def __init__(self, env, num_users, alpha=0.1, gamma=0.9, epsilon=1, max_steps_per_episode=20):
        self.env = env  # Environment for the agent
        self.num_users = num_users  # Number of users/devices in the environment
        self.alpha = alpha  # Learning rate
        self.gamma = gamma  # Discount factor for future rewards
        self.epsilon = epsilon  # Exploration rate
        self.q_table = {}  # Q-table to store Q-values for state-action pairs
        self.max_steps_per_episode = max_steps_per_episode  # Maximum steps per episode
        self.S_max_es = env.S_max_es  # Max cache size from environment
        self.F_max_es = env.F_max_es  # Max computation from environment

    def quantize_value(self, value, value_range, num_bins):
        """Map a value to the index of one of num_bins equal-width bins spanning value_range.

        Values outside the range are clamped to the first or last bin.
        """
        lower, upper = value_range[0], value_range[1]
        edges = np.linspace(lower, upper, num_bins + 1)

        # digitize is 1-based for in-range values, hence the shift by one
        raw_index = np.digitize([value], edges, right=True)[0] - 1

        return np.clip(raw_index, 0, num_bins - 1)

    def quantize_state(self, state):
        """Quantize the state parameters."""
        state['cache_size'] = self.quantize_value(state['cache_size'], (0, self.S_max_es), 10)
        state['Occupied computation'] = self.quantize_value(state['Occupied computation'], (0, (self.F_max_es * 2)), 10)
        state['Occupied bandwidth'] = self.quantize_value(state['Occupied bandwidth'], (0, 1), 10)
        state['task']['eta_m'] = self.quantize_value(state['task']['eta_m'], (self.env.eta_m_range[0], self.env.eta_m_range[1]), 10)
        return state

    def make_hashable(self, d):
        # Convert a dictionary or list into a hashable type (tuple)
        if isinstance(d, dict):
            return tuple(sorted((k, self.make_hashable(v)) for k, v in d.items()))
        if isinstance(d, list):
            return tuple(self.make_hashable(e) for e in d)
        if isinstance(d, np.ndarray):
            return tuple(d.tolist())
        return d

    def update_epsilon(self, episode, total_episodes):
        min_epsilon=0.06
        k = np.log(self.epsilon / min_epsilon) / total_episodes
        self.epsilon = min_epsilon + (self.epsilon - min_epsilon) * np.exp(-k * episode)  # Decay the exploration rate

    def get_action(self, state):
        state_copy = copy.deepcopy(state)
        state = self.quantize_state(state_copy)  # Quantize the state
        state_key = self.make_hashable(state)  # Convert state to a hashable key

        if state_key not in self.q_table:
            self.q_table[state_key] = self.initialize_q_values()  # Initialize Q-values for unseen states

        if np.random.uniform(0, 1) < self.epsilon:
            action = self.generate_random_action()  # Explore: select a random action
            action_key = tuple(action.items())
            if action_key not in self.q_table[state_key]:
                self.q_table[state_key][action_key] = -1  # Initialize unseen actions with a low Q-value
        else:
            numerical_q_values = {k: v for k, v in self.q_table[state_key].items() if isinstance(v, (int, float))}
            if numerical_q_values:
                action_key = max(numerical_q_values, key=numerical_q_values.get)  # Exploit: select action with highest Q-value
                action = dict(action_key)
            else:
                action = self.generate_random_action()
                action_key = tuple(action.items())
                if action_key not in self.q_table[state_key]:
                    self.q_table[state_key][action_key] = -1
        return action


    def initialize_q_values(self):
        q_values = {}
        random_action = self.generate_random_action()  # Generate a random action
        q_values[tuple(random_action.items())] = -1  # Initialize its Q-value with a low value
        return q_values

    
    def generate_random_action(self):
        # Generate random values for the action parameters
        alpha = np.round(np.random.choice(np.linspace(0, 1, 20)),2) 
        b = np.round(np.random.choice(np.linspace(0.01, 2/self.num_users, 10)),2) 
        p = np.round(np.random.choice(np.linspace(self.env.P_max / 10, self.env.P_max, 30))) 
        f_ue = np.round(np.random.choice(np.linspace(self.env.F_max_ue / 1000, self.env.F_max_ue, 100))) 
        f_es = np.round(np.random.choice(np.linspace(self.env.F_max_es / 1000, ((3 * self.env.F_max_es) / self.num_users), 100)))
        # Space Actions = 60000000
        action = {
            'alpha_m': alpha,
            'b_m': b,
            'p_m': p,
            'f_ue_m': f_ue,
            'f_es_m': f_es
        }

        return action

    def update_q_table(self, state, action, reward, next_state):

        state_copy = copy.deepcopy(state)
        next_state_copy = copy.deepcopy(next_state)

        state = self.quantize_state(state_copy)  # Quantize the state
        next_state = self.quantize_state(next_state_copy)  # Quantize the next state

        state_key = self.make_hashable(state)  # Convert state to a hashable key
        action_key = tuple(action.items())  # Convert action to a hashable key

        next_state_key = self.make_hashable(next_state)  # Convert next state to a hashable key

        if isinstance(reward, list) or isinstance(reward, np.ndarray):
            reward = reward[0]

        if state_key not in self.q_table:
            self.q_table[state_key] = self.initialize_q_values()  # Initialize Q-values for unseen states

        if action_key not in self.q_table[state_key]:
            self.q_table[state_key][action_key] = -1  # Initialize unseen actions with a low Q-value

        if next_state_key not in self.q_table:
            self.q_table[next_state_key] = self.initialize_q_values()  # Initialize Q-values for unseen next states

        current_q = self.q_table[state_key][action_key]  # Current Q-value
        max_next_q = max(self.q_table[next_state_key].values())  # Maximum Q-value for the next state

        # Update the Q-value using the Q-learning update rule
        self.q_table[state_key][action_key] = current_q + (self.alpha * (reward + (self.gamma * max_next_q) - current_q))

    def create_task(self):
        # Create a new task with random parameters within specified ranges
        eta_m =  np.round(np.random.choice(np.linspace(self.env.eta_m_range[0], self.env.eta_m_range[1], 100)))
        T_max_task = 10e-3  # Static according to article
        task_info = {
            'eta_m': eta_m,
            'T_max': T_max_task,
            'D_m': 1354  # Task data size
        }
        return task_info

    def train(self, num_episodes):
        # Lists to store average delay and energy values for each episode
        avg_delays = []
        avg_energies = []
        avg_rewards = []


        for episode in range(num_episodes):
            # Initialize device state information for all users at the beginning of each episode
            device_state_info = self.env.reset()

            # Initialize total delay and energy for this episode
            total_delay = 0
            total_energy = 0
            total_reward = 0

            # Initialize the number of tasks in this episode
            num_all_tasks = 0
            actual_steps = 0

            for step in range(self.max_steps_per_episode):
                # Initialize lists for actions, tasks, and user IDs
                actions = []
                tasks = []
                users_id = []

                # Generate a random number of tasks and assign them to devices
                num_tasks = np.random.randint(self.num_users / 2, self.num_users + 1)
                num_all_tasks += num_tasks
                
                # Randomly distribute tasks to users
                task_distribution = np.random.choice(range(self.num_users), num_tasks, replace=True)

                # Initialize a list to store state representations for action selection
                states_choose_actions = []

                for user_id in task_distribution:
                    # Create a new task
                    task = self.create_task()
                    # Append the task to the tasks list
                    tasks.append(task)
                    # Append the user ID to the users_id list
                    users_id.append(user_id)

                    # Update device state information with the new task
                    device_state_info[user_id]['task'] = task
                    device_state_info[user_id]['device_id'] = user_id

                    # Create the state representation for action selection
                    state_choose_actions = {
                        'cache_size': device_state_info[user_id]['cache_size'],
                        'device_id': user_id,
                        'task': device_state_info[user_id]['task'],
                        'Occupied bandwidth': device_state_info[user_id]['Occupied bandwidth'],
                        'Occupied computation': device_state_info[user_id]['Occupied computation']
                    }


                    # Append the state representation to the list
                    states_choose_actions.append(state_choose_actions)

                # Select actions for each device with a task using the Q-table
                actions = [self.get_action(state) for state in states_choose_actions]

                # Execute the actions in the environment
                rewards, next_state_info, done = self.env.step(actions, tasks, users_id)

                # List of state representations used for action selection
                state_info_list = states_choose_actions

                counter_Users = 0
                # Update Q-table and device state information for each device
                for user_id in task_distribution:
                    # Get the current device information
                    device_info = state_info_list[counter_Users]
                    # Get the next device information
                    next_device_info = next_state_info[counter_Users]
                    # Get the action taken by the user
                    action = actions[counter_Users]
                    # Get the reward received by the user
                    reward = rewards[counter_Users]

                    # Extract delay and energy values from the next device information
                    delay = next_device_info.pop('delay', 0)
                    energy = next_device_info.pop('energy', 0)

                    # Update the Q-table with the new state-action pair
                    self.update_q_table(device_info, action, reward, next_device_info)
                    
                    # Accumulate the total delay and energy for the episode
                    total_delay += delay
                    total_energy += energy
                    total_reward += reward

                    # Update device state information with the combined next state
                    device_state_info[user_id].update(next_device_info)
                    # Increment the counter for the number of users
                    counter_Users += 1
                    
                actual_steps += 1

                if done:
                    # Exit the loop if the episode is done
                    break

            # Calculate and store average delay and energy for the episode
            avg_delay = total_delay / num_all_tasks * 1000  # Convert to milliseconds
            avg_energy = total_energy / num_all_tasks
            avg_reward = total_reward / num_all_tasks
            avg_delays.append(avg_delay)
            avg_energies.append(avg_energy)
            avg_rewards.append(avg_reward)

            # Update epsilon for the epsilon-greedy strategy
            self.update_epsilon(episode, num_episodes)

            # Print the episode's results
            print(f"Train : Episode {episode + 1}/{num_episodes} - Steps Count {actual_steps} - Avg Delay: {avg_delay}, Avg Energy: {avg_energy}, Avg Reward: {avg_reward}")
            print("-" * 100)


        # Optionally plot the results
        #self.plot_results(avg_delays, avg_energies, avg_rewards)

    def test(self, num_test_steps):

        # Initialize total delay and alpha values for this test
        total_delay = 0
        total_alpha = 0
        total_reward = 0
        # Counter for number of all tasks
        num_all_tasks = 0
        actual_steps = 0

        # Set epsilon to 0 for testing (no exploration)
        self.epsilon = 0

        # Initialize device state information for all users
        device_state_info =  self.env.reset()

        for step in range(num_test_steps):
            # Initialize lists for actions, tasks, and user IDs
            actions = []
            tasks = []
            users_id = []

            # Generate a random number of tasks and assign them to devices
            num_tasks = np.random.randint(self.num_users / 2, self.num_users + 1)
            num_all_tasks += num_tasks

            # Randomly distribute tasks to users
            task_distribution = np.random.choice(range(self.num_users), num_tasks, replace=True)

            # Initialize a list to store state representations for action selection
            states_choose_actions = []

            for user_id in task_distribution:
                # Create a new task
                task = self.create_task()
                # Append the task to the tasks list
                tasks.append(task)
                # Append the user ID to the users_id list
                users_id.append(user_id)

                # Update device state information with the new task
                device_state_info[user_id]['task'] = task
                device_state_info[user_id]['device_id'] = user_id

                # Create the state representation for action selection
                state_choose_actions = {
                    'cache_size': device_state_info[user_id]['cache_size'],
                    'device_id': user_id,
                    'task': device_state_info[user_id]['task'],
                    'Occupied bandwidth': device_state_info[user_id]['Occupied bandwidth'],
                    'Occupied computation': device_state_info[user_id]['Occupied computation']
                }


                # Append the state representation to the list
                states_choose_actions.append(state_choose_actions)

            # Select actions for each device with a task using the Q-table
            actions = [self.get_action(state) for state in states_choose_actions]

            # Execute the actions in the environment
            rewards, next_state_info, done = self.env.step(actions, tasks, users_id)

            total_reward = sum(rewards)

            counter_Users = 0

            for user_id in task_distribution:
                # Get the next device information
                next_device_info = next_state_info[counter_Users]
                # Extract delay and energy values from the next device information
                delay = next_device_info.pop('delay', 0)
                next_device_info.pop('energy', 0)

                # Accumulate the total delay and alpha values for the test
                total_delay += delay
                total_alpha += actions[counter_Users]['alpha_m']

                # Update device state information with the combined next state
                device_state_info[user_id].update(next_device_info)
                counter_Users += 1

            # Increment the counter for actual iterations
            actual_steps += 1

            if done:
                # Exit the loop if the test is done
                break

        # Calculate and return the average delay and alpha for the test
        avg_delay = (total_delay / num_all_tasks) * 1000  # Convert to milliseconds
        avg_alpha = total_alpha / num_all_tasks
        avg_reward = total_reward / num_all_tasks


        # Print the episode's results
        print(f"Test : Steps Count {actual_steps} - Avg Delay: {avg_delay}, Avg Alpha: {avg_alpha}, Avg Reward: {avg_reward}")
        print("-" * 100)
        
        return avg_delay, avg_alpha

    def plot_results(self, avg_delays, avg_energies, avg_rewards):
        """
        Plot the per-episode averages side by side in one figure.

        Parameters:
        - avg_delays (list): Average delay of each episode
        - avg_energies (list): Average energy of each episode
        - avg_rewards (list): Average reward of each episode
        """
        episodes = np.arange(1, len(avg_delays) + 1)

        # (series, legend label, y-axis label, title) for each of the three panels
        panels = (
            (avg_delays, 'Avg Delay', 'Average Delay', 'Average Delay per Episode'),
            (avg_energies, 'Avg Energy', 'Average Energy', 'Average Energy per Episode'),
            (avg_rewards, 'Avg Reward', 'Average Reward', 'Average Reward per Episode'),
        )

        plt.figure(figsize=(12, 6))

        for position, (series, legend_label, y_label, title) in enumerate(panels, start=1):
            plt.subplot(1, 3, position)
            plt.plot(episodes, series, label=legend_label)
            plt.xlabel('Episode')
            plt.ylabel(y_label)
            plt.title(title)
            plt.legend()

        plt.tight_layout()
        plt.show()

    def save_q_table(self, filename):
        def convert_keys_to_str(d):
            if isinstance(d, dict):
                return {str(k): convert_keys_to_str(v) for k, v in d.items()}
            elif isinstance(d, np.ndarray):
                return d.tolist()
            else:
                return d

        with open(filename, 'w') as f:
            json.dump(convert_keys_to_str(self.q_table), f)

    def load_q_table(self, filename):
        def convert_keys_to_tuple(d):
            if isinstance(d, dict):
                return {eval(k): convert_keys_to_tuple(v) for k, v in d.items()}
            else:
                return d

        with open(filename, 'r') as f:
            self.q_table = convert_keys_to_tuple(json.load(f))

# Assuming you have your EdgeComputingEnvironment defined as per your code
#env = EdgeComputingEnvironment()

# Define the number of users/devices
#num_users = env.M

# Initialize the Q-learning agent
#agent = QLearningAgent(env, num_users)

# Train the agent
#num_episodes = 5  # Adjust the number of episodes as needed
#agent.train(num_episodes)

# Save the Q-table to disk
#agent.save_q_table('Q_table.json')

In [11]:
# learning and test for S_max_es and E_max
#
# For every (S_max_es, E_max) pair the agent is trained and its Q-table is
# saved to disk. Afterwards each saved Q-table is reloaded, the agent is tested
# under the same configuration, and the average delay is plotted against E_max
# with one curve per S_max_es value.

# Requires the QLearningAgent and EdgeComputingEnvironment classes defined above
env = EdgeComputingEnvironment()

# Define your parameter ranges
S_max_es_values = [0, 20, 40, 60]  # in KB
E_max_values = [1.5, 2, 2.5, 3, 3.5, 4]  # in mJ

# Define the number of users/devices
num_users = env.M

# Initialize the Q-learning agent
# NOTE(review): the same agent instance is reused for every configuration.
# Whether train() starts from a fresh Q-table is not visible here; if it does
# not, each configuration continues learning from the previous one. Confirm
# that this is intended.
agent = QLearningAgent(env, num_users)

# Define the number of episodes for training and testing steps
num_train_episodes = 100  # or any suitable number for training
num_test_steps = 20  # or any suitable number for testing

# Create the folder for Q-table files if it doesn't exist
q_table_folder = 'Q-Tables_Test1'
os.makedirs(q_table_folder, exist_ok=True)

# Storage for results: one dict per tested configuration, appended in
# (S_max_es, E_max) loop order
results = []

# Training phase
for S_max_es in S_max_es_values:
    for E_max in E_max_values:
        # Set the environment parameters
        env.S_max_es = S_max_es * 1e3  # Convert KB to bytes
        env.E_max = E_max * 1e-3  # Convert mJ to J

        # Train the agent
        agent.train(num_train_episodes)

        # Save the Q-table for this configuration
        q_table_filename = os.path.join(q_table_folder, f'Q_table_S{S_max_es}_E{E_max}.json')
        agent.save_q_table(q_table_filename)

# Testing phase
for S_max_es in S_max_es_values:
    for E_max in E_max_values:
        # Load the Q-table for this configuration
        q_table_filename = os.path.join(q_table_folder, f'Q_table_S{S_max_es}_E{E_max}.json')
        agent.load_q_table(q_table_filename)

        # Set the environment parameters
        env.S_max_es = S_max_es * 1e3  # Convert KB to bytes
        env.E_max = E_max * 1e-3  # Convert mJ to J

        # Test the agent and get the average delay (avg_alpha is not used here)
        avg_delay, avg_alpha = agent.test(num_test_steps)

        # Store the results
        results.append({
            'S_max_es': S_max_es,
            'E_max': E_max,
            'avg_delay': avg_delay,
        })

# Plotting the results
plt.figure(figsize=(10, 6))

for S_max_es in S_max_es_values:
    # Delays for this S_max_es, in the same order as E_max_values
    delays = [result['avg_delay'] for result in results if result['S_max_es'] == S_max_es]

    # Reduce every delay to a single plottable value. A value that cannot be
    # used is replaced by NaN rather than dropped: dropping it would leave
    # cleaned_delays shorter than E_max_values and make plt.plot raise, while
    # NaN simply leaves a gap in the curve.
    cleaned_delays = []
    for delay in delays:
        if isinstance(delay, (int, float, np.number)):  # Already a numerical value
            cleaned_delays.append(delay)
        elif isinstance(delay, (list, np.ndarray)):  # A list or array
            if len(delay) == 1:
                cleaned_delays.append(delay[0])  # Extract the single element
            else:
                print(f"Warning: Found non-scalar delay value for S_max_es = {S_max_es}: {delay}")
                cleaned_delays.append(np.nan)
        else:
            print(f"Warning: Found non-numerical delay value for S_max_es = {S_max_es}: {delay}")
            cleaned_delays.append(np.nan)

    # Plot the cleaned delays
    plt.plot(E_max_values, cleaned_delays, marker='o', label=f'S_max_es = {S_max_es} KB')

plt.xlabel('Maximum energy requirement, $E_{max}$ (mJ)')
plt.ylabel('The total e2e latency (ms)')
plt.title('The impact of edge caching capacity ($S_{max}^{es}$) and UE\'s energy consumption budget ($E_{max}$) on e2e latency')
plt.legend()
plt.grid(True)
plt.show()



[26574799.42437796]
[37848750.39140202]
[32343970.22020715]
[22980465.22738296]
[25315066.64835401]
[17504191.77596872]
[14521153.26697739]
[11386597.17056477]
[2804822.34374014]
[11221736.17059497]
[5706312.38003419]
[25334336.92915022]
[36074522.76710299]
[37812108.95400804]
[34701005.4874596]
[31620840.06118387]
[25861605.1523819]
[32090717.05251335]
[33279192.54172947]
[26283172.93470039]
[11699083.91316881]
[17655510.5977482]
[33674885.60827759]
[14280183.47701686]
[11252112.10217047]
[14597916.97157885]
[31313169.95350116]
[37778272.54885509]
[5850179.7197879]
[5761131.06467284]
[5832780.6332666]
[25662022.41853548]
Train : Episode 1/100 - Steps Count 3 - Avg Delay: [1.15817203], Avg Energy: [0.05028876], Avg Reward: [-0.34832193]
----------------------------------------------------------------------------------------------------
[17063306.10921269]
[33943804.7209083]
[25982266.94607214]
[37200167.87253886]
[33648692.86311049]
[23292152.48267613]
[34488297.18594754]
[35251281.977

KeyboardInterrupt: 

In [4]:
# learning and test for F_max_es and deviation_values
#
# For every (deviation, F_max_es) pair the agent is trained and its Q-table is
# saved to disk. Afterwards each saved Q-table is reloaded and the agent is
# tested under the same configuration. The average delay is drawn as grouped
# bars (one group per F_max_es, one bar per deviation) and the average
# offloading portion for deviation 0 is overlaid as a line on a second y-axis.

# Requires the EdgeComputingEnvironment and QLearningAgent classes defined above
env = EdgeComputingEnvironment()

# Define the number of users/devices
num_users = env.M

# Define the different F_max_es values and f_es_dev, f_ue_dev values
F_max_es_values = [30, 32, 34, 36, 38]
deviation_values = [0.0, 0.02]

# Initialize the Q-learning agent
# NOTE(review): the same agent instance is reused for every configuration.
# Whether train() starts from a fresh Q-table is not visible here; confirm
# that carrying the Q-table over between configurations is intended.
agent = QLearningAgent(env, num_users)

# Define the number of episodes for training and testing steps
num_train_episodes = 100  # or any suitable number for training
num_test_steps = 20  # or any suitable number for testing

# Create the folder for Q-table files if it doesn't exist
# NOTE(review): 'Tese2' looks like a typo for 'Test2'; it is kept unchanged so
# that Q-tables saved by earlier runs are still found.
q_table_folder = 'Q-Tables_Tese2'
os.makedirs(q_table_folder, exist_ok=True)

# Storage for results: one dict per tested configuration
results = []

# Train and save Q-tables
for deviation in deviation_values:
    # The same deviation is applied to the ES and the UE processing rates
    env.f_es_dev = deviation
    env.f_ue_dev = deviation

    for F_max_es in F_max_es_values:
        env.F_max_es = F_max_es * 1e9  # same 1e9 scaling as the environment constructor

        # Train the agent
        agent.train(num_train_episodes)

        # Save the Q-table for this configuration
        q_table_filename = os.path.join(q_table_folder, f'Q_table_F{F_max_es}_Dev{deviation}.json')
        agent.save_q_table(q_table_filename)

# Test and collect metrics
for deviation in deviation_values:
    for F_max_es in F_max_es_values:

        # Load the Q-table for this configuration
        q_table_filename = os.path.join(q_table_folder, f'Q_table_F{F_max_es}_Dev{deviation}.json')
        agent.load_q_table(q_table_filename)

        env.f_es_dev = deviation
        env.f_ue_dev = deviation
        env.F_max_es = F_max_es * 1e9

        # Test the agent and get the average delay and alpha
        avg_delay, avg_alpha = agent.test(num_test_steps)

        # Store the results
        results.append({
            'F_max_es': F_max_es,
            'f_dev': deviation,
            'avg_delay': avg_delay,
            'avg_alpha': avg_alpha
        })

# Prepare data for plotting: per deviation, one value per F_max_es
delays_data = {deviation: [] for deviation in deviation_values}
alphas_data = {deviation: [] for deviation in deviation_values}

# Index the first result recorded for each (F_max_es, deviation) pair so the
# fill loop below is a direct lookup instead of a scan over all results
first_result = {}
for result in results:
    first_result.setdefault((result['F_max_es'], result['f_dev']), result)

# Fill the data lists, using 0 as the default value if a result is missing
for F_max_es in F_max_es_values:
    for deviation in deviation_values:
        result = first_result.get((F_max_es, deviation))
        if result is None:
            delays_data[deviation].append(0)
            if deviation == 0:
                alphas_data[deviation].append(0)
            continue

        # An array-valued delay is reduced to its first element
        delay_value = result['avg_delay']
        if isinstance(delay_value, np.ndarray):
            delay_value = delay_value[0]
        delays_data[deviation].append(delay_value)

        # Offloading portions are only plotted for the zero-deviation case
        if deviation == 0:
            alphas_data[deviation].append(result['avg_alpha'])

# Plotting the results
fig, ax1 = plt.subplots(figsize=(10, 6))

# Plot for delay
bar_width = 0.35
index = np.arange(len(F_max_es_values))

for i, deviation in enumerate(deviation_values):
    # Raw f-string: "\hat" is LaTeX for matplotlib mathtext, not a Python escape
    ax1.bar(index + i * bar_width, delays_data[deviation], bar_width, label=rf'Latency, $\hat{{f}} = {deviation}$', alpha=0.6)

ax1.set_xlabel("Maximum ES's processing rate (GHz)")
ax1.set_ylabel("The total latency (ms)")
ax1.set_title("The impact of ES's processing rate, deviation values, and offloading behavior")
ax1.set_xticks(index + bar_width / 2)  # center each tick under its bar group
ax1.set_xticklabels(F_max_es_values)
ax1.legend(loc='upper left')
ax1.grid(True)

# Plot for average alphas on a second y-axis sharing the same x-axis
ax2 = ax1.twinx()
alphas = alphas_data[0]
ax2.plot(index + bar_width / 2, alphas, marker='o', color='red', label=r'Average offloading portions, $\hat{f} = 0$')

ax2.set_ylabel("Average UEs offloading portions")
ax2.legend(loc='upper right')

plt.show()


[36215213.62534444]
[25735785.87920674]
[32000836.85163995]
[29119679.57029104]
[36347484.43972605]
[22916575.62589505]
[23298834.56303016]
[28488922.1027994]
[19764479.95508612]
[37351917.85212491]
[28304404.55035065]
[26369998.45279995]
[28124245.4947768]
[25389971.26622317]
Train : Episode 1/100 - Steps Count 1 - Avg Delay: [0.72881996], Avg Energy: [0.01488462], Avg Reward: [-0.33704201]
----------------------------------------------------------------------------------------------------
[19955573.88327515]
[25599308.61501873]
[30952051.13526261]
[36745956.36260124]
[17391361.80924979]
[34643190.92813012]
[21875591.72601535]
[35626943.54282025]
[23099028.99463775]
[37788845.25186591]
[25615378.26428022]
[22894131.58299772]
[27621685.46767512]
[30775326.29289488]
[28195681.11701357]
[23091831.89019085]
[36928338.18491662]
[22065043.99434929]
[20288604.8359009]
[34904698.82736389]
[16895778.92344718]
[19957620.33945151]
[22851716.17534078]
[35038134.28016576]
[37724935.89229231]
[3444

KeyboardInterrupt: 