In [6]:
import numpy as np
from scipy.special import erfcinv
import json
import matplotlib.pyplot as plt
import os
import copy

class EdgeComputingEnvironment:
    def __init__(self, M=15, area_size=100, D_m=1354, eta_m_range=(100, 300), F_max_ue=1.5, P_max=23, B=5e6, T_max=10e-3, F_max_es=30, S_max_es=60, epsilon=10e-7, E_max=3, theta=10**-26, L=8, phi=0.02e-3, N0=-174, f_es_dev=0.02, f_ue_dev=0.02):

        self.M = M  
        self.area_size = area_size  
        self.D_m = D_m  
        self.eta_m_range = eta_m_range  
        self.F_max_ue = F_max_ue * 1e9  
        self.P_max = 10 ** (P_max / 10)  
        self.B = B  
        self.T_max = T_max  
        self.F_max_es = F_max_es * 1e9  
        self.S_max_es = S_max_es * 1e3  
        self.epsilon = epsilon  
        self.E_max = E_max * 1e-3 
        self.theta = theta
        self.L = L 
        self.phi = phi 
        self.R_min = 1e6  
        self.N0 = N0 
        self.N0 = 10 ** ((N0 - 30) / 10)
        self.PL_d = lambda d: 10 ** ((10 ** ((-35.3 - (37.6 * np.log10(d))) / 10)) / 10)
        self.f_es_dev = f_es_dev
        self.f_ue_dev = f_ue_dev 
        self.penalty = 100
        self.treshhold = -200

        self.user_device_params = [] 
        self.initialize_user_device_params()  

        self.server_params = self.initialize_server_params() 

        self.cache = [] 
        self.current_cache_size = 0  
        self.transmitting_tasks = [] 
        self.processing_tasks = [] 
        self.current_time = 0

        self.total_bandwidth = 0
        self.total_computation = 0 





    def initialize_user_device_params(self):

        for device_id in range(self.M):
            d = np.random.uniform(0, self.area_size / 2)  
            g_m = np.array([self.PL_d(d)]) 
            h_bar = np.random.randn(1, self.L) + 1j * np.random.randn(1, self.L)

            self.user_device_params.append({
                'device_id': device_id, 
                'd': d,
                'g_m': g_m,
                'h_bar': h_bar,
            })

    def initialize_server_params(self):

        return {
            'S_max_es': self.S_max_es 
        }

    def calculate_gamma_m(self, b_m, p_m, user_id):

        h_m = np.sqrt(self.user_device_params[user_id]['g_m'])[:, None] * self.user_device_params[user_id]['h_bar']  
        gamma_m = (p_m * np.linalg.norm(h_m, axis=1) ** 2) / (b_m * self.B * self.N0)  

        return gamma_m

    def calculate_uplink_rate(self, b_m, p_m, user_id):

        gamma_m = self.calculate_gamma_m(b_m, p_m, user_id) 
        V_m = 1 - (1 / (1 + gamma_m) ** 2)  
        Q_inv = np.sqrt(2) * erfcinv(2 * self.epsilon) 
        R_m = (self.B / np.log(2)) * ((b_m * np.log(1 + gamma_m)) - ((np.sqrt((b_m * V_m) / (self.phi * self.B))) * Q_inv)) 

        return R_m

    def calculate_delay(self, alpha_m, cache_hit, b_m, p_m, D_m, f_ue_m, f_es_m, f_ue_est, f_es_est, eta_m, user_id):

        actual_f_ue_m = f_ue_m - f_ue_est  
        actual_f_es_m = f_es_m - f_es_est

        if cache_hit == 1:
            T_es = (eta_m * D_m) / actual_f_es_m 
            T_e2e = T_es

        else:
            T_ue = (alpha_m * eta_m * D_m) / actual_f_ue_m 
            R_m = self.calculate_uplink_rate(b_m, p_m, user_id) 
            T_tr = (D_m * 8) / R_m 
            T_es = ((1 - alpha_m) * eta_m * D_m) / actual_f_es_m  
            T_e2e = T_ue + T_tr + T_es 
        return T_e2e

    def calculate_transmission_delay(self, b_m, p_m, D_m, user_id):

        R_m = self.calculate_uplink_rate(b_m, p_m, user_id)  
        T_co =  (D_m * 8) / R_m 

        return T_co

    def calculate_server_processing_delay(self, alpha_m, cache_hit, D_m, f_es_m, f_es_est, eta_m):

        if cache_hit == 0:
            T_es = ((1 - alpha_m) * eta_m * D_m) / (f_es_m - f_es_est)  

        else:
            T_es = (eta_m * D_m) / (f_es_m - f_es_est)

        return T_es

    def calculate_energy_consumption(self, s_m, b_m, alpha_m, p_m, D_m, f_ue_m, f_ue_est, eta_m, user_id):
        R_m = self.calculate_uplink_rate(b_m, p_m, user_id) 

        actual_f_ue_m = f_ue_m - f_ue_est  
        E_ue = alpha_m * (self.theta / 2 * eta_m * D_m * (actual_f_ue_m ** 2))  
        E_tx = ((1 - alpha_m) * (D_m * 8) * p_m) / R_m 

        if s_m == 1:  
            E_total = 0 
        else:
            E_total = E_ue + E_tx

        return E_total

    def manage_cache(self, task_info, task_delay):

        if task_delay == 0:
            for task in self.cache:
                if task_info == task[0]:
                    return True
            return False

        task_size = task_info['D_m'] * 8
        Server_Max_Capacity = self.server_params['S_max_es']  

        if (task_size + self.current_cache_size) <= Server_Max_Capacity:
            self.cache.append((task_info, task_delay))  
            self.current_cache_size += task_size
            return True
        else:
            sorted_cache = sorted(self.cache, key=lambda x: x[1], reverse=True) 

            while (task_size + self.current_cache_size) > Server_Max_Capacity:
                if not sorted_cache:
                    break  
                last_task = sorted_cache.pop() 
                self.cache.remove(last_task)
                self.current_cache_size -= last_task[0]['D_m'] * 8 

            self.cache.append((task_info, task_delay))
            self.current_cache_size += task_size 

            return True

    def step(self, actions, tasks, users_id):
        task_rewards = []
        state_info = []  
        done = False

        for action, task, user_id in zip(actions, tasks, users_id):
            cache_hit = 1 if self.manage_cache(task, 0) else 0

            alpha_m = action[0]
            b_m = action[1]
            p_m = action[2]
            F_max_ue = action[3]
            F_max_es = action[4]

            f_ue_est = F_max_ue * self.f_ue_dev  
            f_es_est = F_max_es * self.f_es_dev  

            delay = self.calculate_delay(
                alpha_m, cache_hit, b_m, p_m,
                task['D_m'], F_max_ue, F_max_es, f_ue_est,
                f_es_est, task['eta_m'], user_id
            )

            energy = self.calculate_energy_consumption(
                cache_hit, b_m, alpha_m, p_m, task['D_m'], F_max_ue,
                f_es_est, task['eta_m'], user_id
            )

            R_m = self.calculate_uplink_rate(b_m, p_m, user_id)
            print(R_m)

            if cache_hit == 0:
                transmission_end_time = self.current_time + self.calculate_transmission_delay(b_m, p_m,task['D_m'], user_id)
                processing_end_time = transmission_end_time + self.calculate_server_processing_delay(alpha_m, cache_hit, task['D_m'], F_max_es, f_es_est, task['eta_m'])

                self.transmitting_tasks.append((self.current_time, transmission_end_time, b_m))
                self.processing_tasks.append((transmission_end_time, processing_end_time, F_max_es))

                self.manage_cache(task, delay)
            else:
                processing_end_time = self.current_time + self.calculate_server_processing_delay(alpha_m, cache_hit, task['D_m'], F_max_es, f_es_est, task['eta_m'])
                self.processing_tasks.append((self.current_time, processing_end_time, F_max_es))

            self.total_bandwidth = sum(b for _, end_time, b in self.transmitting_tasks if end_time > self.current_time)
            self.total_computation = sum(f for _, end_time, f in self.processing_tasks if end_time > self.current_time)

            self.transmitting_tasks = [(start_time, end_time, b) for start_time, end_time, b in self.transmitting_tasks if end_time > self.current_time]
            self.processing_tasks = [(start_time, end_time, f) for start_time, end_time, f in self.processing_tasks if end_time > self.current_time]

            task_reward  = -delay - energy

            if delay > task['T_max']:
                task_reward -= self.penalty
            if energy > self.E_max:
                task_reward -= self.penalty
            if R_m < self.R_min:
                task_reward -= self.penalty
            if self.total_bandwidth > 1:
                task_reward -= self.penalty
            if self.total_computation > self.F_max_es:
                task_reward -= self.penalty

            if task_reward < self.treshhold:
                done = True

            task_rewards.append(task_reward)

            state_info.append({
                'cache_size': self.current_cache_size,
                'device_id': user_id,
                'task': task,  
                'delay': delay,
                'energy': energy,
                'Occupied bandwidth': self.total_bandwidth,
                'Occupied computation': self.total_computation
            })

        self.current_time += 0.010 

        next_state = state_info
        return task_rewards, next_state, done

    def reset(self):
        """
        Reset the environment to its initial state.
        """
        self.cache = [] 
        self.current_cache_size = 0  
        self.transmitting_tasks = [] 
        self.processing_tasks = [] 
        self.current_time = 0  
        self.initialize_user_device_params()
        self.total_bandwidth = 0  
        self.total_computation = 0  
        self.server_params = self.initialize_server_params() 

        device_state_info = {user_id: {
            'Occupied bandwidth':  self.total_bandwidth,
            'Occupied computation': self.total_computation,
            'cache_size': self.current_cache_size ,
            'device_id' : None,
            'task' : {
            'eta_m': 0,
            'T_max': 0,
            'D_m': 0
        }
        } for user_id in range(self.M)}

        return device_state_info

    def render(self):
        print(f"Total Bandwidth Used: {self.total_bandwidth}")
        print(f"Total Computation Used: {self.total_computation}")
        print(f"Current Cache Size: {self.current_cache_size}")
        print(f"Number of Transmitting Tasks: {len(self.transmitting_tasks)}")
        print(f"Number of Processing Tasks (Not Exist In Cache): {len(self.processing_tasks)}")


In [7]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import models, layers, optimizers
import random
from collections import deque

# Deep Q-Network (DQN) Agent
class DQNAgent:
    def __init__(self, env, num_users, alpha=0.001, gamma=0.85, epsilon=1.0, epsilon_decay=0.995, epsilon_min=0.01, batch_size=64, max_steps_per_episode=20):
        self.env = env  # Environment for the agent
        self.num_users = num_users  # Number of users/devices in the environment
        self.alpha = alpha  # Learning rate
        self.gamma = gamma  # Discount factor for future rewards
        self.epsilon = epsilon  # Exploration rate
        self.epsilon_decay = epsilon_decay  # Decay rate for epsilon
        self.epsilon_min = epsilon_min  # Minimum value for epsilon
        self.batch_size = batch_size  # Batch size for experience replay
        self.max_steps_per_episode = max_steps_per_episode  # Maximum steps per episode

        # Define the action space and state space dimensions
        self.state_dim = 5  # As each state has 5 parameters
        self.action_dim = 5  # As each action has 5 parameters

        self.memory = deque(maxlen=2000)  # Experience replay memory
        self.model = self.build_model()  # Neural network model

    def build_model(self):
        # Build a neural network to approximate the Q-values
        model = models.Sequential()
        model.add(layers.Dense(64, input_dim=self.state_dim, activation='relu'))
        model.add(layers.Dense(64, activation='relu'))
        model.add(layers.Dense(self.action_dim, activation='linear'))
        model.compile(loss='mse', optimizer=optimizers.Adam(lr=self.alpha))
        return model

    def remember(self, state, action, reward, next_state, done):
        # Store experience in memory
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        # Choose an action based on epsilon-greedy policy
        if np.random.rand() <= self.epsilon:
            return self.env.action_space.sample()  # Sample random action within action_space
        act_values = self.model.predict(state)  # Predict action values using DQN model
        return np.clip(act_values[0], self.env.action_space.low, self.env.action_space.high)  # Clip actions to action_space bounds

    def replay(self):
        # Experience replay to train the network
        if len(self.memory) < self.batch_size:
            return
        minibatch = random.sample(self.memory, self.batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        # Load model weights
        self.model.load_weights(name)

    def save(self, name):
        # Save model weights
        self.model.save_weights(name)

    def create_task(self):
        # Create a new task with random parameters within specified ranges
        eta_m = np.round(np.random.choice(np.linspace(self.env.eta_m_range[0], self.env.eta_m_range[1], 100)))
        T_max_task = 10e-3  # Static according to article
        task_info = {
            'eta_m': eta_m,
            'T_max': T_max_task,
            'D_m': 1354  # Task data size
        }
        return task_info

    def train(self, num_episodes):
        # Lists to store average delay and energy values for each episode
        avg_delays = []
        avg_energies = []

        for episode in range(num_episodes):
            # Initialize device state information for all users at the beginning of each episode
            device_state_info = self.env.reset()

            # Initialize total delay and energy for this episode
            total_delay = 0
            total_energy = 0
            total_reward = 0

            # Initialize the number of steps taken in this episode
            num_steps = 0

            for step in range(self.max_steps_per_episode):
                # Initialize lists for actions, tasks, and user IDs
                actions = []
                tasks = []
                users_id = []

                # Generate a random number of tasks and assign them to devices
                num_tasks = np.random.randint(self.num_users / 2, self.num_users + 1)

                # Randomly distribute tasks to users
                task_distribution = np.random.choice(range(self.num_users), num_tasks, replace=True)

                # Initialize a list to store state representations for action selection
                states_choose_actions = []

                for user_id in task_distribution:
                    # Create a new task
                    task = self.create_task()
                    # Append the task to the tasks list
                    tasks.append(task)
                    # Append the user ID to the users_id list
                    users_id.append(user_id)

                    # Update device state information with the new task
                    device_state_info[user_id]['task'] = task
                    device_state_info[user_id]['device_id'] = user_id

                    # Create the state representation for action selection
                    state_choose_actions = [
                        device_state_info[user_id]['cache_size'],
                        user_id,
                        device_state_info[user_id]['task']['eta_m'],
                        device_state_info[user_id]['Occupied bandwidth'],
                        device_state_info[user_id]['Occupied computation']
                    ]

                    # Append the state representation to the list
                    states_choose_actions.append(state_choose_actions)

                # Convert states to numpy array for DQN
                states = np.array(states_choose_actions)

                # Select actions for each device with a task using the DQN model
                actions = [self.act(state.reshape(1, -1)) for state in states]

                # Execute the actions in the environment
                rewards, next_state_info, done = self.env.step(actions, tasks, users_id)
                print(rewards)
                print(next_state_info)

                # Accumulate the total reward for the episode
                total_reward += sum(rewards)
                print(total_reward)
                # List of state representations used for action selection
                state_info_list = states_choose_actions
                print(state_info_list)

                counter_Users = 0
                # Update Q-values and device state information for each device
                for user_id in task_distribution:
                    # Get the current device information
                    device_info = state_info_list[counter_Users]
                    # Get the next device information
                    next_device_info = next_state_info[counter_Users]
                    # Get the action taken by the user
                    action = actions[counter_Users]
                    # Get the reward received by the user
                    reward = rewards[counter_Users]

                    print(device_info)
                    print(next_device_info)
                    print(action)
                    print(reward) 

                    # Extract delay and energy values from the next device information
                    delay = next_device_info.pop('delay', 0)
                    energy = next_device_info.pop('energy', 0)
                    print(next_device_info)

                    # Convert next state to numpy array for DQN
                    next_state = np.array([
                        next_device_info.get('cache_size', 0),
                        user_id,
                        next_device_info.get('task', {}).get('eta_m', 0),
                        next_device_info.get('Occupied bandwidth', 0),
                        next_device_info.get('Occupied computation', 0)
                    ]).reshape(1, -1)
                    print("ff",next_state)
                    # Store experience in replay memory
                    self.remember(device_info, action, reward, next_state, done)

                    # Accumulate the total delay and energy for the episode
                    total_delay += delay
                    total_energy += energy

                    # Update device state information with the combined next state
                    device_state_info[user_id].update(next_device_info)
                    # Increment the counter for the number of users
                    counter_Users += 1

                # Increment the number of steps taken in the episode
                num_steps += 1
                if done:
                    # Exit the loop if the episode is done
                    break

            # Calculate and store average delay and energy for the episode
            avg_delay = (total_delay / (num_steps * len(state_info_list))) * 1000  # Convert to milliseconds
            avg_energy = (total_energy / (num_steps * len(state_info_list)))
            avg_reward = total_reward / num_steps
            avg_delays.append(avg_delay)
            avg_energies.append(avg_energy)

            # Print the episode's results
            print(f"Train : Episode {episode + 1}/{num_episodes} - Steps Count {num_steps} - Avg Delay: {avg_delay}, Avg Energy: {avg_energy}, Avg Reward: {avg_reward}")
            print("-" * 100)
            # Update epsilon for the epsilon-greedy strategy
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

            # Perform experience replay
            self.replay()


# Assuming you have your EdgeComputingEnvironment defined as per your code
env = EdgeComputingEnvironment()

# Define the number of users/devices
num_users = env.M

# Initialize the DQN agent
agent = DQNAgent(env, num_users)

# Train the agent
num_episodes = 5  # Adjust the number of episodes as needed
agent.train(num_episodes)


  super().__init__(name, **kwargs)


AttributeError: 'EdgeComputingEnvironment' object has no attribute 'action_space'