In [1]:
from routingapp.compare_algorithm.sec_morl_multipolicy.function_dijkstra import dijkstra, routing_k

from copy import deepcopy
import numpy as np
import json
import os
import gym
from gym.spaces import MultiDiscrete
        
class SDN_Env(gym.Env):
    """Gym environment that routes SDN requests through edge and/or cloud servers.

    On every step the action vector selects an edge server, a cloud server or
    both; the matching paths are computed with ``dijkstra`` and the predicted
    delay and link utilisation of the chosen route are recorded per server.

    Args:
        graph: Topology object. It must expose ``predict_delay``,
            ``predict_loss``, ``predict_bandwidth``, ``number_clients``,
            ``number_edge_servers``, ``number_cloud_servers``,
            ``edge_servers`` and ``cloud_servers``. Passing ``None`` (or an
            object without these attributes) raises ``AttributeError``.
        function: Helper exposing ``cal_bandwidth(path, predict_bandwidth)``
            and ``cal_delay(path, predict_delay)``.
        request: Non-empty sequence of requests. Each request is indexed as
            ``req[0]`` and ``req[1]`` (presumably source and destination
            node ids -- confirm against ``dijkstra``'s argument order).
        w: Weight of the delay reward; ``1 - w`` weights the
            link-utilisation reward.
    """

    def __init__(self, graph, function, request, w=1.0):
        super(SDN_Env, self).__init__()
        self.w = w
        self.graph = graph
        self.function = function
        self.request = request

        # Predicted per-node link metrics, indexed by node id in get_obs().
        self.predict_delay = graph.predict_delay
        self.predict_loss = graph.predict_loss
        self.predict_bandwidth = graph.predict_bandwidth

        self.number_clients = graph.number_clients
        self.number_edge_servers = graph.number_edge_servers
        self.number_cloud_servers = graph.number_cloud_servers
        # Sorted so that action index i always maps to the same server node.
        self.edge_servers = sorted(graph.edge_servers)
        self.cloud_servers = sorted(graph.cloud_servers)

        self.step_cnt = 0
        self.current_request_index = 0
        self.current_request = self.request[self.current_request_index]

        # Step budget per request, and for the whole episode.
        self.Treq = (graph.number_edge_servers + graph.number_cloud_servers
                     + graph.number_cloud_servers * graph.number_edge_servers)
        self.Tmax = self.Treq * len(self.request)

        self.action_space = self._make_action_space()

        # Initialize the environment
        self.reset()

    def _make_action_space(self):
        """Return a Box in [0, 1] with one entry per edge server followed by one per cloud server."""
        size = self.number_edge_servers + self.number_cloud_servers
        return gym.spaces.Box(low=0, high=1, shape=(size,))

    def reset(self, **kwargs):
        """Reset all episode state and return the initial observation."""
        self.step_cnt = 0
        self.step_request_cnt = 0
        self.task_size = 0
        self.task_user_id = 0
        self.current_request_index = 0
        self.current_request = self.request[self.current_request_index]
        self.rew_t = 0
        self.rew_lu = 0
        self.invalid_act_flag = False
        self.unassigned_task_list = []

        # -1 means "no server selected"; defined here so estimate_rew() and
        # get_reward() are callable before the first step().
        self.edge_action = -1
        self.cloud_action = -1

        # One task list per server, indexed like edge_servers / cloud_servers.
        self.edge_lists = [[] for _ in range(self.number_edge_servers)]
        self.cloud_lists = [[] for _ in range(self.number_cloud_servers)]

        self.done = False
        self.reward_buff = []

        self.action_space = self._make_action_space()

        return self.get_obs()

    def _select_servers(self, actions):
        """Translate the action vector into ``(edge_action, cloud_action)``.

        ``-1`` marks a server tier that is not used. If every edge entry
        equals 1 only an edge server is used; otherwise, if every cloud entry
        equals 1 only a cloud server is used; otherwise both tiers are used.
        Within a tier the arg-max entry is picked.
        """
        n_edge = self.number_edge_servers
        edge_scores = actions[:n_edge]
        cloud_scores = actions[n_edge:]
        if np.all(edge_scores == 1):
            return int(np.argmax(edge_scores)), -1
        if np.all(cloud_scores == 1):
            return -1, int(np.argmax(cloud_scores))
        return int(np.argmax(edge_scores)), int(np.argmax(cloud_scores))

    def _find_path(self, start, end):
        """Run ``dijkstra`` between two nodes; the result is treated as ``None`` when no path exists."""
        # The literal 1 is kept from the original calls; its meaning is
        # defined by the project's dijkstra helper.
        return dijkstra(self.graph, 1, start, end)

    def _path_cost(self, paths):
        """Return ``(link_utilisation, delay)`` summed over the given paths."""
        link_utilisation = sum(
            self.function.cal_bandwidth(path, self.predict_bandwidth) for path in paths)
        delay = sum(
            self.function.cal_delay(path, self.predict_delay) for path in paths)
        return link_utilisation, delay

    def _new_task(self, server_index, link_utilisation, delay):
        """Build an independent task record for one server."""
        return {
            'start_step': self.step_cnt,
            'delay_time': delay,
            'link_utilisation': link_utilisation,
            'to': server_index,
        }

    def _assign_task(self, edge_action, cloud_action):
        """Route the current request and record its cost on the chosen server(s).

        Returns:
            The end-to-end path as one list, or ``None`` when any required
            segment has no path (nothing is recorded in that case).

        Raises:
            AssertionError: If neither action is a valid server index.
        """
        start = self.current_request[0]
        end = self.current_request[1]
        use_edge = 0 <= edge_action < self.number_edge_servers
        use_cloud = 0 <= cloud_action < self.number_cloud_servers
        assert use_edge or use_cloud, f'server selection action is invalid: {edge_action}, {cloud_action}'

        if use_edge and use_cloud:
            # start -> edge server -> cloud server -> end
            edge_node = self.edge_servers[edge_action]
            cloud_node = self.cloud_servers[cloud_action]
            edge_path = self._find_path(start, edge_node)
            cloud_path = self._find_path(edge_node, cloud_node)
            des_path = self._find_path(cloud_node, end)
            print(f"Edge Path: {edge_path}, Cloud Path: {cloud_path}, Destination Path: {des_path}")
            if edge_path is None or cloud_path is None or des_path is None:
                return None
            # Separate records per tier: sharing one dict between both lists
            # would make estimate_rew() count the same cost twice.
            edge_lu, edge_delay = self._path_cost([edge_path])
            cloud_lu, cloud_delay = self._path_cost([cloud_path, des_path])
            self.edge_lists[edge_action].append(
                self._new_task(edge_action, edge_lu, edge_delay))
            self.cloud_lists[cloud_action].append(
                self._new_task(cloud_action, cloud_lu, cloud_delay))
            # Drop the first node of each later segment; it repeats the
            # last node of the previous one.
            return edge_path + cloud_path[1:] + des_path[1:]

        if use_edge:
            # start -> edge server -> end
            edge_node = self.edge_servers[edge_action]
            edge_path = self._find_path(start, edge_node)
            des_path = self._find_path(edge_node, end)
            print(f"Edge Path: {edge_path}, Destination Path: {des_path}")
            if edge_path is None or des_path is None:
                return None
            edge_lu, edge_delay = self._path_cost([edge_path, des_path])
            self.edge_lists[edge_action].append(
                self._new_task(edge_action, edge_lu, edge_delay))
            return edge_path + des_path[1:]

        # start -> cloud server -> end
        cloud_node = self.cloud_servers[cloud_action]
        cloud_path = self._find_path(start, cloud_node)
        des_path = self._find_path(cloud_node, end)
        print(f"Cloud Path: {cloud_path}, Destination Path: {des_path}")
        if cloud_path is None or des_path is None:
            return None
        cloud_lu, cloud_delay = self._path_cost([cloud_path, des_path])
        self.cloud_lists[cloud_action].append(
            self._new_task(cloud_action, cloud_lu, cloud_delay))
        return cloud_path + des_path[1:]

    def step(self, actions):
        """Apply one action and advance the environment.

        Args:
            actions: Vector with ``number_edge_servers`` entries followed by
                ``number_cloud_servers`` entries.

        Returns:
            ``(obs, reward, done, info)``. ``info['complete_path']`` holds the
            full route when every segment was found; ``reward`` is ``-inf``
            when the action could not be routed.

        Raises:
            AssertionError: If called after the episode has finished, or if
                no valid server was selected.
        """
        assert self.step_cnt < self.Tmax, 'environment already output done'
        assert not self.done, 'environment already output done'
        self.step_cnt += 1
        self.step_request_cnt += 1
        finished_task = []
        # The flag describes this step's action only; leaving it set would
        # turn every later reward of the episode into -inf.
        self.invalid_act_flag = False

        # Action processing
        self.current_request = self.request[self.current_request_index]
        # asarray so that plain lists compare element-wise like arrays do.
        edge_action, cloud_action = self._select_servers(np.asarray(actions))
        self.edge_action = edge_action
        self.cloud_action = cloud_action

        print(f"Edge Action: {edge_action}, Cloud Action: {cloud_action}")
        print(f"Request: {self.request}")
        print(f"Current Request: {self.current_request}")

        # Assignment of tasks
        complete_path = self._assign_task(edge_action, cloud_action)
        if complete_path is None:
            self.invalid_act_flag = True

        # Estimate rewards based on task information
        self.rew_t, self.rew_lu = self.estimate_rew()

        # Done condition
        if self.step_request_cnt >= self.Treq:
            # Current request used up its step budget: move to the next one
            # and restart the per-request counter so it gets Treq steps too.
            self.current_request_index += 1
            self.step_request_cnt = 0
            if self.current_request_index < len(self.request):
                self.current_request = self.request[self.current_request_index]
                self.done = False
            else:
                self.done = True
        else:
            self.done = False
        if self.step_cnt >= self.Tmax:
            self.done = True
        done = self.done

        print(f"Done: {done}")

        # Observation encoding (after `done` is updated, since it is a feature)
        obs = self.get_obs()

        # Reward calculation
        reward = self.get_reward(finished_task)

        # Additional information: only report a path that is complete.
        info = {}
        if complete_path is not None:
            info['complete_path'] = complete_path

        return obs, reward, done, info

    def _server_features(self, server, tier_size):
        """Return the six observation features of one server node."""
        return [
            1.0,
            float(tier_size),
            float(1 - self.done),
            sum(self.predict_bandwidth[server]),
            sum(self.predict_delay[server]),
            sum(self.predict_loss[server]),
        ]

    def get_obs(self):
        """Return the observation as an array of shape ``(6, n_servers)``.

        Columns are the edge servers followed by the cloud servers; rows are
        the features built by ``_server_features``.
        """
        servers = [self._server_features(node, self.number_edge_servers)
                   for node in self.edge_servers]
        servers += [self._server_features(node, self.number_cloud_servers)
                    for node in self.cloud_servers]
        # Swap axes to get the shape (features, servers)
        return np.array(servers).swapaxes(0, 1)

    def estimate_rew(self):
        """Return ``(reward_dt, reward_dlu)`` for the currently selected server(s).

        Sums delay and link utilisation over every task recorded on the
        selected edge and cloud server, then scales by -0.01 and -50.
        """
        delay_time = 0
        link_utilisation = 0

        if 0 <= self.edge_action < self.number_edge_servers:
            for task in self.edge_lists[self.edge_action]:
                delay_time += task['delay_time']
                link_utilisation += task['link_utilisation']
        if 0 <= self.cloud_action < self.number_cloud_servers:
            for task in self.cloud_lists[self.cloud_action]:
                delay_time += task['delay_time']
                link_utilisation += task['link_utilisation']

        reward_dt = -delay_time * 0.01
        reward_dlu = -link_utilisation * 50

        return reward_dt, reward_dlu

    def get_reward(self, finished_task):
        """Return the scalar reward, or ``-inf`` when the last action was unroutable.

        ``finished_task`` is accepted for interface compatibility and is not used.
        """
        if self.invalid_act_flag:
            return -float('inf')
        reward_dt, reward_dlu = self.estimate_rew()
        return self.w * reward_dt + (1.0 - self.w) * reward_dlu

    def seed(self, seed=None):
        """Seed NumPy's global random generator."""
        np.random.seed(seed)

    def render(self, mode='human'):
        """No-op; rendering is not implemented."""
        pass

    def estimate_performance(self):
        """Return ``(total_delay, total_bandwidth_utilization)`` over all recorded tasks."""
        total_delay = 0
        total_bandwidth_utilization = 0

        # edge_lists / cloud_lists hold one list of task dicts per server,
        # so two levels of iteration are needed.
        for server_tasks in self.edge_lists + self.cloud_lists:
            for task in server_tasks:
                total_delay += task['delay_time']
                total_bandwidth_utilization += task['link_utilisation']

        return total_delay, total_bandwidth_utilization

In [13]:
import networkx as nx
from routingapp.compare_algorithm.sec_morl_multipolicy.train import train_sdn_policy
from routingapp.common.routing_utils import * 
from routingapp.common.models import RouteTask
from routingapp.compare_algorithm.sec_morl_multipolicy.module_function import Function
from routingapp.compare_algorithm.sec_morl_multipolicy.module_graph import Graph

# Helper object passed to SDN_Env as its `function` argument further down.
func = Function()

In [6]:
from extras.utils import get_topo, get_link_info_legacy
# Fetch the topology and the link records used by the following cells.
# NOTE(review): the AttributeError recorded further down shows `graph` is a
# DiGraph; `link_info` is iterated below as dicts keyed by 'src.dpid' / 'dst.dpid'.
graph = get_topo()
link_info = get_link_info_legacy()


In [14]:
# `request` must be a sequence of (start, end) pairs: SDN_Env indexes every
# request with [0] and [1], so the flat list [6, 2] would be treated as two
# scalar requests and fail on the first step.
# NOTE(review): `graph` from get_topo() is a DiGraph (see the recorded
# AttributeError); SDN_Env needs an object exposing predict_delay,
# predict_loss, predict_bandwidth and the server lists -- presumably the
# imported module_graph.Graph. TODO: build that object before this call.
env = SDN_Env(graph=graph, function=func, request=[(6, 2)], w=1.0)
env

AttributeError: 'DiGraph' object has no attribute 'predict_delay'

In [10]:
# Index the link records by their (source dpid, destination dpid) pair;
# a later record with the same pair replaces an earlier one.
li_hmap = {(link['src.dpid'], link['dst.dpid']): link for link in link_info}
li_hmap

{(6, 2): {'src.dpid': 6,
  'dst.dpid': 2,
  'packet_loss': 0.05609756097560976,
  'delay': 126.064,
  'bandwidth': 154,
  'link_usage': 0.0010609796734421942,
  'link_utilization': 0.0006889478398975287},
 (6, 5): {'src.dpid': 6,
  'dst.dpid': 5,
  'packet_loss': 0.05853658536585366,
  'delay': 160.066,
  'bandwidth': 115,
  'link_usage': 0.0010609796734421942,
  'link_utilization': 0.0009225910203845167},
 (5, 1): {'src.dpid': 5,
  'dst.dpid': 1,
  'packet_loss': 0.08292682926829269,
  'delay': 64.073,
  'bandwidth': 106,
  'link_usage': 0.00064,
  'link_utilization': 0.0006037735849056604},
 (5, 4): {'src.dpid': 5,
  'dst.dpid': 4,
  'packet_loss': 0.007317073170731708,
  'delay': 78.07,
  'bandwidth': 138,
  'link_usage': 0.0007997334221925481,
  'link_utilization': 0.0005795169726032957},
 (5, 6): {'src.dpid': 5,
  'dst.dpid': 6,
  'packet_loss': 0.05853658536585366,
  'delay': 160.066,
  'bandwidth': 115,
  'link_usage': 0.0010609796734421942,
  'link_utilization': 0.0009225910203