In [None]:
import gymnasium as gym
import numpy as np
from gymnasium import spaces
from gymnasium.spaces import MultiDiscrete, Discrete

In [None]:
# Print the installed Gymnasium version.
print(gym.__version__)

# Creating a Custom Environment



In [None]:
# Define the custom packet-scheduler environment.

class SchedulerEnvironment(gym.Env):
    """Gymnasium environment for a scheduler serving three packet queues.

    Queues 0 and 1 are the priority queues and queue 2 is the best-effort
    queue. Each timeslot the agent picks one queue (the action); the packet at
    the head of that queue is removed and the reward depends on how long that
    packet waited.

    Runtime state is kept on the instance, never inside ``observation_space``
    (a space only describes the *shape* of observations):

    * ``queue_delays``: ``{queue index: [waiting time of each packet]}`` with
      the head of the queue at list index 0. The queue length is the length
      of the list.
    * ``timeslots_since_service``: per-queue count of timeslots since a packet
      was last taken from that queue.
    * ``packets_served``: per-queue count of packets removed this episode.
    * ``switch_delay``: 1 if the last step was spent switching queue
      (scenario two only), otherwise 0.
    """

    NUM_QUEUES = 3
    BEST_EFFORT_QUEUE = 2

    # Exclusive upper bounds of the "queues" observation; a queue therefore
    # holds at most ``size - 1`` packets so observations stay inside the space.
    QUEUE_SIZES = (10, 10, 10)
    # Exclusive upper bound of the "queue_delay" observation; can be adjusted
    # after initial observations.
    MAX_DELAY = 100

    # Fraction of a packet arriving at each queue per timeslot.
    PACKET_ARRIVAL_RATES = (0.3, 0.25, 0.4)
    # Number of packets in each queue at the start of an episode.
    INITIAL_QUEUE_SIZES = (4, 5, 6)

    # Target mean delay per queue and tolerance used by the reward function.
    MEAN_DELAYS = {0: 6, 1: 4, 2: 8}
    MARGIN_OF_DELAY = 1

    def __init__(self, size=5, scenario_one = True, terminate_num = 200):
        """Create the environment.

        Args:
            size: Unused; kept so existing callers that pass it keep working.
            scenario_one: If true, the selected queue is served immediately.
                If false (scenario two), selecting a queue other than the
                current one spends the timeslot switching and serves nothing.
            terminate_num: Number of timeslots after which the episode ends.
        """
        self.scenario_one = scenario_one
        self.terminate_num = terminate_num
        self.step_counter = 0  # i.e. timeslots elapsed in the current episode

        self.observation_space = spaces.Dict(
            {
                # number of packets waiting in each queue
                "queues": MultiDiscrete(list(self.QUEUE_SIZES)),
                # number of timeslots since scheduler picked packet from queue
                "queue_delay": MultiDiscrete([self.MAX_DELAY] * self.NUM_QUEUES),
                # boolean for timeslot delay during switch:
                # zero for no delay, one for delay.
                "switchCounter": Discrete(2),
            }
        )
        self.action_space = spaces.Discrete(self.NUM_QUEUES)

        self.queue_delays = {q: [] for q in range(self.NUM_QUEUES)}
        self.timeslots_since_service = [0] * self.NUM_QUEUES
        self.packets_served = [0] * self.NUM_QUEUES
        # Fractional packets accumulated per queue; a whole packet is enqueued
        # each time an entry reaches 1.0.
        self._arrival_credit = [0.0] * self.NUM_QUEUES

        # Queue the scheduler is currently attached to (scenario two only).
        self.current_state = None
        self.switch_delay = 0

        # Rendering is not implemented; ``window`` stays None so ``close`` is
        # always safe to call.
        self.window = None

    def _increment_delay(self, queue):
        """Age every packet in ``queue`` by one timeslot (in place)."""
        for index, delay in enumerate(queue):
            queue[index] = delay + 1

    def _calculate_delay(self):
        """Age every waiting packet in every queue by one timeslot."""
        for queue in self.queue_delays.values():
            self._increment_delay(queue)

    def _add_delay_to_queue(self, served_queue=None):
        """Update the per-queue "timeslots since last service" counters.

        Args:
            served_queue: Index of the queue a packet was taken from in this
                timeslot, or None if no packet was taken.
        """
        for q in range(self.NUM_QUEUES):
            if q == served_queue:
                self.timeslots_since_service[q] = 0
            else:
                self.timeslots_since_service[q] += 1

    def _add_packets(self):
        """Accumulate fractional arrivals and enqueue any whole packets."""
        for q, rate in enumerate(self.PACKET_ARRIVAL_RATES):
            self._arrival_credit[q] += rate
            if self._arrival_credit[q] >= 1.0:
                self._arrival_credit[q] -= 1.0
                # A packet arriving at a full queue is dropped so that the
                # queue length never leaves the observation space.
                if len(self.queue_delays[q]) < self.QUEUE_SIZES[q] - 1:
                    self.queue_delays[q].append(0)

    def _modify_states(self):
        """Advance the queues by one timeslot."""
        # Age first, then add: freshly added packets keep a delay of zero.
        self._calculate_delay()
        self._add_packets()

    def _get_obs(self):
        """Build an observation matching ``self.observation_space``."""
        delay_cap = self.MAX_DELAY - 1
        return {
            "queues": np.array(
                [len(self.queue_delays[q]) for q in range(self.NUM_QUEUES)],
                dtype=np.int64,
            ),
            # Clipped so a long-neglected queue cannot leave the space.
            "queue_delay": np.array(
                [min(slots, delay_cap) for slots in self.timeslots_since_service],
                dtype=np.int64,
            ),
            "switchCounter": self.switch_delay,
        }

    def _calculate_avg_delay(self, queue):
        """Return the mean waiting time in ``queue`` (0.0 if it is empty)."""
        if not queue:
            return 0.0
        return sum(queue) / len(queue)

    def _get_info(self):
        """Return diagnostics: per-queue average delay and packets served."""
        return {
            "average_delay": [
                self._calculate_avg_delay(self.queue_delays[q])
                for q in range(self.NUM_QUEUES)
            ],
            "packets_served": list(self.packets_served),
        }

    def _initialise_delays(self, queue, size):
        """Reset ``queue`` in place to ``size`` packets with zero delay."""
        queue.clear()
        queue.extend([0] * size)

    def reset(self, seed=None, options=None):
        """Start a new episode and return ``(observation, info)``."""
        # We need the following line to seed self.np_random
        super().reset(seed=seed)

        # TODO: randomise queues
        for q, size in enumerate(self.INITIAL_QUEUE_SIZES):
            self._initialise_delays(self.queue_delays[q], size)

        self._arrival_credit = [0.0] * self.NUM_QUEUES
        self.timeslots_since_service = [0] * self.NUM_QUEUES
        self.packets_served = [0] * self.NUM_QUEUES
        self.current_state = None
        self.switch_delay = 0
        self.step_counter = 0

        observation = self._get_obs()
        info = self._get_info()

        if self.render_mode == "human":
            self._render_frame()

        return observation, info

    def _reward_function(self, action, packet_delay):
        """Return the reward for serving a packet from queue ``action``.

        Best-effort queue: -1 if the packet waited longer than the queue's
        mean delay, otherwise +1.

        Priority queues: +1 if the packet's delay exceeds the queue's mean
        delay by more than the margin, -1 if it falls short of the mean by
        more than the margin, and 0 if it lies within the margin.
        """
        mean_delay = self.MEAN_DELAYS[action]

        if action == self.BEST_EFFORT_QUEUE:
            # Penalize any delay above the best-effort mean delay.
            return -1 if packet_delay > mean_delay else 1

        # packet delay > mean delay by the margin: encourage serving it.
        if packet_delay - self.MARGIN_OF_DELAY > mean_delay:
            return 1
        # packet delay < mean delay by the margin: discourage serving it.
        if packet_delay + self.MARGIN_OF_DELAY < mean_delay:
            return -1
        # packet delay within margin.
        return 0

    def _retrieve_packet(self, action):
        """Remove the head packet of queue ``action`` and return its delay."""
        packet_delay = self.queue_delays[action].pop(0)
        self.packets_served[action] += 1
        return packet_delay

    def step(self, action):
        """Run one timeslot.

        Args:
            action: Index of the queue to serve.

        Returns:
            ``(observation, reward, terminated, truncated, info)``;
            ``truncated`` is always False.

        Raises:
            ValueError: If ``action`` is not in ``self.action_space``.
        """
        if not self.action_space.contains(action):
            raise ValueError(f"invalid action: {action!r}")
        action = int(action)

        # In scenario two, choosing a different queue costs this timeslot.
        switching = (not self.scenario_one) and action != self.current_state
        queue_empty = not self.queue_delays[action]

        packet_delay = 0
        served_queue = None
        if not queue_empty and not switching:
            packet_delay = self._retrieve_packet(action)
            served_queue = action

        # perform queue switch given the conditions.
        if switching:
            self.current_state = action
        self.switch_delay = 1 if switching else 0

        if queue_empty:
            # Selecting an empty queue is always penalized.
            reward = -1
        else:
            # NOTE(review): on a switching timeslot no packet is served and
            # the reward is computed with a delay of 0 — confirm intended.
            reward = self._reward_function(action, packet_delay)

        self._add_delay_to_queue(served_queue)
        self._modify_states()

        self.step_counter += 1
        terminated = self.step_counter >= self.terminate_num

        # observation made after modifying states.
        observation = self._get_obs()
        info = self._get_info()

        if self.render_mode == "human":
            self._render_frame()

        return observation, reward, terminated, False, info

    def _render_frame(self):
        """Rendering is not implemented for this environment."""
        raise NotImplementedError("SchedulerEnvironment does not support rendering")

    def close(self):
        """Release the rendering window, if one was ever created."""
        if self.window is not None:
            # Imported lazily: pygame is only needed when a window exists.
            import pygame

            pygame.display.quit()
            pygame.quit()
            self.window = None
