# An Environment for Admission Control in Acyclic Queueing Networks 
<div>
<img src="https://raw.githubusercontent.com/majidraeis/Figs/master/acyclic.png" width="500"/>
</div>

## Goal:
Consider an acyclic multi-server queueing system with $N$ stages. We want to learn an admission control policy that guarantees an upper bound of $d_{ub}$ on the end-to-end delay.

## State ($\bar{s}$):
$\bar{s}=(s_1, s_2, \cdots, s_N)$

i.e., the vector of queue lengths of all the stages upon arrival of a job 
## Actions ($a$):
Accept or Reject the incoming job
## Reward ( $R(s,a)$ ):
>

\begin{equation*}
r = \left\{
\begin{array}{ll}
+1,  \qquad & a = \text{Accept},\qquad d \le d_{ub}\\ 
-1,  \qquad & a = \text{Accept},\qquad d > d_{ub}\\
-1,  \qquad & a = \text{Reject}, \qquad d \le d_{ub}\\ 
+1,  \qquad & a = \text{Reject}, \qquad d > d_{ub}\\
\end{array} \right.
\end{equation*}

In [None]:
import os
import copy
import random
import operator
import functools
import numpy as np
import matplotlib.pyplot as plt

import gym
from gym  import spaces
from tqdm import tqdm


In [None]:
class AcyclicEnv(gym.Env):
    """Admission-control environment built on an `Acyclic` queueing network.

    At every step one job arrives and the agent accepts (truthy action) or
    rejects (falsy action) it.  The reward of an accepted job is only known
    once the job leaves the network, so `step` returns a list of transitions
    (the ones that became available during this step) rather than a single
    scalar reward.
    """

    def __init__(self, N_s, Mu_s, P_s, rho, d_ub):
        """
        N_s = array of number of servers at each stage
        Mu_s = array of service rates
        P_s = Probability of choosing each branch
        rho = traffic intensity
        d_ub = delay upperbound
        """
        self.N_s = N_s
        self.Mu_s = Mu_s
        self.P_s = P_s
        self.rho = rho
        self.d_ub = d_ub

        self.job_index = 1      # index of the job whose admission is decided next
        self.t_arr = 0          # arrival time of that job
        self.cnt = 1            # number of steps taken in the current episode
        self.MAX_STEPS = 1000   # episode length
        self.dep_vec = []       # departures produced by the latest network step

        B_max = 60
        self.action_space = spaces.Discrete(2)
        self.observation_space = [spaces.Discrete(B_max)]*len(N_s)

        self.cost = 0           # rejected jobs that would have met the deadline
        self.opt_flags = {}
        self.acyclic_job_dict = {}
        self.acyclic = Acyclic(N_s, Mu_s, P_s)
        self.qls = np.zeros(len(N_s), dtype=int)

    def step(self, action, opt_flag):
        """Apply the admission decision for the current job.

        Parameters
        ----------
        action : truthy to accept the arriving job, falsy to reject it.
        opt_flag : opaque value stored with the transition and returned
            unchanged in the matching `reward_vec` entry.

        Returns
        -------
        qls : queue lengths after the network advanced to the next arrival.
        reward_vec : list of `[s, action, s_prime, reward, opt_flag]` entries.
            A rejection yields its entry immediately (from a simulated
            what-if acceptance); an acceptance yields its entry in the step
            during which the job departs.
        done : True once `MAX_STEPS` steps have been taken.
        delay_vec : delays of the accepted jobs that departed in this step.
        """
        s = self.qls
        index = self.job_index
        self._take_action(action)
        s_prime = self.qls
        reward_vec = []
        delay_vec = []

        if not action:
            # The job was rejected: estimate the delay it would have seen on
            # a throw-away copy of the network to score the decision.
            v_delay = self._virtual_delay(index)
            reward = self._get_reward(v_delay, action)
            reward_vec.append([s, action, s_prime, reward, opt_flag])
            if v_delay < self.d_ub:
                self.cost += 1
        else:
            # The reward is deferred until the job departs.
            self.acyclic_job_dict[index] = {'s': s, 's_prime': s_prime}
            self.opt_flags[index] = opt_flag

        # The last row of dep_vec is the terminator event, not a departure.
        for row in self.dep_vec[:-1]:
            departed = int(row[0])
            delay = self._end_to_end_delay(self.acyclic, departed)
            self._forget_job(departed)

            record = self.acyclic_job_dict[departed]
            # Every departing job was accepted, so it is scored as action 1
            # regardless of the action taken for the job arriving now.
            reward = self._get_reward(delay, 1)
            reward_vec.append([record['s'], 1, record['s_prime'], reward,
                               self.opt_flags[departed]])  # delayed reward, action = 1
            delay_vec.append(delay)

        done = self.cnt >= self.MAX_STEPS
        self.cnt += 1
        return self.qls, reward_vec, done, delay_vec

    def _virtual_delay(self, index):
        """Delay job `index` would experience if admitted to a copy of the network."""
        virtual_Qnet = copy.deepcopy(self.acyclic)
        time = float(self.t_arr)
        info_vec = np.zeros((2, 3))
        info_vec[0] = [index, time, 1]
        time += 5
        info_vec[1] = [0, time, 0]
        # Advance the copy in windows of 5 time units, with no further
        # arrivals, until the virtual job has left the network.
        while True:
            _, dep_vec = virtual_Qnet._step(info_vec)
            if index in dep_vec[:, 0]:
                return self._end_to_end_delay(virtual_Qnet, index)
            info_vec[0] = [0, time, 0]
            time += 5
            info_vec[1] = [0, time, 0]

    def _end_to_end_delay(self, network, index):
        """Delay recorded for job `index` across the stages of `network`.

        NOTE(review): this sums the waiting time ('Tw') at the first stage
        and at the visited fork branch with the service time ('Ts') at the
        join stage, exactly as the original code did -- confirm that this is
        the intended definition of end-to-end delay.
        """
        delay = network.tandem.queue[0].job_dict[index]['Tw']
        for branch in network.fork.queue:
            if index in branch.job_dict:
                delay += branch.job_dict[index]['Tw']
        delay += network.join.queue.job_dict[index]['Ts']
        return delay

    def _forget_job(self, index):
        """Drop the per-queue bookkeeping of a job that has left the network."""
        del self.acyclic.tandem.queue[0].job_dict[index]
        # A job visits exactly one branch; search all of them instead of
        # assuming there are only two.
        for branch in self.acyclic.fork.queue:
            branch.job_dict.pop(index, None)
        del self.acyclic.join.queue.job_dict[index]

    def _take_action(self, action):
        """Feed the current job to the network and advance to the next arrival."""
        info_vec = np.zeros((2, 3))
        info_vec[0] = [self.job_index, self.t_arr, action]
        self.job_index += 1
        self.t_arr = self.t_arr + self._inter_arr_gen()
        # Terminator row: the next arrival time bounds this simulation window.
        info_vec[1] = [self.job_index, self.t_arr, 0]
        qls, self.dep_vec = self.acyclic._step(info_vec)
        # The network updates its queue-length array in place; keep a private
        # snapshot so that stored states do not change on later steps.
        self.qls = np.array(qls, copy=True)

    def _get_reward(self, delay, action):
        """Return +1 for a correct decision w.r.t. `d_ub` and -1 otherwise."""
        meets_bound = delay <= self.d_ub
        if action:
            return 1 if meets_bound else -1
        return -1 if meets_bound else 1

    def _inter_arr_gen(self):
        """Sample a gamma-distributed inter-arrival time with mean 1/lambda_a."""
        c_a2 = 0.7  # squared coefficient of variation of the inter-arrival time
        lambda_a = self.N_s[0] * self.rho * self.Mu_s[0]
        mean = 1/lambda_a
        k = 1/c_a2
        theta = mean/k
        return np.random.gamma(k, theta)

    def reset(self):
        """
        Reset the state of the environment and returns an initial observation.
        Returns
        -------
        observation (object): the initial observation of the space.
        """
        # N_s, Mu_s, P_s, rho and d_ub keep the values given to __init__.
        self.t_arr = 0
        self.job_index = 1
        self.cnt = 1
        self.qls = np.zeros(len(self.N_s), dtype=int)
        self.dep_vec = []
        self.acyclic_job_dict = {}
        self.opt_flags = {}
        self.acyclic = Acyclic(self.N_s, self.Mu_s, self.P_s)
        self.cost = 0
        return self.qls

class Acyclic():
    """Queueing network made of one first stage, a probabilistic fork and a join stage.

    `N_s` and `Mu_s` are indexed as: entry 0 for the first stage, entries
    1..Nobranch for the fork branches and entry Nobranch+1 for the join
    stage, where Nobranch = len(P_s).
    """

    def __init__(self, N_s, Mu_s, P_s):
        #         self.__version__ = "0.1.0"
        # General variables defining the environment
        self.N_s = N_s
        self.Mu_s = Mu_s
        self.queue = []
        self.ql = np.zeros(len(N_s), dtype=int)
        self.Nobranch = len(P_s)
        self.tandem = Tandem(N_s[0:1], Mu_s[0:1])
        self.fork = Fork(N_s[1:self.Nobranch+1], Mu_s[1:self.Nobranch+1], P_s)
        self.join = Join(N_s[self.Nobranch+1], Mu_s[self.Nobranch+1])

    @staticmethod
    def _sorted_with_terminator(events, terminator):
        """Sort event rows by time (column 1) and append the terminator row."""
        if np.shape(events)[0] > 1:
            events = events[np.argsort(events[:, 1])]
        return np.append(events, terminator).reshape(-1, 3)

    def _step(self, info_vec):
        """Push a window of events through all stages.

        `info_vec` holds rows `[job_index, time, isArrival]`; its last row is
        the terminator that bounds the simulated window.

        Returns a snapshot of the per-stage queue lengths and the rows of the
        jobs that left the join stage, sorted by time and followed by the
        terminator row.
        """
        terminator = info_vec[-1]

        tandem_ql, departures = self.tandem._step(np.copy(info_vec))
        # The first stage is a single queue; index it explicitly instead of
        # relying on implicit size-1 array to scalar conversion.
        self.ql[0] = tandem_ql[0]

        # NOTE: the stages already terminate their output, so each of these
        # calls adds a second (non-arrival) terminator row.  It is kept
        # because the extra row also takes part in the fork's random routing
        # draw, and dropping it would change seeded simulation results.
        fork_input = self._sorted_with_terminator(departures, terminator)
        self.ql[1:self.Nobranch+1], per_branch = self.fork._step(fork_input)

        # Handle every branch the fork reports rather than exactly two.
        for branch in list(per_branch):
            per_branch[branch] = self._sorted_with_terminator(per_branch[branch], terminator)

        self.ql[self.Nobranch+1], departures = self.join._step(per_branch)
        # Return a copy: self.ql is updated in place on every call, and
        # callers keep the returned states around.
        return self.ql.copy(), self._sorted_with_terminator(departures, terminator)
        
"----------Defining constituent queueing elements of the network---------------"  

class Tandem():
    """Chain of queues in series: departures of one stage are arrivals of the next."""

    def __init__(self, N_s, Mu_s):
        # N_s: number of servers per stage, Mu_s: service rate per stage
        self.N_s = N_s
        self.Mu_s = Mu_s
        self.ql = np.zeros(len(N_s), dtype=int)
        self.queue = [Queue(servers, self.Mu_s[stage])
                      for stage, servers in enumerate(self.N_s)]

    def _step(self, info_vec):
        """Run the event rows of `info_vec` through every stage in order.

        The last row of `info_vec` is the terminator; it is re-appended to the
        time-sorted departures of each stage before they are handed on.
        Returns the per-stage queue lengths and the events leaving the last
        stage.
        """
        events = np.copy(info_vec)
        for stage, stage_queue in enumerate(self.queue):
            self.ql[stage], finished = stage_queue._progress(events)
            if np.shape(finished)[0] > 1:
                # departures are emitted unordered; sort them by time
                finished = finished[np.argsort(finished[:, 1])]
            events = np.append(finished, info_vec[-1]).reshape(-1, 3)
        return self.ql, events

class Fork():
    """Parallel branch queues; every incoming event is routed to one branch at random."""

    def __init__(self, N_s, Mu_s, P_s):
        # N_s: servers per branch, Mu_s: service rate per branch,
        # P_s: routing probability of each branch
        self.N_s = N_s
        self.P_s = P_s
        self.ql = np.zeros(len(N_s), dtype=int)
        self.queue = [Queue(servers, Mu_s[branch])
                      for branch, servers in enumerate(self.N_s)]

    def _step(self, info_vec):
        """Route the events of `info_vec` (all rows but the last) to the branches.

        Each branch processes its share followed by the terminator row (the
        last row of `info_vec`).  Returns the per-branch queue lengths and a
        dict mapping branch number to that branch's time-sorted departures
        with the terminator row appended.
        """
        events = np.copy(info_vec)
        n_routed = info_vec.shape[0] - 1
        # one multinomial draw per event; the argmax of the one-hot row is the branch
        one_hot = np.random.multinomial(1, self.P_s, size=n_routed)
        branch_of = np.argmax(one_hot, axis=1)

        per_branch = {}
        for branch, branch_queue in enumerate(self.queue):
            share = events[:-1][branch_of == branch]
            branch_input = np.append(share, events[-1]).reshape(-1, 3)
            self.ql[branch], finished = branch_queue._progress(branch_input)
            if np.shape(finished)[0] > 1:
                finished = finished[np.argsort(finished[:, 1])]
            per_branch[branch] = np.append(finished, info_vec[-1]).reshape(-1, 3)
        return self.ql, per_branch

class Join():
    """Single queue that merges the departures of all fork branches."""

    def __init__(self, n_s, mu_s):
        #         self.__version__ = "0.1.0"
        # General variables defining the environment
        self.ql = 0
        self.queue = Queue(n_s, mu_s)

    def _step(self, info_vec):
        """Merge the per-branch events and run them through the join queue.

        Parameters
        ----------
        info_vec : dict mapping branch number (0..N_branch-1) to an array of
            rows `[job_index, time, isArrival]` whose last row is the
            terminator.

        Returns
        -------
        (queue length, departures) as returned by the underlying queue.

        Raises
        ------
        ValueError : if `info_vec` contains no branch, since there is then no
            terminator row to bound the simulated window.
        """
        self.N_branch = len(info_vec)
        if self.N_branch == 0:
            raise ValueError('Join._step needs the events of at least one branch')
        # Terminator of the last branch, looked up explicitly rather than via
        # a loop variable that outlives its loop.
        terminator = info_vec[self.N_branch - 1][-1]

        merged = []
        for i in range(self.N_branch):
            # strip each branch's terminator before merging
            merged = np.append(merged, info_vec[i][:-1]).reshape(-1, 3)
        if np.shape(merged)[0] > 1:
            merged = merged[np.argsort(merged[:, 1])]
        merged = np.append(merged, terminator).reshape(-1, 3)

        self.ql, departure_vec = self.queue._progress(merged)
        return self.ql, departure_vec

class Queue():
    """Multi-server queue with gamma-distributed service times.

    Per-job records are kept in `job_dict` under the job index with keys
    'Ta' (arrival time), 'Td' (departure time), 'Ts' (service time),
    'Tw' (waiting time) and 'Ba' (queue length found on arrival).
    """

    def __init__(self, n_s, mu_s):
        # n_s: number of servers, mu_s: service rate of each server
        self.n_servers = n_s
        self.mu_s = mu_s
        self.n_jobs = 0                       # jobs currently in the system
        self.ql_vec = [0]
        self.empty_servers = np.arange(n_s)
        self.assigned_servers = []            # server of each job in the system
        self.t_fin = []                       # finish time of each job in the system
        self.ind_fin = []                     # job index of each job in the system
        self.job_dict = {0: {'Tw': 0.0, 'Ts': 0.0}}

    def _progress(self, info_vec):
        """Process the event rows `[job_index, time, isArrival]` of `info_vec`.

        Every row except the last is handled as an event (admitted when its
        flag is truthy); after each event the jobs finishing before the next
        row's time are released.  A single-row input only releases the jobs
        finishing before that row's time.

        Returns the queue length (jobs in excess of the servers) at the end of
        the window and an array of departure rows `[job_index, finish_time, 1]`.
        """
        n_rows = np.shape(info_vec)[0]
        assert(n_rows >= 1), 'error'
        departure_vec = []

        for j in range(n_rows - 1):
            job_index = int(info_vec[j][0])
            time = info_vec[j][1]
            # queue length seen just before this event
            self.ql = max(self.n_jobs - self.n_servers, 0)
            if info_vec[j][2]:
                self._admit(job_index, time)
            self._release_before(info_vec[j+1][1], departure_vec)

        if n_rows == 1:
            self._release_before(info_vec[0][1], departure_vec)

        # queue length of this stage before the next arrival to the first stage
        QL = max(self.n_jobs - self.n_servers, 0)
        return QL, np.array(departure_vec)

    def _admit(self, job_index, time):
        """Assign an arriving job to a server and record its timings."""
        if self.n_jobs >= self.n_servers:
            # all servers busy: wait for the server that frees up first
            free_at = [np.max(self.t_fin[self.assigned_servers == srv])
                       for srv in range(self.n_servers)]
            chosen = np.argmin(free_at)
            start = max(time, free_at[chosen])
            self.assigned_servers = np.append(self.assigned_servers, chosen)
        else:
            # an idle server exists: start service immediately on a random one
            start = time
            self.empty_servers = [x for x in range(self.n_servers)
                                  if x not in self.assigned_servers]
            self.assigned_servers = np.append(self.assigned_servers,
                                              random.choice(self.empty_servers))

        service = self._service_gen()
        finish = start + service
        self.t_fin = np.append(self.t_fin, finish)
        self.ind_fin = np.append(self.ind_fin, job_index)
        self.n_jobs += 1
        self.job_dict[job_index] = {'Ta': time, 'Td': finish, 'Ts': service,
                                    'Tw': start - time, 'Ba': self.ql}

    def _release_before(self, next_time, departure_vec):
        """Move the jobs finishing strictly before `next_time` into `departure_vec`."""
        finished_mask = np.array(self.t_fin) < next_time
        self.n_jobs -= np.sum(finished_mask)
        finished = np.arange(len(self.t_fin))[finished_mask]
        for pos in finished:
            departure_vec.append([int(self.ind_fin[pos]), self.t_fin[pos], 1])
        self.t_fin = np.delete(self.t_fin, finished)
        self.ind_fin = np.delete(self.ind_fin, finished)
        self.assigned_servers = np.delete(self.assigned_servers, finished)

    def _service_gen(self):
        """Sample a gamma-distributed service time with mean 1/mu_s."""
        c_s2 = 0.8  # squared coefficient of variation of the service time
        mean = 1/self.mu_s
        k = 1/c_s2
        theta = mean/k
        return np.random.gamma(k, theta)