In [4]:
import sys
sys.path.insert(0, '..')

from sim.LoadBalancer import LoadBalancer
import numpy as np
import random

### Implementing the Epsilon Greedy algorithm in our LoadBalancer structure
We use `LoadBalancer` as the base class and implement the epsilon-greedy algorithm on top of it.

In [None]:
class GreedyEpsilonLoadBalancer(LoadBalancer):
    """Load balancer that picks the number of servers with an epsilon-greedy rule.

    At the end of each period the log of the period is aggregated into a
    context of scalars, a reward is derived from that context, and the
    model-update hook is invoked. With probability ``eta`` the next number of
    servers is sampled uniformly from ``nServerRange`` (exploration);
    otherwise the model is supposed to choose it (exploitation, not
    implemented yet).
    """

    def __init__(self, nServers, environment, model, processReward=1, cancelReward=10, serverReward=-300, eta=0.2, nServerRange = (1,40)):
        """
            nServers: initial number of servers, forwarded to the base class
            environment: simulation environment, forwarded to the base class;
                this class calls its getPeriodLog() and resetPeriod() methods
            model: obj with methods predict, fit, fit_partial etc.
            processReward: reward contribution per processed request
            cancelReward: reward contribution per cancelled request
            serverReward: reward contribution per server in use
            eta: float or callable(periodIndex: int -> float); probability of
                taking a random action
            nServerRange: (low, high) inclusive bounds for the random action
        """
        super().__init__(nServers, environment)
        self.model = model # a model that takes (N_t, X_t) and predicts the reward of the next time period
        # Aggregation applied to each logged series to turn a list into a scalar.
        self.agg = {
            'totalInQueue': np.mean,
            'requestStartWaiting': np.sum,
            'arrivalEvent': np.sum,
            'requestWaitingTime': np.mean,
            'requestProcessed': np.sum,
            'totalTimeInSystem': np.mean,
            'requestCancelled': np.sum,
        }
        # NOTE(review): nothing in this class advances currentPeriod, so a
        # callable eta is always evaluated at period 0 unless the base class
        # or the caller updates it - confirm.
        self.currentPeriod = 0
        self.processReward, self.cancelReward, self.serverReward = processReward, cancelReward, serverReward
        # Fix: eta was accepted but never stored, so getEta() raised AttributeError.
        self.eta = eta
        self.nServerRange = nServerRange #the inclusive range the random action is sampled from

    def getEta(self):
        """
        Return the exploration probability for the current period.

        If eta is callable it is evaluated at self.currentPeriod, otherwise it
        is returned as is.
        """
        if callable(self.eta):
            return self.eta(self.currentPeriod)
        return self.eta

    def getPreviousPeriodContext(self):
        """
        Get the context from the period that just ended (potentially add lags here from previous periods)

        Every series in the period log is reduced to a scalar with the
        aggregation registered for its key in self.agg (a key without a
        registered aggregation raises KeyError). The environment's period log
        is reset afterwards.
        """
        previousPeriodData = self.environment.getPeriodLog() #returns {key: values}
        previousPeriodContext = {key: self.agg[key](vals) for key, vals in previousPeriodData.items()} #apply aggregation: go from lists to scalars
        self.environment.resetPeriod()
        return previousPeriodContext

    def getPreviousPeriodReward(self, context: dict):
        """
        Get the reward of the period that just ended

        context: dict containing at least 'requestProcessed' and
            'requestCancelled'
        returns: nProcessed * processReward + nCancelled * cancelReward
            + nServers * serverReward
        """
        nProcessed = context['requestProcessed']
        nCancelled = context['requestCancelled']
        nServers = self.nServers
        reward = nProcessed * self.processReward + nCancelled * self.cancelReward + nServers * self.serverReward
        # Fix: the reward was computed but never returned.
        return reward

    def getNextPeriodNumberOfServers(self, context):
        """
        Choose the number of servers for the next period.

        With probability eta a random integer from nServerRange (both ends
        included) is returned; otherwise the model should be used, which is
        not implemented yet and raises NotImplementedError.
        """
        #input the context into the model and maximize the output w.r.t N
        # Fix: random.randint(0, 1) only yields 0 or 1, which made the explore
        # branch fire ~50% of the time for any 0 < eta <= 1. A uniform draw in
        # [0, 1) makes "draw < eta" true with probability eta.
        draw = random.random()
        eta = self.getEta()
        if draw < eta:
            #Take random action
            low, high = self.nServerRange
            return random.randint(low, high)

        #Use model to determine N that maximizes the reward
        raise NotImplementedError("model-based selection of nServers is not implemented yet")

    def updateModel(self, context, reward):
        """
        Hook for updating the model with the context, nServers and observed
        reward. Currently a no-op.
        """
        return

    def onPeriodEnd(self):
        """
        Handle the end of a period: aggregate the period log into a context,
        compute the reward of that period and pass both to updateModel.
        """
        #Get the reward of the current period
        previousPeriodContext = self.getPreviousPeriodContext()
        previousPeriodReward = self.getPreviousPeriodReward(previousPeriodContext)
        # Fix: the context and reward were computed and then discarded.
        self.updateModel(previousPeriodContext, previousPeriodReward)



