<a href="https://colab.research.google.com/github/nabazar/ALPR_Dataset/blob/main/DQNforLoadBalancingv5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
import random
import tensorflow as tf
import gym
import numpy as np
from gym import spaces
from gym.spaces import Tuple , Box
import time
import numpy as np
from collections import deque
import matplotlib.pyplot as plt
from IPython.display import clear_output

  and should_run_async(code)


In [11]:
class DQN:
    def __init__(self, state_shape, action_size, learning_rate_max=0.001, learning_rate_decay=0.995, gamma=0.75,
                 memory_size=2000, batch_size=32, exploration_max=1.0, exploration_min=0.01, exploration_decay=0.995):
        self.state_shape = state_shape

        self.state_tensor_shape = (-1,) + state_shape
        self.action_size = action_size
        self.learning_rate_max = learning_rate_max
        self.learning_rate = learning_rate_max
        self.learning_rate_decay = learning_rate_decay
        self.gamma = gamma
        self.memory_size = memory_size
        self.memory = PrioritizedReplayBuffer(capacity=2000)
        self.batch_size = batch_size
        self.exploration_rate = exploration_max
        self.exploration_max = exploration_max
        self.exploration_min = exploration_min
        self.exploration_decay = exploration_decay

        self.model = self._build_model()
        self.target_model = self._build_model()
        self.update_target_model()

    def _build_model(self):
        # the actual neural network structure
        model = tf.keras.models.Sequential()
        model.add(tf.keras.layers.Input(shape=self.state_shape))
        model.add(tf.keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same', kernel_initializer='he_uniform', input_shape=self.state_shape))
        model.add(tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same', kernel_initializer='he_uniform'))
        model.add(tf.keras.layers.Flatten())
        model.add(tf.keras.layers.Dense(128, activation='relu', kernel_initializer='he_uniform'))
        model.add(tf.keras.layers.Dense(128, activation='relu', kernel_initializer='he_uniform'))
        model.add(tf.keras.layers.Dropout(0.1))
        model.add(tf.keras.layers.Dense(self.action_size, activation='linear', name='action_values', kernel_initializer='he_uniform'))
        model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate))
        return model

    def update_target_model(self):
        self.target_model.set_weights(self.model.get_weights())

    def remember(self, state, action, reward, next_state, done):
        self.memory.push((state, action, reward, next_state, done))

    def act(self, state, epsilon=None):
        if epsilon == None:
            epsilon = self.exploration_rate
        if np.random.rand() < epsilon:
            return random.randrange(self.action_size)
        return np.argmax(self.target_model.predict(state, verbose=0)[0])

    def replay(self, episode=0):

        if self.memory.length() < self.batch_size:
            return None

        experiences, indices, weights = self.memory.sample(self.batch_size)
        unpacked_experiences = list(zip(*experiences))
        states, actions, rewards, next_states, dones = [list(arr) for arr in unpacked_experiences]

        # Convert to tensors
        states = tf.convert_to_tensor(states)
        states = tf.reshape(states, self.state_tensor_shape)
        actions = tf.convert_to_tensor(actions, dtype=tf.int32)
        rewards = tf.convert_to_tensor(rewards, dtype=tf.float32)
        next_states = tf.convert_to_tensor(next_states)
        next_states = tf.reshape(next_states, self.state_tensor_shape)
        dones = tf.convert_to_tensor(dones, dtype=tf.float32)

        # Compute Q values and next Q values
        target_q_values = self.target_model.predict(next_states, verbose=0)
        q_values = self.model.predict(states, verbose=0)

        # Compute target values using the Bellman equation
        max_target_q_values = np.max(target_q_values, axis=1)
        targets = rewards + (1 - dones) * self.gamma * max_target_q_values

        # Compute TD errors
        batch_indices = np.arange(self.batch_size)
        q_values_current_action = q_values[batch_indices, actions]
        td_errors = targets - q_values_current_action
        self.memory.update_priorities(indices, np.abs(td_errors))

        # For learning: Adjust Q values of taken actions to match the computed targets
        q_values[batch_indices, actions] = targets

        loss = self.model.train_on_batch(states, q_values, sample_weight=weights)

        self.exploration_rate = self.exploration_max*self.exploration_decay**episode
        self.exploration_rate = max(self.exploration_min, self.exploration_rate)
        self.learning_rate = self.learning_rate_max*self.learning_rate_decay**episode
        tf.keras.backend.set_value(self.model.optimizer.learning_rate, self.learning_rate)

        return loss

    def load(self, name):
        self.model = tf.keras.models.load_model(name)
        self.target_model = tf.keras.models.load_model(name)

    def save(self, name):
        self.model.save(name)

In [12]:
class PrioritizedReplayBuffer:
    def __init__(self, capacity, epsilon=1e-6, alpha=0.8, beta=0.4, beta_increment=0.001):
        self.capacity = capacity
        self.epsilon = epsilon
        self.alpha = alpha   # how much prioritisation is used
        self.beta = beta    # for importance sampling weights
        self.beta_increment = beta_increment
        self.priority_buffer = np.zeros(self.capacity)
        self.data = []
        self.position = 0

    def length(self):
        return len(self.data)

    def push(self, experience):
        max_priority = np.max(self.priority_buffer) if self.data else 1.0
        if len(self.data) < self.capacity:
            self.data.append(experience)
        else:
            self.data[self.position] = experience
        self.priority_buffer[self.position] = max_priority
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        priorities = self.priority_buffer[:len(self.data)]
        probabilities = priorities ** self.alpha
        probabilities /= probabilities.sum()

        indices = np.random.choice(len(self.data), batch_size, p=probabilities)
        experiences = [self.data[i] for i in indices]

        total = len(self.data)
        weights = (total * probabilities[indices]) ** (-self.beta)
        weights /= weights.max()

        self.beta = np.min([1., self.beta + self.beta_increment])

        return experiences, indices, weights

    def update_priorities(self, indices, errors):
        for idx, error in zip(indices, errors):
            self.priority_buffer[idx] = error + self.epsilon

In [13]:
def createVM(nVM,ProcessTime,QueueLength,ProcessorNumber):
  VMs=[]
  for i in range(0,nVM):
    VMs.append({'name': 'vm'+str(i), 'L_Q': QueueLength[i], 'ProcessTime':ProcessTime[i],'ProcessorNumber':ProcessorNumber[i],'Occuapted_memory':[],'InputTasks':[],'FinishedTasks':[],'Time_of_the_FinishedTasks':[]})
  return VMs



In [14]:
class LoadBalancing:
  def __init__(self,nVM,ProcessTime,QueueLength,ProcessorNumber,lower_bound,upper_bound):
    self.action_space = spaces.Discrete(nVM)

    self.observation_space = spaces.Box(lower_bound, upper_bound, dtype=np.float32)
    self.nVM=nVM
    self.ProcessTime=ProcessTime
    self.ProcessorNumber=ProcessorNumber
    self.QueueLength=QueueLength
    self.VMs=createVM(self.nVM,self.ProcessTime,self.QueueLength,self.ProcessorNumber)



  def step(self,action):
    stp=self.stp

    task={'name':'T'+str(stp),'id':stp,'Delay':self.VMs[action]['ProcessTime'] ,'VMid': action,'Timer':self.time}

    ProcessorNumber=self.ProcessorNumber

    for i in range(0,self.nVM):
      if len(self.VMs[i]['Occuapted_memory'])>0:
        if self.time%self.VMs[i]['ProcessTime']==0 or self.time%self.VMs[i]['ProcessTime']<1:

          if ProcessorNumber[i]<=len(self.VMs[i]['Occuapted_memory']):
            a=ProcessorNumber[i]

          else:
            a=len(self.VMs[i]['Occuapted_memory'])
          for ia in range(0, a):
            self.VMs[i]['FinishedTasks'].append(self.VMs[i]['Occuapted_memory'][ia])
            self.VMs[i]['Time_of_the_FinishedTasks'].append(self.time)
          self.VMs[i]['Occuapted_memory'][0:a]=[]

          self.VMs[i]['L_Q']=self.VMs[i]['L_Q']+a

    if self.VMs[action]['L_Q']>0:
      self.VMs[action]['L_Q']=self.VMs[action]['L_Q']-1
      self.VMs[action]['Occuapted_memory'].append(task['id'])
      self.VMs[action]['InputTasks'].append(task['id'])


    next_state=[]
    for i in range(0,self.nVM):
      next_state.append(self.VMs[i]['L_Q'])

    C1=len(self.VMs[action]['InputTasks'])>500
    C2=len(self.VMs[action]['InputTasks'])<200

    reward=-1*C1-1*C2-1*self.VMs[action]['ProcessTime']/30-20*len(self.VMs[action]['Occuapted_memory'])/100
    reward=reward/4
    return next_state, reward,self.VMs

  def reset(self):

    self.VMs=createVM(self.nVM,self.ProcessTime,self.QueueLength,self.ProcessorNumber)

    state=[]
    for i in range(0,self.nVM):
      state.append(self.VMs[i]['L_Q'])

    ####if normalized
    # state=(lq0,lq1,lq2,lq3,lq4,lq5)
    # state=np.reshape(state,(1,-1))
    # state = preprocessing.normalize(state)[0]

    return state,self.VMs
  def print_output(self):
      if self.print_mode==1:
        for i in range(0,self.nVM):
          print("-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.")
          print("-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-Print Outputs-.-.-.-.-.-.-.-.-.-.-.-.")
          print("-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.")
          print("Current Time is: ", str(self.time))
          print( "Occuapted_memory in VM "+str(i)+": ",self.VMs[i]['Occuapted_memory'])
          print("InputTasks in VM "+str(i)+": ",self.VMs[i]['InputTasks'])
          print("Finished Tasks in VM "+str(i)+": ",self.VMs[i]['FinishedTasks'])
          print("Time of Finished Tasks in VM "+str(i)+": ",self.VMs[i]['Time_of_the_FinishedTasks'])
          print( "VM "+str(i)+": ",self.VMs[i])
  def render(self):
    frame_idx=self.frame_idx
    scores=self.scores
    losses: self.losses
    epsilons: self.epsilons
    """Plot the training progresses."""
    clear_output(True)
    plt.close()
    plt.figure(figsize=(12, 3))
    plt.subplot(121)
    plt.title('frame %s. score: %s' % (frame_idx, np.mean(scores[-10:])))
    plt.plot(scores)
    plt.subplot(122)
    plt.title('loss')
    plt.plot(losses)
    plt.show()

In [15]:
nVM=3
ProcessorNumber=[8,8,12,12,16,16]
ProcessTime=[30,30,20,20,10,10]
QueueLength=[100,100,100,100,100,100]
LB=[60,60,60,60,60,60]
UB=QueueLength
lower_bound=np.array(LB[0:nVM],dtype=np.float32,)
upper_bound=np.array(UB[0:nVM],dtype=np.float32,)
env=LoadBalancing(nVM,ProcessTime,QueueLength,ProcessorNumber,lower_bound,upper_bound)
env.print_mode=0
pm=env.print_mode

In [20]:

EPISODES = 101
max_steps=1000
LEARNING_RATE = 1e-4
LEARNING_RATE_DECAY = 0.99
EXPLORATION_DECAY = 0.95
GAMMA = 0.975
UPDATE_TARGET_EVERY = 10

BATCH_SIZE = 128


agent = DQN(
    state_shape=env.observation_space.shape[0],
    action_size=env.action_space.n,
    batch_size=BATCH_SIZE,
    learning_rate_max=LEARNING_RATE,
    learning_rate_decay=LEARNING_RATE_DECAY,
    exploration_decay=EXPLORATION_DECAY,
    gamma=GAMMA
)
agent.save(f'models/-1.h5')

state = env.reset()
state = np.expand_dims(state, axis=0)

most_recent_losses = deque(maxlen=BATCH_SIZE)

log = []
done=0

# fill up memory before training starts
while agent.memory.length() < BATCH_SIZE:
    action = agent.act(state)
    next_state, reward,VMs = env.step(action)

    next_state = np.expand_dims(next_state, axis=0)
    agent.remember(state, action, reward, next_state, done)
    state = next_state
scores = []
score = 0
done=0
inputtask=np.ndarray((EPISODES ,env.nVM))
losses=[]
for e in range(0, EPISODES):
    state,VMs = env.reset()
    state = np.expand_dims(state, axis=0)
    done = False
    step = 0
    all_input_tasks=0
    env.VMs=VMs
    env.start_time= time.time()
    scores.append(np.mean(score))
    ma_loss = None
    score=0

    for step in range(0,max_steps):

      env.stp=step
      env.time=1000*(time.time()-env.start_time)
      env.time=np.int64(np.round(env.time))
      action = agent.act(state)
      next_state, reward,VMs = env.step(action)
      env.VMs=VMs
      next_state = np.expand_dims(next_state, axis=0)
      agent.remember(state, action, reward, next_state, done)

      state = next_state

      score += reward

      loss = agent.replay(episode=e)
      most_recent_losses.append(loss)
      ma_loss = np.array(most_recent_losses).mean()
      losses.append(most_recent_losses)
      env.print_output()
      env.e=e
    env.scores=scores
    env.losses=losses

    for i in range(0,env.nVM):
      inputtask[e,i]=len(env.VMs[i]['InputTasks'])
            # plotting
    if e % 10 == 0:
      env.render()
      print_mode=1
      env.print_output()
      env.print_mode=0
      env.print_mode=pm


    log.append([e, step, score, agent.learning_rate, agent.exploration_rate, ma_loss])

    agent.save(f'models/{e}.h5')

TypeError: ignored

In [None]:
model_path = "models\5.h5"
agent = DQN(
    state_shape=env.observation_space.shape[0],
    action_size=env.action_space.n,
)
agent.load(model_path)

state = env.reset()
state = np.expand_dims(state, axis=0)

import pygame
pygame.init()
screen = pygame.display.set_mode((env.WINDOW_WIDTH, env.WINDOW_HEIGHT))
clock = pygame.time.Clock()
running = True
score = 0

while running:
    pygame.display.set_caption(f"Score: {score}")
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            running = False

    action = agent.act(state, 0)
    state, reward, done, score = env.step(action)
    state = np.expand_dims(state, axis=0)

    env.render(screen)
    pygame.display.flip()
    clock.tick(30)

pygame.quit()

In [22]:
state_shape

NameError: ignored