In [None]:
# import additional helping libraries
import os
import random
from random import randint

import numpy as np
import pandas as pd

# import environment building stuff from gym
import gym
from gym import Env
from gym.spaces import Discrete, Box, Dict, Tuple, MultiBinary, MultiDiscrete

# import stable baselines RL algorithms
from stable_baselines3 import DQN
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.vec_env import VecFrameStack

In [None]:
# Class for Job
#-------------------------------------------------------------------------------
class Job:
    """Plain record describing one job offered to the scheduler.

    Attributes:
        job_id: Identifier of the job (stored as given).
        time_proc: Processing time of the job.
        time_rel: Release time of the job.
        time_due: Due time of the job.
        job_core: Core value attached to the job (stored, not interpreted here).
    """

    def __init__(self, job_id, time_proc, time_rel, time_due, job_core):
        # Attributes are bound in the same order as the constructor arguments.
        self.job_id, self.time_proc, self.time_rel = job_id, time_proc, time_rel
        self.time_due, self.job_core = time_due, job_core

    def func_print(self):
        """Print this job's id to standard output."""
        print(self.job_id)

# Class for Machines
#--------------------------------------------------------------------------------

class Machine:
    """Load counters and a per-job log for a single machine.

    Attributes:
        machine_id: Identifier given at construction.
        accepted_jobs: Counter starting at 0.
        current_load: Counter starting at 0.
        remaining_load: Counter starting at 0.
        keys: Names of the columns kept in ``job_on_machine``.
        job_on_machine: Dict mapping each column name to its own list.
    """

    def __init__(self, machine_id):
        self.machine_id = machine_id
        self.accepted_jobs = 0
        self.current_load = 0
        self.remaining_load = 0

        self.keys = ['job_id','time_rel','time_proc','time_due','time_start','time_comp']
        # Each column gets its own independent, initially empty list.
        self.job_on_machine = {column: [] for column in self.keys}

    def data_entry(self,job_id,time_rel,time_proc,time_due,time_start,time_comp):
        """Append one job's values to the matching columns of ``job_on_machine``."""
        row = {
            'job_id': job_id,
            'time_rel': time_rel,
            'time_proc': time_proc,
            'time_due': time_due,
            'time_start': time_start,
            'time_comp': time_comp,
        }
        for column, value in row.items():
            self.job_on_machine[column].append(value)

In [None]:
# function to create machine list
def initialize_machines(no_of_machines):
    """Build a list of ``no_of_machines`` fresh Machine objects.

    Machines receive the ids "M1", "M2", ... in list order.
    """
    # Ids are 1-based, so the range runs from 1 to no_of_machines inclusive.
    return [Machine(f"M{number}") for number in range(1, no_of_machines + 1)]

In [None]:
# Function for reading jobs from the text file
#---------------------------------------------------------------------------------

def read_jobs(file_name,no_of_jobs):
    """Read jobs from a ';'-separated text file inside the "Training_Data" folder.

    Each non-blank line must contain at least five ';'-separated fields, in
    this order: job id, processing time, release time, due time, job core.
    The last four are parsed with ``int``; the job id is kept as text.

    Args:
        file_name: Path of the file relative to the "Training_Data" folder.
        no_of_jobs: ``':'`` to read every job, otherwise a slice stop value
            (typically an int) limiting how many leading jobs are read.

    Returns:
        A list of Job objects in file order.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If a line has fewer than five fields.
        ValueError: If a numeric field is not a valid integer.
    """
    file_name = os.path.join("Training_Data",file_name)

    # The with-block closes the file on exit, so no explicit close() is needed.
    with open(file_name) as read_file:
        # Blank lines (e.g. a trailing empty line at the end of the file)
        # carry no job and would otherwise fail while parsing the fields.
        job_lines = [line for line in read_file if line.strip()]

    # ':' is the "read everything" sentinel; a slice stop of None does exactly that.
    limit = None if no_of_jobs == ':' else no_of_jobs

    jobs = []
    for line in job_lines[:limit]:
        job_properties = line.split(";")
        jobs.append(
            Job(
                job_properties[0],         # job_id
                int(job_properties[1]),    # time_proc
                int(job_properties[2]),    # time_rel
                int(job_properties[3]),    # time_due
                int(job_properties[4]),    # job_core
            )
        )

    return jobs

In [None]:
# Function to calculate remaining load on the machines at present time
def update_all_machines(list_machines,job):
    """Refresh every machine's remaining load for the release time of ``job``.

    For each machine, ``remaining_load`` becomes ``current_load - job.time_rel``,
    floored at 0. The list is then sorted in place by ``remaining_load``,
    largest first, and the same list object is returned.
    """
    for machine in list_machines:
        backlog = machine.current_load - job.time_rel
        # Load that would already be finished by the release time counts as 0.
        machine.remaining_load = backlog if backlog >= 0 else 0

    list_machines.sort(key=lambda machine: machine.remaining_load, reverse=True)
    return list_machines

In [None]:
# Function to assign jobs to machines
def assign_job_to_machine(action,list_machines, job):
    """Try to place ``job`` on a machine when ``action`` is truthy.

    Machines are scanned in list order and the job goes to the first one on
    which it would complete no later than ``job.time_due``. That machine's
    ``current_load`` is set to the completion time and both its
    ``accepted_jobs`` and ``remaining_load`` grow by ``job.time_proc``.

    Returns:
        ``(list_machines, job_assigned)``. When ``action`` is falsy the list
        is returned untouched and unsorted; otherwise it is re-sorted in place
        by ``remaining_load``, largest first, whether or not a machine was found.
    """
    # Rejecting the job leaves the machines exactly as they were.
    if not action:
        return list_machines, False

    job_assigned = False
    for machine in list_machines:
        completion_time = job.time_proc + job.time_rel + machine.remaining_load
        if job.time_due >= completion_time:
            machine.current_load = completion_time
            machine.accepted_jobs += job.time_proc
            machine.remaining_load += job.time_proc
            job_assigned = True
            break

    list_machines.sort(key=lambda machine: machine.remaining_load, reverse=True)
    return list_machines, job_assigned

In [None]:
# Function to retrieve the observation space (remaining load on the machines) 
def remaining_load_on_machines(list_machines):
    """Return the ``remaining_load`` of every machine, in list order."""
    return [machine.remaining_load for machine in list_machines]

In [None]:
def binarySearchMean(list_machines, l, r, x):
    """Binary-search indices ``l..r`` for the first machine whose load is below ``x``.

    The search direction assumes ``list_machines`` is ordered by
    ``remaining_load`` from largest to smallest.

    Returns:
        The index ``mid`` where ``list_machines[mid].remaining_load < x`` and
        ``list_machines[mid-1].remaining_load >= x``, or -1 if the search
        window closes without finding such an index.

    Note:
        When ``mid`` is 0 the predecessor lookup ``mid-1`` wraps around to the
        last element of the list (Python negative indexing).
    """
    low, high = l, r
    while high >= low:
        mid = low + (high - low) // 2

        if list_machines[mid].remaining_load < x:
            # mid is below x; it is the boundary only if its predecessor is not.
            if list_machines[mid-1].remaining_load >= x:
                return mid
            # Otherwise the boundary lies further left.
            high = mid - 1
        else:
            # mid is still at or above x, so the boundary lies to the right.
            low = mid + 1

    # Window closed without locating a boundary.
    return -1

In [None]:
def leap_calc(list_machines,l,r,remaining_load_prime):
    """Binary-search indices ``l..r`` for the slot strictly bracketing a load value.

    The search direction assumes ``list_machines`` is ordered by
    ``remaining_load`` from largest to smallest.

    Returns:
        The index ``mid`` where ``list_machines[mid].remaining_load`` is
        strictly below ``remaining_load_prime`` and
        ``list_machines[mid-1].remaining_load`` is strictly above it, or -1 if
        no such index is found. Because both comparisons are strict, a
        predecessor whose load equals ``remaining_load_prime`` does not count.

    Note:
        When ``mid`` is 0 the predecessor lookup ``mid-1`` wraps around to the
        last element of the list (Python negative indexing).
    """
    low, high = l, r
    while high >= low:
        mid = low + (high - low) // 2

        if list_machines[mid].remaining_load < remaining_load_prime:
            # mid is below the target; accept it only if its predecessor is above.
            if list_machines[mid-1].remaining_load > remaining_load_prime:
                return mid
            # Otherwise keep looking on the left side.
            high = mid - 1
        else:
            # mid is at or above the target, so continue on the right side.
            low = mid + 1

    # Window closed without locating a slot.
    return -1
        

In [None]:
def profitability_check(list_machines,job):
    """Decide whether accepting ``job`` is considered profitable.

    ``list_machines`` is expected to be ordered by ``remaining_load`` from
    largest to smallest (the order ``update_all_machines`` leaves it in), so
    the last machine is the one with the smallest remaining load.

    Steps:
        1. ``pos_mean``: index returned by ``binarySearchMean`` for the mean
           remaining load over all machines.
        2. ``threshold``: number of machines minus ``pos_mean``.
        3. ``leap_pos``: index returned by ``leap_calc`` for the last
           machine's load plus ``job.time_proc``, searched among all machines
           except the last one.

    Returns:
        True when ``leap_pos <= threshold``, otherwise False. Both helpers
        return -1 when they find nothing, and that value is used as is.
    """
    machine_count = len(list_machines)

    # Where the mean load sits in the ordered list.
    mean_load = sum(machine.remaining_load for machine in list_machines) / machine_count
    pos_mean = binarySearchMean(list_machines, 0, machine_count - 1, mean_load)
    threshold = machine_count - pos_mean

    # Where the last machine would sit after taking on the job.
    load_after_accept = list_machines[-1].remaining_load + job.time_proc
    leap_pos = leap_calc(list_machines, 0, machine_count - 2, load_after_accept)

    return leap_pos <= threshold

In [None]:
def box_plot_data(list_machines):
    """Return the five-point (box plot) summary of the machines' remaining loads.

    The maximum and minimum are computed from the data itself, so the result
    does not depend on the order of ``list_machines``.

    Args:
        list_machines: Non-empty sequence of objects with a ``remaining_load``
            attribute.

    Returns:
        ``[max_value, upper_quartile, median, lower_quartile, min_value]``,
        where the quartiles are the 75th and 25th percentiles from
        ``numpy.percentile`` and the median comes from ``numpy.median``.

    Raises:
        ValueError: If ``list_machines`` is empty.
    """
    data = [machine.remaining_load for machine in list_machines]
    if not data:
        raise ValueError("box_plot_data requires at least one machine")

    median = np.median(data)
    upper_quartile = np.percentile(data, 75)
    lower_quartile = np.percentile(data, 25)
    # Taking data[0] / data[-1] would only be right for a list pre-sorted in
    # descending order; max()/min() give the same values there and stay
    # correct for any other order.
    max_value = max(data)
    min_value = min(data)

    return [max_value, upper_quartile, median, lower_quartile, min_value]
    


In [None]:
# Custom environment for scheduling process
class ProcessEnv(Env):
    
    """
        ### Methodology
        -----------------------------------------
        For every incoming job the profitability is checked and then the job is passed to the assign_job_to_machine()
        function which returns job_assigned(True/False).
        The rewards for learning are given as specified under Rewards section.
        The learning algorithm tries to maximize profitable actions.
        
        One episode walks through all jobs of one file from the `Training_Data` folder. When the jobs are
        exhausted, `step()` ends the episode and selects the next file, which the following `reset()` loads.
        Every `reset()` also draws a new random number of machines between 10 and 200.
        
        ### Action Space
        -----------------------------------------
        The agent takes a 1-element vector for actions.
        The action space is `(action)` in `[0, 1]`, where `action` is used to accept or reject
        the incoming job:
        
        | Num | Action                 |
        |-----|------------------------|
        | 0   | Reject the job         |
        | 1   | Accept the job         |
        
        ### Observation Space
        -----------------------------------------
        The observation space includes the remaining load summary(box plot 5 point summary) on the machines and the incoming
        job profile(release date, processing time , due date).
        
        The observation is a float32 `ndarray` with shape `(8,)` where the elements correspond to the following:
        
        | Num | Observation           | Min                  | Max                |
        |-----|-----------------------|----------------------|--------------------|
        | 0   | max remaining load    | -Inf                 | Inf                |
        | 1   | upper_quartile        | -Inf                 | Inf                |
        | 2   | median                | -Inf                 | Inf                |
        | 3   | lower_quartile        | -Inf                 | Inf                |
        | 4   | min remaining load    | -Inf                 | Inf                |
        | 5   | processing time       | -Inf                 | Inf                |
        | 6   | release time          | -Inf                 | Inf                |
        | 7   | due time              | -Inf                 | Inf                |
        
        `reset()` and the terminal step return an all-zero observation.
        
        NOTE(review): the job values in the observation returned by `step()` belong to the job that
        was just processed, and `reset()` returns zeros, so an action is applied to a job before any
        observation containing that job has been returned. Confirm whether this is intended.
        
        ### Rewards
        -----------------------------------------
        The reward scheme works as stated below. `step()` returns this value for the current step only;
        the running total of the episode is kept in `self.reward`. The terminal step (no job left)
        returns a reward of 0.
        
        | job_assigned | profitable | reward |
        |--------------|------------|--------|
        | True         | True       | +1     |
        | True         | False      | -1     |
        | False        | True       | -1     |
        | False        | False      | +1     |
        
    """
    
    # Observation length: 5 box plot values + 3 job attributes.
    _OBS_SIZE = 5 + 3
    
    def __init__(self):
        """Create the spaces, discover the training files and prepare the first episode.

        Raises:
            FileNotFoundError: If no file is found under the `Training_Data` folder.
        """
        
        self.action_space = Discrete(2)
        
        # Observation space
        self.observation_space = Box(low=-np.inf, high=np.inf,shape=(self._OBS_SIZE,), dtype=np.float32)
        
        self.file_no = 1
        
        # Collect the training files. Paths are stored relative to the data
        # folder because read_jobs() prepends "Training_Data" itself; keeping
        # only the base name would break files located in sub-directories.
        self.files_list = []
        for dirName, subDirList, fileList in os.walk('Training_Data'):
            for file in fileList: 
                self.files_list.append(os.path.relpath(os.path.join(dirName, file), 'Training_Data'))
        # os.walk gives no ordering guarantee; sort for a reproducible file sequence.
        self.files_list.sort()
        if not self.files_list:
            raise FileNotFoundError("No training files found under 'Training_Data'")
        
        self.total_file_count = len(self.files_list)
        self.current_file_count = 0
        
        # Machines, jobs of the first file and all per-episode counters.
        self._start_episode()
        
    def _start_episode(self):
        """Create fresh machines, load the current file's jobs and zero the episode counters."""
        # machine data
        self.no_of_machines = randint(10, 200)
        
        # Set no of process (':' makes read_jobs read the whole file)
        self.no_of_jobs = ':'      
        
        # Set no of machines
        self.list_machines = initialize_machines(self.no_of_machines)
        
        # Set start state of machines
        self.state = 0 
        self.jobs = read_jobs(self.files_list[self.current_file_count],self.no_of_jobs)
        self.reward = 0
        self.iter_value = 0
        self.prev_reward = 0
        
        self.total_proc_time = 0
        self.accepted_jobs = 0
        
    def step(self, action):
        """Apply `action` (0 = reject, 1 = accept) to the next job of the episode.

        Returns:
            `(observation, reward, done, info)` where `observation` is a float32
            array of shape `(8,)`, `reward` is the float reward of this step
            only, `done` is True once no job was left to process, and `info`
            is an empty dict.
        """
        done = False
        info = { }

        if self.iter_value < len(self.jobs): 
            
            # get the next job from the jobs list
            self.job = self.jobs[self.iter_value]
            # update the current remaining load on all the machines
            self.list_machines = update_all_machines(self.list_machines,self.job)
            # check the profitability of accepting this job
            profitable = profitability_check(self.list_machines,self.job)
            # take the action specified by the learning model 
            self.list_machines,job_assigned = assign_job_to_machine(action,self.list_machines, self.job)     
            # update the observation space
            obs_list = box_plot_data(self.list_machines)
            obs_list.append(self.job.time_proc)
            obs_list.append(self.job.time_rel)
            obs_list.append(self.job.time_due)
            
            # +1 when the outcome agrees with the profitability check, -1 otherwise
            # (see the Rewards table in the class docstring).
            step_reward = 1 if bool(job_assigned) == bool(profitable) else -1
            # self.reward keeps the episode total; only step_reward is returned,
            # so the learner receives a per-step signal instead of a running sum.
            self.reward += step_reward
            
            self.iter_value += 1
        else:
            
            if self.current_file_count < self.total_file_count-1:
                self.current_file_count += 1
            else:
                # Traversed through the entire dataset
                print("Looped through entire Data.....")
                self.current_file_count = 0  
            
            self.prev_reward = self.reward
            # No job was processed in this step, so it carries no reward.
            step_reward = 0
            obs_list = [0]*self._OBS_SIZE
            done = True
        
        # Return step information; the dtype matches the declared observation space.
        return np.array(obs_list, dtype=np.float32), float(step_reward), done, info

    def render(self, mode="human"):
        """Rendering is not implemented for this environment."""
        pass
    
    def reset(self):
        """Start a new episode and return the initial all-zero float32 observation."""
        self._start_episode()
        return np.zeros(self._OBS_SIZE, dtype=np.float32)
    
    
# Build the environment and validate it against the Gym interface that
# Stable-Baselines3 expects (check_env is imported in the notebook's import cell).
env = ProcessEnv()
check_env(env, warn=True)

In [None]:
models_dir = "models/DQN"
logdir = "logs"
# exist_ok makes the cell safe to re-run and avoids a check-then-create race.
os.makedirs(models_dir, exist_ok=True)
os.makedirs(logdir, exist_ok=True)

TIMESTEPS = 1000000
# None keeps the original behaviour (train until the kernel is interrupted).
# Set an integer to stop after that many iterations so that
# "Restart Kernel -> Run All" can finish on its own.
MAX_ITERS = None
iters = 0
model = DQN("MlpPolicy", env, verbose=1, tensorboard_log=logdir)
try:
    while MAX_ITERS is None or iters < MAX_ITERS:
        iters += 1
        model.learn(total_timesteps=TIMESTEPS, reset_num_timesteps=False, tb_log_name="DQN")
        # One checkpoint per iteration, named after the cumulative timestep count.
        model.save(f"{models_dir}/{TIMESTEPS*iters}")
except KeyboardInterrupt:
    # Interrupting is the expected way to stop an unbounded run; handling it
    # here lets the rest of the notebook (final save) still execute.
    print(f"Training interrupted during iteration {iters}.")


log_path = os.path.join('Training', 'logs')

In [None]:
# Persist the model trained in the previous cell under the name 'DQN'
# (a relative path, so it is written to the current working directory).
model.save('DQN')