<a href="https://colab.research.google.com/github/ssooniunnie/reinforcement_scheduling/blob/main/%EA%B0%95%ED%99%94%ED%95%99%EC%8A%B5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive at /content/drive so later cells can read files stored there.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Install the packages this notebook imports.
# NOTE(review): only tensorflow is pinned; gym, keras and keras-rl2 are unpinned.
# The environment defined below returns a 4-tuple from step() and a bare state
# from reset() - confirm the installed gym / keras-rl2 versions expect that API.
!pip install tensorflow==2.3.0
!pip install gym
!pip install keras
!pip install keras-rl2

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
import pandas as pd

import random
import numpy as np


from gym import Env
from gym.spaces import Discrete, Box

import ast

from gym.vector.utils import spaces


In [4]:
# Load the plan table from Drive.
# NOTE(review): create_new_scheduler() reads the same CSV itself, so this
# module-level frame appears to be unused by the cells shown here.
df = pd.read_csv('/content/drive/MyDrive/Colab/강화학습/data.csv')

In [5]:
MINIMUM_DAY_OFFS = 9


class Pilot:
    """One pilot: an id, the plans accepted so far and a 30-entry per-day load map.

    A plan is accepted only if, once added to the current map, no day carries a
    load above 1, no 7-day window sums to more than 6, and at least
    ``MINIMUM_DAY_OFFS`` entries of the map remain free.
    """

    def __init__(self, pilot_id):
        self.pilot_id = pilot_id
        self.plans = []
        self.schedule_map = [0] * 30
        self.result = 0

    def get_pilot_id(self):
        """Return the identifier given at construction."""
        return self.pilot_id

    def get_plans(self):
        """Return the list of plans accepted so far (the live list, not a copy)."""
        return self.plans

    def get_schedule_map(self):
        """Return the per-day load map (the live list, not a copy)."""
        return self.schedule_map

    def apply_plan(self, plan):
        """Accept ``plan`` if every policy allows it.

        Returns True when the plan was recorded and merged into the schedule
        map, False when it was rejected (nothing is modified in that case).
        """
        if not self.is_applicable(plan):
            return False
        self.plans.append(plan)
        self.update_schedule_map(plan)
        return True

    def is_applicable(self, plan):
        """Return True only when ``plan`` violates none of the three policies.

        The checks run in order and stop at the first violation:
        1. no day may end up with a load of 2 or more,
        2. every 7-day window must keep at least one free day,
        3. the month must keep at least ``MINIMUM_DAY_OFFS`` free days.
        """
        violated = (
            self.is_overlapping(plan)
            or self.is_overwork_in_week(plan)
            or self.is_overwork_in_month(plan)
        )
        return not violated

    def is_overlapping(self, plan):
        """Return True if adding ``plan`` pushes any day's load above 1."""
        encoding = plan.get_days_encoding()
        return any(
            encoding[day] + self.schedule_map[day] > 1
            for day in range(len(encoding))
        )

    def is_overwork_in_week(self, plan):
        """Return True if some 7-day window would sum to more than 6 with ``plan``."""
        # Windows start at day 0 and slide by one day while all 7 days still fit.
        for window_start in range(len(self.schedule_map) - 6):
            load = 0
            for day in range(window_start, window_start + 7):
                load += self.schedule_map[day] + plan.get_days_encoding()[day]
            if load > 6:
                return True
        return False

    def is_overwork_in_month(self, plan):
        """Return True if fewer than ``MINIMUM_DAY_OFFS`` free days would remain."""
        month_length = len(self.schedule_map)
        worked = sum(
            self.schedule_map[day] + plan.get_days_encoding()[day]
            for day in range(month_length)
        )
        return (month_length - worked) < MINIMUM_DAY_OFFS

    def update_schedule_map(self, plan):
        """Add ``plan``'s per-day encoding onto the schedule map, in place."""
        for day in range(len(self.schedule_map)):
            self.schedule_map[day] = self.schedule_map[day] + plan.get_days_encoding()[day]

    def print_schedule_map(self):
        """Print the current per-day load map."""
        print("current scheduel map : ", self.schedule_map)

    def print_plan_list(self):
        """Print the id and day encoding of every accepted plan."""
        print("current plan list : ")
        for assigned in self.plans:
            print("    plan id : ", assigned.get_plan_id())
            print("    plan days_encoding : ", assigned.get_days_encoding())

In [6]:
import random


class Scheduler:
    """Hands out plans one at a time, in random order, and assigns them to pilots.

    One plan is always held in ``next_plan`` as the pending plan; it is marked
    as applied at the moment it is drawn. ``next_plan`` is None once every plan
    has been drawn.
    """

    def __init__(self, pilots, plans):
        self.pilots = pilots
        self.plans = plans
        self.num_plans = len(self.plans)
        self.num_pilots = len(self.pilots)
        self.next_plan = self.get_next_random_plan()

    def get_next_plan(self):
        """Return the pending plan, or None when no plan is left."""
        return self.next_plan

    def get_pilots(self):
        """Return the list of pilots (the live list, not a copy)."""
        return self.pilots

    def do_next_deploy(self, pilot_id):
        """Try to assign the pending plan to the pilot at index ``pilot_id``.

        Returns True on success (and a new pending plan is drawn). Returns
        None, not False, when the pilot rejects the plan or when there is no
        pending plan; callers distinguish failure with ``result is None``.
        """
        if self.next_plan is None:
            # Nothing left to deploy; report failure instead of crashing in the pilot.
            return None
        pilot = self.pilots[pilot_id]
        if not pilot.apply_plan(self.next_plan):
            return None
        self.next_plan = self.get_next_random_plan()
        return True

    def do_schedule(self):
        """Assign every remaining plan to randomly chosen pilots.

        Starts from the pending plan so that the plan already drawn by the
        constructor is scheduled too. Returns False as soon as a pilot rejects
        a plan (that plan stays pending), True once no plan is left.
        """
        while self.next_plan is not None:
            pilot = self.get_next_random_pilot()
            if not pilot.apply_plan(self.next_plan):
                return False
            self.next_plan = self.get_next_random_plan()
        return True

    def get_next_random_pilot(self):
        """Return a uniformly chosen pilot."""
        return self.pilots[random.randrange(self.num_pilots)]

    def get_next_random_plan(self):
        """Draw a uniformly random not-yet-applied plan and mark it as applied.

        Returns None only when every plan has already been drawn. The draw is
        made from the remaining candidates directly, so it cannot miss a plan
        that is still available.
        """
        remaining = [
            index for index in range(self.num_plans)
            if self.validate_candidate(index)
        ]
        if not remaining:
            return None
        chosen = self.plans[random.choice(remaining)]
        chosen.set_is_applied(True)
        return chosen

    def validate_candidate(self, candidate_index):
        """Return True if the plan at ``candidate_index`` has not been applied yet."""
        return not self.plans[candidate_index].get_is_applied()

    def print_all_plans_info(self):
        """Placeholder; currently prints nothing."""
        pass

    def print_all_pilots_info(self):
        """Print each pilot's id followed by the id and encoding of its plans."""
        for pilot in self.pilots:
            pilot_id = pilot.get_pilot_id()
            plans = pilot.get_plans()
            print("pilot id : ", pilot_id)
            for plan in plans:
                print("    plan id : ", plan.get_plan_id())
                print("    plan encoding : ", plan.get_days_encoding())


In [7]:
class Plan:
    """A schedulable plan: an id, a per-day encoding, a reward and an applied flag."""

    def __init__(self, plan_id, days_encoding, reward):
        self.plan_id, self.days_encoding, self.reward = plan_id, days_encoding, reward
        # A new plan starts out as not yet applied.
        self.is_applied = False

    def get_plan_id(self):
        """Return the plan's identifier."""
        return self.plan_id

    def get_days_encoding(self):
        """Return the per-day encoding object given at construction."""
        return self.days_encoding

    def get_is_applied(self):
        """Return the current value of the applied flag."""
        return self.is_applied

    def set_is_applied(self, is_applied):
        """Overwrite the applied flag with ``is_applied``."""
        self.is_applied = is_applied


In [8]:
def create_new_scheduler(data_path='/content/drive/MyDrive/Colab/강화학습/data.csv', num_pilots=20):
    """Build a fresh Scheduler from the plan CSV.

    Args:
        data_path: CSV file to read. Each row supplies one plan through the
            columns 'id', 'BLK' and 'R'; the 'BLK' cell is a Python literal
            string that is parsed with ``ast.literal_eval``.
        num_pilots: number of Pilot objects to create, with ids 0..num_pilots-1.

    Returns:
        A Scheduler holding the new pilots and one Plan per CSV row. A CSV
        with no rows yields a Scheduler with an empty plan list.
    """
    # Read the plan table from file.
    plan_table = pd.read_csv(data_path)

    # Build the pilot list and the plan list.
    pilot_list = [Pilot(pilot_id) for pilot_id in range(num_pilots)]
    plan_list = [
        Plan(plan_id, ast.literal_eval(encoded_days), reward)
        for plan_id, encoded_days, reward in zip(
            plan_table['id'], plan_table['BLK'], plan_table['R']
        )
    ]

    # Hand both lists to the Scheduler constructor.
    return Scheduler(pilot_list, plan_list)


class SchedulingEnvironment(Env):
    """Gym environment in which each action assigns the pending plan to one pilot.

    Action: index of the pilot that should receive the pending plan.
    Observation: integer array with one row per pilot (its schedule map)
    followed by one row holding the pending plan's day encoding, or zeros when
    no plan is pending.
    Reward: 1 for an accepted assignment, -100 for a rejected one.
    An episode ends on the first rejected assignment or when no plan is left.
    """

    def __init__(self):
        self.scheduler = create_new_scheduler()
        pilots = self.scheduler.get_pilots()
        # Row width of the observation, taken from the pilots' own schedule maps.
        self.num_days = len(pilots[0].get_schedule_map())
        self.action_space = Discrete(len(pilots))
        # One row per pilot plus one row for the pending plan.
        self.observation_space = Box(low=0, high=1,
                                     shape=(len(pilots) + 1, self.num_days),
                                     dtype=int)
        self.state = self.get_observation_data()

    def step(self, action):
        """Assign the pending plan to pilot ``action``.

        Returns the usual ``(state, reward, done, info)`` 4-tuple.
        """
        deployed = self.scheduler.do_next_deploy(action)
        reward = 1 if deployed else -100
        # Finished when the assignment failed or when there is nothing left to assign.
        done = (not deployed) or self.scheduler.get_next_plan() is None
        self.state = self.get_observation_data()
        return self.state, reward, done, {}

    def render(self, mode='human'):
        """No-op; ``mode`` is accepted so gym-style ``render(mode=...)`` calls work."""
        pass

    def reset(self):
        """Start a new episode with a freshly built scheduler and return its state."""
        self.scheduler = create_new_scheduler()
        self.state = self.get_observation_data()
        return self.state

    def get_observation_data(self):
        """Stack every pilot's schedule map and the pending plan row into one array."""
        rows = [pilot.get_schedule_map() for pilot in self.scheduler.get_pilots()]
        pending_plan = self.scheduler.get_next_plan()
        if pending_plan is None:
            # All plans have been handed out: use an all-zero plan row so the
            # terminal observation keeps the declared shape.
            rows.append([0] * self.num_days)
        else:
            rows.append(pending_plan.get_days_encoding())
        return np.array(rows, dtype=int)



In [9]:

def test_episode(num_episodes=10):
    """Run episodes with uniformly random actions and print each episode's score.

    Args:
        num_episodes: how many episodes to play (default 10).

    The action range is taken from the environment's action space, so it
    follows the number of pilots instead of assuming 20.
    """
    env = SchedulingEnvironment()
    num_actions = env.action_space.n
    for episode in range(1, num_episodes + 1):
        env.reset()
        done = False
        score = 0
        while not done:
            env.render()
            action = random.randint(0, num_actions - 1)
            _, reward, done, _ = env.step(action)
            score += reward
        print('Episode:{} Score:{}'.format(episode, score))



In [10]:
# Smoke-test the environment with random actions; prints one score line per episode.
test_episode()

Episode:1 Score:-86
Episode:2 Score:-88
Episode:3 Score:-90
Episode:4 Score:-92
Episode:5 Score:-88
Episode:6 Score:-85
Episode:7 Score:-69
Episode:8 Score:-83
Episode:9 Score:-65
Episode:10 Score:-93


In [11]:
# Environment instance used for training, plus the sizes read from its spaces.
env = SchedulingEnvironment()
states = env.observation_space.shape  # observation shape tuple
actions = env.action_space.n  # number of discrete actions (one per pilot)


In [12]:
 # Show the observation shape that the model's input layer is built from.
 obs_shape = env.observation_space.shape
 print(obs_shape)

(21, 30)


In [13]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam


  class IteratorBase(collections.Iterator, trackable.Trackable,
  class DatasetV2(collections.Iterable, tracking_base.Trackable,


In [19]:
# If the error "'Sequential' object has no attribute '_compile_time_distribution_strategy'" appears, the existing model has to be deleted first.
#del model

In [20]:
# If "'Sequential' object has no attribute '_compile_time_distribution_strategy'" is raised,
# run `del model` in the cell above and then execute this cell again.
model = Sequential([
    # Leading 1 in the input shape is the window dimension placed before the observation shape.
    Flatten(input_shape=(1,) + env.observation_space.shape),
    Dense(24, activation='relu'),
    Dense(24, activation='relu'),
    # One linear output per action.
    Dense(actions, activation='linear'),
])

model.compile(loss='mean_squared_error', optimizer='rmsprop', metrics=['accuracy'])

model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 630)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 24)                15144     
_________________________________________________________________
dense_5 (Dense)              (None, 24)                600       
_________________________________________________________________
dense_6 (Dense)              (None, 24)                600       
_________________________________________________________________
dense_7 (Dense)              (None, 20)                500       
Total params: 16,844
Trainable params: 16,844
Non-trainable params: 0
_________________________________________________________________


  if isinstance(sample_weight_mode, collections.Mapping):


In [16]:

from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory


In [21]:
def build_agent(model, actions):
    """Wrap ``model`` in a DQNAgent.

    Args:
        model: the Keras model passed to the agent as ``model``.
        actions: number of actions, passed to the agent as ``nb_actions``.

    Returns:
        A DQNAgent using a Boltzmann Q policy, a 50000-entry sequential memory
        with window length 1, 5 warm-up steps and ``target_model_update=1e-2``.
    """
    return DQNAgent(
        model=model,
        memory=SequentialMemory(limit=50000, window_length=1),
        policy=BoltzmannQPolicy(),
        nb_actions=actions,
        nb_steps_warmup=5,
        target_model_update=1e-2,
    )


In [22]:
# If "'Sequential' object has no attribute '_compile_time_distribution_strategy'" is raised,
# run `del model` and rebuild the model before running this cell again.

dqn = build_agent(model, actions)
# `learning_rate` is the supported keyword; `lr` is only a deprecated alias for it.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
# Keep the returned History object so the cell does not end in a bare object repr.
history = dqn.fit(env, nb_steps=100000, visualize=False, verbose=1)

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.


Training for 100000 steps ...
Interval 1 (0 steps performed)
    1/10000 [..............................] - ETA: 9:05 - reward: 1.0000

  batch_idxs = np.random.random_integers(low, high - 1, size=size)


   11/10000 [..............................] - ETA: 13:40 - reward: 1.0000

  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)


   16/10000 [..............................] - ETA: 11:55 - reward: -5.3125

  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)


   23/10000 [..............................] - ETA: 9:50 - reward: -3.3913 

  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)


   30/10000 [..............................] - ETA: 8:38 - reward: -2.3667

  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)


   36/10000 [..............................] - ETA: 8:15 - reward: -1.8056

  batch_idxs = np.random.random_integers(low, high - 1, size=size)


610 episodes - episode_reward: -84.634 [-99.000, -50.000] - loss: 2178613.616 - mae: 2651.217 - mean_q: 3306.118

Interval 2 (10000 steps performed)
457 episodes - episode_reward: -79.112 [-99.000, -38.000] - loss: 778372864.000 - mae: 78566.398 - mean_q: 91942.078

Interval 3 (20000 steps performed)
463 episodes - episode_reward: -79.382 [-99.000, -40.000] - loss: 10372251648.000 - mae: 301640.156 - mean_q: 344388.000

Interval 4 (30000 steps performed)
488 episodes - episode_reward: -80.527 [-99.000, -51.000] - loss: 70505734144.000 - mae: 529807.125 - mean_q: 604381.438

Interval 5 (40000 steps performed)
683 episodes - episode_reward: -86.341 [-99.000, -34.000] - loss: 286486986752.000 - mae: 722470.562 - mean_q: 811376.000

Interval 6 (50000 steps performed)
940 episodes - episode_reward: -90.361 [-99.000, -64.000] - loss: 322103803904.000 - mae: 495284.688 - mean_q: 547188.688

Interval 7 (60000 steps performed)
1465 episodes - episode_reward: -94.181 [-99.000, -73.000] - loss: 5

<tensorflow.python.keras.callbacks.History at 0x7f05085657c0>