In [None]:
! pip install gym
! pip install pandas
! pip install stable-baselines3
! pip install tensorflow
! pip install pytorch
! pip install sb3-contrib
! pip install numpy
! pip install optuna

In [None]:
# NOTE(review): exact version pin; presumably a compatibility workaround for one
# of the packages installed above -- confirm and record the reason here.
!pip install importlib-metadata==4.13.0

In [12]:
import random
import gym
import numpy as np
from gym import spaces
import pandas as pd
import logging

class operationPlan(gym.Env):

    def __init__(self,amount_of_operations):
        """
        Initialise the episode state, the operating-room block layout and the gym spaces.

        Parameters
        ----------
        amount_of_operations : int
            Number of regular operations requested for the planning horizon.
            The operation table is allocated with 20 additional rows.
        """
        super().__init__()
        # --- episode bookkeeping ---
        self.total_reward = 0              # sum of all rewards handed out by step()
        self.count_fail = 0                # number of rejected actions
        self.already_added_ops = []        # operations that were successfully placed at least once
        self.number_of_cancellations = 0
        self.last_op = 400                 # last placed operation; 400 is presumably a "none yet" sentinel -- confirm
        self.assigned_ops = 0              # number of assigned operations when day 0 was closed
        self.start_of_ops = {}             # operation index -> absolute start slot
        self.action_dict = {}
        self.n_res = 0
        self.step_penalty = 0              # step counter (incremented once per step() call)
        self.actual_day = 0                # 0 = initial planning phase
        self.amount_of_operations = amount_of_operations
        # Placeholders with the final shapes; createOperations()/createBlocks()
        # below replace them with the real tables.
        self.operationList = pd.DataFrame(np.zeros((amount_of_operations+20,11)))
        self.block_plan = pd.DataFrame(np.full((32,487),fill_value=0))
        # Operating-room blocks, numbered consecutively; each block belongs to
        # one day and one operation category.
        categories_by_day = {
            1: (1, 2, 3, 4, 5, 2, 5),
            2: (1, 3, 3, 4, 4, 5),
            3: (1, 1, 2, 6, 4, 4, 5, 3),
            4: (1, 3, 4, 4, 5),
            5: (1, 2, 3, 4, 5, 2),
        }
        self.or_blocks = {}
        for day, categories in categories_by_day.items():
            for category in categories:
                self.or_blocks[len(self.or_blocks)] = {"category": category, "day": day}
        # Observation space for the agent: the operation table and the block plan.
        self.observation_space = spaces.Dict({
            "operations":spaces.Box(shape=(self.amount_of_operations+20,11),dtype=np.int64, low=-1,high=400),
            "blocks":spaces.Box(shape=(32,487),dtype=np.int64,low=-1,high=400)
        })
        # Action space with the components: operation, block, startTime, nextDay
        self.action_space = spaces.MultiDiscrete([self.amount_of_operations+20,32,480,2],dtype=np.int32)
        self.createOperations(self.amount_of_operations)
        self.createBlocks()
    def reset(self):
        """
        Start a new episode.

        Prints the totals of the finished episode, clears the bookkeeping,
        regenerates the operation table and the block plan, and returns the
        initial observation.

        Returns
        -------
        dict
            ``{"operations": ndarray, "blocks": ndarray}`` built from the
            freshly generated tables.
        """
        print("reward total:  ",self.total_reward)
        print("fail:",self.count_fail)
        self.total_reward = 0
        self.count_fail = 0
        self.already_added_ops = []
        self.last_op = 400
        self.step_penalty = 0
        self.start_of_ops = {}
        self.assigned_ops = 0
        self.actual_day = 0
        self.number_of_cancellations = 0
        # Placeholders; replaced by the two create* calls right below.
        self.operationList = pd.DataFrame(np.zeros((self.amount_of_operations+20,11)))
        self.block_plan = pd.DataFrame(np.full((32,487),fill_value=0))
        self.createOperations(self.amount_of_operations)
        self.createBlocks()
        # Only existing operations carry a real duration; unused rows hold the
        # filler value -1 and must not be part of the total.
        existing = self.operationList["exist"] == 1
        print("complete time", self.operationList.loc[existing, "duration"].sum() )
        return {"operations":self.operationList.to_numpy(),
                "blocks":self.block_plan.to_numpy()}
    def createOperations(self,amount):
        """
        Generate the operation table used as part of the observation.

        The table has ``amount + 20`` rows, initialised to -1. At most
        ``amount`` rows (indices ``0 .. amount-1``) are filled with regular
        operations whose category shares and durations follow
        ``op_informations`` (durations are drawn from a normal distribution).
        The operation categories are
        1: GASTRO, 2: CARD, 3: ORTH, 4: GYN, 5: URO, 6: MED.

        Parameters
        ----------
        amount : int
            Number of regular operations to generate.

        Returns
        -------
        numpy.ndarray or None
            The table as an array, or ``None`` if generation failed (the
            exception is logged).
        """
        columns = ["exist","assigned","op_category_1","op_category_2","op_category_3","op_category_4","op_category_5","op_category_6","duration","is_emergency","day"]
        category_columns = [name for name in columns if name.startswith("op_category_")]
        try:
            self.operationList = pd.DataFrame(np.full((amount+20,11),fill_value=-1), columns=columns)
            self.operationList["exist"] = 0
            # Rows 0 .. amount-1 are the regular operations (0-based indexing).
            self.operationList.loc[self.operationList.index < amount, category_columns] = 0
            op_informations = {
                1:{"mean":132,"std":76,"ratio":0.18},
                2:{"mean":99,"std":53,"ratio":0.14},
                3:{"mean":142,"std":58,"ratio":0.17},
                4:{"mean":78,"std":52,"ratio":0.28},
                5:{"mean":72,"std":38,"ratio":0.18},
                6:{"mean":75,"std":72,"ratio":0.05}
            }
            next_row = 0
            for category, info in op_informations.items():
                number_of_ops = round(amount*info["ratio"])
                durations = np.random.normal(info["mean"],info["std"],number_of_ops)
                for offset in range(number_of_ops):
                    row = next_row + offset
                    # Rounding the per-category shares can add up to more than
                    # `amount`; never create more than `amount` operations.
                    if row >= amount:
                        break
                    # abs(...)+1 keeps every duration a positive integer.
                    duration = int(abs(round(durations[offset]))) + 1
                    self.operationList.loc[row,["op_category_"+str(category),"exist","duration","is_emergency","assigned","day"]] = [1,1,duration,0,0,0]
                next_row += number_of_ops
            return self.operationList.to_numpy()
        except Exception:
            # Best effort as before, but no longer swallows KeyboardInterrupt/SystemExit.
            logging.exception("ex")

    def createBlocks(self):
        """
        Build the operating-room block plan in the layout the agent observes.

        One row per block (32 rows): the block's day, a one-hot encoding of its
        operation category, and 480 ``timestamp_*`` columns initialised to -1.
        """
        category_columns = ["op_category_"+str(number) for number in range(1,7)]
        slot_columns = ["timestamp_"+str(slot) for slot in range(0,480)]
        plan = self.block_plan = pd.DataFrame(np.full((32,487),fill_value=-1))
        plan.columns = ["day"] + category_columns + slot_columns
        plan.loc[:,category_columns] = 0
        for block_id in range(32):
            block_info = self.or_blocks[block_id]
            plan.loc[block_id,["day"]] = block_info["day"]
            # mark the single category this block is reserved for
            plan.loc[block_id,["op_category_"+str(block_info["category"])]] = 1

    def setoperation(self,operation,block,start_time):
        """
        Place ``operation`` into ``block`` starting at slot ``start_time``.

        Called by ``step`` when the agent makes an assignment instead of closing
        the day. The assignment is accepted only if all of these hold:

        * it is day 0, or the operation is an emergency of the current day;
        * the operation exists;
        * for a regular operation the block category matches, for an emergency
          the block's day matches the emergency's day;
        * the operation ends inside the 480-slot block (no overtime).

        Operations already occupying the target slots are unassigned and removed
        from the block; an earlier placement of ``operation`` itself is removed too.

        Parameters
        ----------
        operation : int
            Row index into ``self.operationList``.
        block : int
            Row index into ``self.block_plan`` / key of ``self.or_blocks``.
        start_time : int
            First slot (0..479) inside the block.

        Returns
        -------
        float or int
            -0.02 for a rejected assignment; 0.2 if the operation had been
            placed before; otherwise 7 on day 0 and 5 on later days.
        """
        ops = self.operationList
        plan = self.block_plan
        emergency_flag = ops.loc[operation, "is_emergency"]
        op_day = ops.loc[operation, "day"]

        # Either day 0, or only emergencies of the current day may still be added.
        if not (self.actual_day == 0 or (emergency_flag == 1 and op_day == self.actual_day)):
            self.count_fail += 1
            return -0.02
        # Only existing entries can be scheduled.
        if ops.loc[operation, "exist"] != 1:
            self.count_fail += 1
            return -0.02

        duration = int(ops.loc[operation, "duration"])
        active_categories = [col for col in ops.columns if "op" in col and ops.loc[operation, col] == 1]
        op_category = active_categories[0].split("y_")[1]
        block_info = self.or_blocks[block]
        # The category must match, or for an emergency the day must match.
        regular_match = str(block_info["category"]) == op_category and emergency_flag == 0
        emergency_match = emergency_flag == 1 and block_info["day"] == op_day
        if not (regular_match or emergency_match):
            self.count_fail += 1
            return -0.02
        # Only assign if the length would not lead to overtime.
        if (480 - start_time - duration) < 0:
            self.count_fail += 1
            return -0.02

        # Timestamp columns start after the 7 meta columns (day + 6 categories).
        # The same window is used for the collision check and for the write.
        first_col = start_time + 7
        last_col = start_time + duration + 8
        slot_columns = [col for col in plan.columns if "timestamp" in col]
        touched_duration = 0

        # Remove every operation that collides with the target window.
        for other in np.unique(plan.iloc[block, first_col:last_col].to_numpy()):
            if other == -1:
                continue
            self.start_of_ops[other] = 9999999
            touched_duration += int(ops.loc[other, "duration"])
            # Single .loc call: chained indexing may write to a temporary copy.
            ops.loc[other, "assigned"] = 0
            block_row = plan.loc[block, slot_columns]
            plan.loc[block, block_row.index[(block_row == other).to_numpy()]] = -1

        if ops.loc[operation, "assigned"] == 1:
            # The operation was already placed somewhere: free its old slots.
            touched_duration += duration
            for b in range(len(plan)):
                block_row = plan.loc[b, slot_columns]
                old_columns = block_row.index[(block_row == operation).to_numpy()]
                if len(old_columns) > 0:
                    plan.loc[b, old_columns] = -1
                    break

        self.start_of_ops[operation] = start_time + (int(plan.loc[block, "day"]) - 1) * 480
        touched_duration += duration
        plan.iloc[block, first_col:last_col] = operation
        ops.loc[operation, "assigned"] = 1

        if self.last_op == operation:
            # Share-of-touched-duration term; weighted with 0, i.e. currently disabled.
            reward = (duration / touched_duration) * 0
        elif self.actual_day == 0:
            reward = 7
        else:
            reward = 5
        if self.step_penalty % 1000 == 0:
            print("step Reward",reward)
        self.last_op = operation
        # Re-placing an operation that was placed before only earns a small reward.
        if operation in self.already_added_ops:
            reward = 0.2
        self.already_added_ops.append(operation)
        return reward

 
    def go_to_next_day(self):
        """
        Close the current day and return the reward for it.

        On day 0 the number of assigned operations is remembered as the
        baseline for counting cancellations. While ``actual_day < 5`` new
        emergencies are generated for the following day. ``actual_day`` is
        incremented before returning.

        Returns
        -------
        float
            Day 0: a bonus growing with the number of assigned regular
            operations plus 100 times the assigned share of the total duration.
            Later days: ``5 - cancellations - (earliest start / 2400) * 10``.

        Raises
        ------
        Exception
            Any error is logged and re-raised; returning ``None`` would only
            fail later in ``step`` with an unrelated ``TypeError``.
        """
        try:
            ops = self.operationList
            if self.actual_day == 0:
                print("fail0:",self.count_fail)
                print("step0:",self.step_penalty)
                self.assigned_ops = ops.loc[ops["assigned"]==1, "assigned"].sum()
            assigned_regular = (ops["assigned"]==1) & (ops["is_emergency"]==0)
            self.number_of_cancellations = self.assigned_ops - ops.loc[assigned_regular, "assigned"].sum()
            if self.actual_day < 5:
                self.create_emergency_tasks()
            if self.actual_day > 0:
                reward = 5 -( self.number_of_cancellations * 1) - ((min(self.start_of_ops.values())/(2400)) * 10)
            else:
                # Recompute the masks: create_emergency_tasks() changed the table.
                assigned_regular = (ops["assigned"]==1) & (ops["is_emergency"]==0)
                assignments = int(ops.loc[assigned_regular, "assigned"].sum())
                assignments_reward = np.sum([pow(x,0.75) for x in range(0,assignments+1)])
                assigned_share = ops.loc[ops["assigned"]==1, "duration"].sum() / ops.loc[ops["exist"]==1, "duration"].sum()
                reward = assignments_reward + assigned_share * 100
            self.actual_day +=1
            return reward
        except Exception:
            logging.exception("ex")
            raise

    def create_emergency_tasks(self):
        """
        Generate 1-3 emergency operations for the following day.

        The emergencies are written into the first rows after the existing
        operations. Each one is unassigned, belongs to every category, has a
        random duration between 60 and 180 and is due on ``actual_day + 1``.
        Errors are logged and not propagated.
        """
        try:
            number_of_emergencies = random.randint(1, 3)
            # Existing operations occupy rows 0 .. n-1, so the count is also
            # the index of the first free row.
            number_of_ops = int(self.operationList["exist"].sum())
            print(number_of_ops)
            # Never write past the pre-allocated table: .loc would silently
            # append rows and change the shape of the observation.
            last_row = min(number_of_ops+number_of_emergencies, len(self.operationList))
            for row in range(number_of_ops,last_row):
                self.operationList.loc[row, ["exist","assigned","op_category_1","op_category_2","op_category_3","op_category_4","op_category_5","op_category_6","duration","is_emergency","day"]] = [1,0,1,1,1,1,1,1,random.randint(60,180),1,self.actual_day+1]
        except Exception:
            logging.exception("ex")

    def step(self, action):
        """
        Apply one agent action.

        Parameters
        ----------
        action : sequence of 4 integers
            ``(operation, block, start_time, next_day)``. With ``next_day == 1``
            the agent asks to close the current day and the other three
            components are ignored; otherwise the operation is placed via
            ``setoperation``.

        Returns
        -------
        tuple
            ``(observation, reward, done, info)``; ``done`` becomes True once
            ``actual_day`` reaches 6.
        """
        done =False
        self.step_penalty += 1
        if self.step_penalty % 1000 == 0:
            print(action)
        # int() accepts NumPy scalars as well as plain Python integers.
        operation = int(action[0])
        block = int(action[1])
        start_time = int(action[2])
        next_day = int(action[3])

        ops = self.operationList
        if next_day == 1:
            exists = ops["exist"]==1
            assigned = (ops["assigned"]==1) & exists
            if self.actual_day != 0:
                # A later day may only be closed once every emergency is assigned.
                emergencies = ops["is_emergency"]==1
                may_close = (assigned & emergencies).sum() == emergencies.sum()
            else:
                # Day 0 may be closed once more than 90% of the total block
                # capacity (in minutes) is occupied, or more than 70% of the
                # existing operations are assigned.
                occupied_minutes = ops.loc[assigned, "duration"].sum()
                may_close = (occupied_minutes > (32*8*60)*0.9) or (assigned.sum() > exists.sum() * 0.70)
            if may_close:
                reward = self.go_to_next_day()
                if self.actual_day ==6:
                    done =True
            else:
                self.count_fail +=1
                reward = -1
        else:
            reward = self.setoperation(operation,block,start_time)
        if done:
            print("step", self.step_penalty)
            print("start",min(self.start_of_ops.values()))
            print("gesamte OP ZEit mit Emergencies", self.operationList[self.operationList["exist"]==1]["duration"].sum())
            print("belegte Zeit ", self.operationList[self.operationList["assigned"]==1]["duration"].sum()/(32*8*60))
            print("cancellations ",self.number_of_cancellations)
            print("anzahl hinzugefügter ops", self.operationList[(self.operationList["assigned"]==1) & (self.operationList["is_emergency"]==0)]["assigned"].sum())
            print("anzahl hinzugefügter emergencies", self.operationList[(self.operationList["assigned"]==1) & (self.operationList["is_emergency"]==1)]["assigned"].sum())
        self.total_reward += reward
        return {"operations":self.operationList.to_numpy(),
                "blocks":self.block_plan.to_numpy()},reward,done,{}

    def action_masks(self):
        """
        Return the flat action mask for the MultiDiscrete action space.

        The mask is the concatenation of the masks of all action components
        (operation, block, startTime, nextDay); its length is the sum of the
        component sizes, so it fits any ``amount_of_operations``. Only the
        operation entries and the final entry (``nextDay == 1``) are ever
        masked out.

        Returns
        -------
        list of bool
            ``True`` marks an allowed choice.
        """
        ops = self.operationList
        n_choices = int(np.sum(self.action_space.nvec))
        close_day = n_choices - 1   # last entry of the mask: nextDay == 1

        exists = ops["exist"]==1
        emergencies = ops["is_emergency"]==1
        assigned = (ops["assigned"]==1) & exists

        # Rows without an operation can never be chosen.
        illegal = set(ops.index[(ops["exist"]==0).to_numpy()].tolist())
        if self.actual_day == 0:
            # Day 0 may only be closed once at least 70% of the operations are assigned.
            if assigned.sum() < exists.sum() * 0.70:
                illegal.add(close_day)
        else:
            # Later days may only be closed once every emergency is assigned ...
            if (assigned & emergencies).sum() != emergencies.sum():
                illegal.add(close_day)
            # ... and only emergencies may still be scheduled.
            illegal.update(ops.index[(ops["is_emergency"]==0).to_numpy()].tolist())

        mask = np.ones(n_choices, dtype=bool)
        mask[np.array(sorted(illegal), dtype=np.intp)] = False
        return mask.tolist()

In [None]:
from sb3_contrib import MaskablePPO
from gym.wrappers import FlattenObservation
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3 import PPO

# Environment with 140 regular operations.
env = operationPlan(140)
# PPO hyperparameters for the training run.
params = {
    'n_steps': 1535,
    'gamma': 0.9728850181649547,
    'clip_range': 0.235,
    'learning_rate': 0.00012,
    "ent_coef": 0.0081,
}
model = MaskablePPO(policy="MultiInputPolicy", env=env, verbose=1, **params)
model.learn(total_timesteps=100000)

In [None]:


from stable_baselines3.common.evaluation import evaluate_policy
import pandas as pd
import gym
import optuna
from sb3_contrib import MaskablePPO
from stable_baselines3.common.env_util import make_vec_env
import stable_baselines3.common.env_checker as envc

def optimize_ppo(trial):
    """
    Sample one set of PPO hyperparameters from an Optuna trial.

    All parameters are drawn on a log scale. ``n_steps`` is sampled as an
    integer, so the value stored in the study is the one actually used.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to draw the values.

    Returns
    -------
    dict
        Keyword arguments for the PPO constructor: ``n_steps``, ``gamma``,
        ``ent_coef``, ``clip_range`` and ``learning_rate``.
    """
    return {
        'n_steps': trial.suggest_int('n_steps', 1200, 1700, log=True),
        'gamma': trial.suggest_float('gamma', 0.96, 0.98, log=True),
        'ent_coef': trial.suggest_float('ent_coef', 0.000001, 0.0081, log=True),
        'clip_range': trial.suggest_float('clip_range', 0.2, 0.245, log=True),
        'learning_rate': trial.suggest_float('learning_rate', 0.00005, 0.00012, log=True)
      }
def optimize_agent(trial):
    """
    Optuna objective: train one agent with sampled hyperparameters and score it.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial from which the hyperparameters are sampled.

    Returns
    -------
    float
        The negated mean episode reward, so that a minimising study maximises
        the reward.
    """
    model_params = optimize_ppo(trial)
    environment = make_vec_env(lambda: operationPlan(70), n_envs=1, seed=0)
    # The sampled hyperparameters must be handed to the model; otherwise every
    # trial trains with the library defaults and the study compares nothing.
    model = MaskablePPO("MultiInputPolicy",environment, **model_params)
    print(model.learning_rate)
    model.learn(total_timesteps=60000)
    # NOTE(review): this evaluate_policy comes from stable_baselines3 and does
    # not pass action masks to the policy -- confirm this is intended.
    mean_reward, _ = evaluate_policy(model, model.get_env(), n_eval_episodes=1, deterministic=False)
    return -1 * mean_reward

# Default direction is "minimize"; the objective returns the negated reward.
study = optuna.create_study()
try:
    study.optimize(optimize_agent, n_trials=15)
except KeyboardInterrupt:
    print('Interrupted by keyboard.')
# best_params raises ValueError when no trial has completed, e.g. when the run
# was interrupted during the first trial.
try:
    print(study.best_params)
except ValueError:
    print('No completed trial, so no best parameters are available.')