In [1]:
#enable autoreload
%load_ext autoreload
%autoreload 2

In [2]:
import sys

sys.path.append('..')

from environments.ElevatorEnvironment import ElevatorEnvironment
from agents.llmzero import LLMTransitionModel
from agents.random_agent import RandomAgent
from agents.elevator_expert import ElevatorExpertPolicyAgent

import numpy as np
import random
import matplotlib.pyplot as plt

  from pyRDDLGym.Visualizer.MovieGenerator import MovieGenerator
  from tqdm.autonotebook import tqdm, trange


In [3]:
env = ElevatorEnvironment()

c:\Users\ianch\miniconda3\envs\aiplanning\Lib\site-packages\pyRDDLGym\Examples c:\Users\ianch\miniconda3\envs\aiplanning\Lib\site-packages\pyRDDLGym\Examples\manifest.csv
Available example environment(s):
CartPole_continuous -> A simple continuous state-action MDP for the classical cart-pole system by Rich Sutton, with actions that describe the continuous force applied to the cart.
CartPole_discrete -> A simple continuous state MDP for the classical cart-pole system by Rich Sutton, with discrete actions that apply a constant force on either the left or right side of the cart.
Elevators -> The Elevator domain models evening rush hours when people from different floors in a building want to go down to the bottom floor using elevators.
HVAC -> Multi-zone and multi-heater HVAC control problem
MarsRover -> Multi Rover Navigation, where a group of agent needs to harvest mineral.
MountainCar -> A simple continuous MDP for the classical mountain car control problem.
NewLanguage -> Example with

<op> is one of {<=, <, >=, >}
<rhs> is a deterministic function of non-fluents or constants only.
>> ( sum_{?f: floor} [ elevator-at-floor(?e, ?f) ] ) == 1


In [4]:
SEED = 117
np.random.seed(SEED)
random.seed(SEED)

In [7]:
random_agent = RandomAgent(env, seed=SEED)
expert_agent = ElevatorExpertPolicyAgent()

In [None]:

def estimate_value(state, gamma=0.9):
    """
    Estimate the value of a state using a formula.
    Estimate the total reward until the next delivery assuming no future arrivals.
    Assume elevator is following a fix policy where it will move to the top floor and
    pick up all passengers and deliver them to the first floor.
    """
    
    num_person_waiting = [None for _ in range(5)]
    num_in_elavator = None
    door_state = None
    direction = None
    current_floor = None
    
    for feature, value in state.items():
        if "num-person-waiting" in feature:
            num_person_waiting[int(feature[-1])] = value
        if "elevator-at-floor" in feature and value == True:
            current_floor = int(feature[-1]) + 1
        if feature == "elevator-dir-up___e0":
            direction = "up" if value == True else "down"
        if feature == "elevator-closed___e0":
            door_state = "closed" if value == True else "open"
        if feature == "num-person-in-elevator___e0":
            num_in_elavator = value
            
    delivered = num_in_elavator if current_floor == 1 else 0
    num_in_elavator = 0 if current_floor == 1 else num_in_elavator
    total_waiting = sum(num_person_waiting)
    
    # if no one waiting and no one in the elevator, return 0
    if total_waiting == 0 and num_in_elavator == 0:
        return 0
    
    reward = 30 * delivered - 3 * total_waiting - 0.75 * num_in_elavator
    value = reward
    
    print(f"current floor: {current_floor}, total waiting: {total_waiting}, num in elevator: {num_in_elavator}, value: {value}")
    
    # base case if the elevator is at the first floor
    if current_floor == 1:
        return value
    
    steps = 1
    
    # moving up
    if direction == "up":
        top_floor = 1
        for i in range(5):
            if num_person_waiting[i] is not None:
                top_floor = i + 1
                
        floors_to_go = top_floor - current_floor 
        
        #reward unchanged while going up
        for _ in range(floors_to_go):
            value += reward * gamma ** steps
            steps += 1
            current_floor = current_floor + 1
            print(f"current floor: {current_floor}, total waiting: {total_waiting}, num in elevator: {num_in_elavator}, value: {value}")
            
        
            
    # moving down, open, close and move if there are people waiting
    while current_floor > 1:
        #check if there are people waiting
        if num_person_waiting[current_floor - 1] > 0:
            #open door, people still waiting
            value += reward * gamma ** steps
            steps += 1
            #close door, people already in the elevator
            total_waiting -= num_person_waiting[current_floor - 1]
            num_in_elavator += num_person_waiting[current_floor - 1]
            num_in_elavator = max(10, num_in_elavator)
            
            reward = - 3 * total_waiting - 0.75 * num_in_elavator
            value += reward * gamma ** steps
            steps += 1
        #move to the next floor
        value += reward * gamma ** steps
        steps += 1
        
        print(f"current floor: {current_floor}, total waiting: {total_waiting}, num in elevator: {num_in_elavator}, value: {value}")
            
        current_floor -= 1
    
    assert current_floor == 1
        
    # add delivery reward
    reward = 30 * num_in_elavator - 3 * total_waiting
    value += reward * gamma ** steps
    
    return value

In [65]:
state, _ = env.reset()

for _ in range(10):
    state, reward, done, _, _ = env.step(expert_agent.act(state))
    
state

{'num-person-waiting___f0': 0,
 'num-person-waiting___f1': 1,
 'num-person-waiting___f2': 2,
 'num-person-waiting___f3': 0,
 'num-person-waiting___f4': 1,
 'num-person-in-elevator___e0': 2,
 'elevator-dir-up___e0': False,
 'elevator-closed___e0': True,
 'elevator-at-floor___e0__f0': False,
 'elevator-at-floor___e0__f1': False,
 'elevator-at-floor___e0__f2': True,
 'elevator-at-floor___e0__f3': False,
 'elevator-at-floor___e0__f4': False}

In [66]:
for _ in range(10):
    state, reward, done, _, _ = env.step(expert_agent.act(state))
    
state

{'num-person-waiting___f0': 0,
 'num-person-waiting___f1': 2,
 'num-person-waiting___f2': 0,
 'num-person-waiting___f3': 0,
 'num-person-waiting___f4': 2,
 'num-person-in-elevator___e0': 0,
 'elevator-dir-up___e0': True,
 'elevator-closed___e0': True,
 'elevator-at-floor___e0__f0': False,
 'elevator-at-floor___e0__f1': False,
 'elevator-at-floor___e0__f2': False,
 'elevator-at-floor___e0__f3': True,
 'elevator-at-floor___e0__f4': False}

In [75]:
value = estimate_value(state)
value

current floor: 4, value: -12.0
current floor: 5, value: -22.8
current floor: 5, value: -51.21885
current floor: 4, value: -59.190465
current floor: 3, value: -66.3649185
current floor: 2, value: -80.182915941


45.34132249500003