In [1]:
import numpy as np
import pandas as pd
from overcooked_ai_py.mdp.overcooked_mdp import OvercookedGridworld
from overcooked_ai_py.mdp.overcooked_env import OvercookedEnv
from overcooked_ai_py.agents.agent import AgentPair, RandomAgent
from overcooked_ai_py.agents.benchmarking import AgentEvaluator
from overcooked_ai_py.visualization.state_visualizer import StateVisualizer

import matplotlib.pyplot as plt

In [22]:
# Define EVENT_TYPES
EVENT_TYPES = ['tomato_pickup', 'useful_tomato_pickup', 'tomato_drop', 'useful_tomato_drop', 'potting_tomato', 'onion_pickup', 'useful_onion_pickup', 'onion_drop', 'useful_onion_drop', 'potting_onion', 'dish_pickup', 'useful_dish_pickup', 'dish_drop', 'useful_dish_drop', 'soup_pickup', 'soup_delivery', 'soup_drop', 'optimal_onion_potting', 'optimal_tomato_potting', 'viable_onion_potting', 'viable_tomato_potting', 'catastrophic_onion_potting', 'catastrophic_tomato_potting', 'useless_onion_potting', 'useless_tomato_potting']

# Define available_maps_path
available_maps_path = 'data/'

# Define all_layouts
all_layouts = ['pipeline', 'forced_coordination', 'bonus_order_test', 'simple_o', 'inverse_marshmallow_experiment', 'long_cook_time', 'simple_tomato', 'scenario1_s', 'cramped_corridor', 'random3', 'multiplayer_schelling', 'tutorial_3', 'coordination_ring', 'cramped_room_single', 'cramped_room_tomato', 'counter_circuit_o_1order', 'small_corridor', 'five_by_five', 'centre_objects', 'forced_coordination_tomato', 'counter_circuit', 'scenario3', 'old_dynamics_put_test', 'large_room', 'bottleneck', 'tutorial_1', 'schelling', 'random0', 'soup_coordination', 'cramped_room', 'scenario2_s', 'marshmallow_experiment_coordination', 'corridor', 'scenario4', 'mdp_test', 'old_dynamics_cook_test', 'cramped_room_o_3orders', 'asymmetric_advantages', 'tutorial_2', 'unident', 'schelling_s', 'you_shall_not_pass', 'tutorial_0', 'scenario2', 'centre_pots', 'simple_o_t', 'marshmallow_experiment', 'asymmetric_advantages_tomato', 'm_shaped_s']

# Define included_layouts
included_layouts = ['cramped_room', 'asymmetric_advantages_tomato', 'coordination_ring', 'forced_coordination', 'counter_circuit']

In [4]:
mdp = OvercookedGridworld.from_layout_name('cramped_room')
env = OvercookedEnv.from_mdp(mdp)
# print(layout)
print(env)
obs0, obs1 = env.lossless_state_encoding_mdp(env.state)
print(obs0.shape, obs1.shape)

In [53]:
trans_obs0 = np.transpose(obs0, (2, 0, 1))
_, w, h = trans_obs0.shape
w, h

(5, 4)

In [7]:
mdp.lossless_state_encoding_mdp

In [60]:
trans_obs0 + 1

array([[[1, 1, 1, 1],
        [1, 1, 2, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1]],

       [[1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 2, 1, 1],
        [1, 1, 1, 1]],

       [[1, 1, 1, 1],
        [1, 1, 2, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1]],

       [[1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1]],

       [[1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1]],

       [[1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1]],

       [[1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 2, 1, 1],
        [1, 1, 1, 1]],

       [[1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1]],

       [[1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1

In [61]:
width_diff = (max_width - w ) // 2
height_diff = (max_height - h) // 2
padded_trans_obs0 = np.pad(trans_obs0 + 1, ((0, 0), (width_diff, max_width - w - width_diff), (height_diff, max_height - h - height_diff)), mode='constant', constant_values=0)
print(padded_trans_obs0.shape)

(26, 9, 5)


In [62]:
padded_trans_obs0

array([[[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [1, 1, 1, 1, 0],
        ...,
        [1, 1, 1, 1, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]],

       [[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [1, 1, 1, 1, 0],
        ...,
        [1, 1, 1, 1, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]],

       [[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [1, 1, 1, 1, 0],
        ...,
        [1, 1, 1, 1, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]],

       ...,

       [[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [1, 1, 1, 1, 0],
        ...,
        [1, 1, 1, 1, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]],

       [[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [1, 1, 1, 1, 0],
        ...,
        [1, 1, 1, 1, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]],

       [[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [1, 1, 1, 1, 0],
        ...,
        [1, 1, 1, 1, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]]

In [5]:
agent = RandomAgent()

In [None]:
random_action, random_action_probs = agent.action(env.state)
random_action, random_action_probs

((-1, 0), {'action_probs': array([0.2, 0.2, 0.2, 0.2, 0.2, 0. ])})

In [43]:
action_pair = AgentPair(RandomAgent(), RandomAgent())
trajectories = env.get_rollouts(action_pair, num_games=1, display_phi=True)
trajectories.keys()

Avg rew: 0.00 (std: 0.00, se: 0.00); avg len: 400.00; : 100%|██████████| 1/1 [00:00<00:00, 16.44it/s]


dict_keys(['ep_actions', 'ep_infos', 'ep_dones', 'env_params', 'ep_lengths', 'metadatas', 'ep_returns', 'ep_rewards', 'ep_states', 'mdp_params'])

In [9]:
mdp.lossless_state_encoding_mdp()

AttributeError: 'OvercookedGridworld' object has no attribute 'lossless_state_encoding_mdp'

In [65]:
trajectories['ep_infos'][0][0]['agent_infos']

[{'action_probs': array([0.2, 0.2, 0.2, 0.2, 0.2, 0. ])},
 {'action_probs': array([0.2, 0.2, 0.2, 0.2, 0.2, 0. ])}]

In [59]:
reward_shaping_factor = 0.1
sparse_rewards = np.array(trajectories['ep_rewards']) # num episodes by num_steps 

In [None]:
dense_rewards = []
# num episodes, num steps, num agents = 2
for game in range(len(trajectories['ep_infos'])):
    rewards = []
    for d in trajectories['ep_infos'][game]:
        potential = d['phi_s_prime'] - d['phi_s']
        rewards.append(potential)
    dense_rewards.append(rewards)
dense_rewards = np.array(dense_rewards)
print(dense_rewards.shape)
reward1 =  sparse_rewards + dense_rewards * reward_shaping_factor

In [44]:
trajectories['ep_infos'][0][0]

In [47]:
trajectories['ep_rewards'].shape

In [48]:
reward_shaping_factor = 0.1
sparse_rewards = np.array(trajectories['ep_rewards']) # num episodes by num_steps 

In [52]:
5 ** np.arange(4)

array([  1,   5,  25, 125])

In [None]:
dense_rewards = []
# num episodes, num steps, num agents = 2
for game in range(len(trajectories['ep_infos'])):
    rewards = []
    for d in trajectories['ep_infos'][game]:
        potential = d['phi_s_prime'] - d['phi_s']
        rewards.append(potential)
    dense_rewards.append(rewards)
dense_rewards = np.array(dense_rewards)
print(dense_rewards.shape)
reward1 =  sparse_rewards + dense_rewards * reward_shaping_factor

In [44]:
trajectories['ep_infos'][0][0]

{'agent_infos': [{'action_probs': array([0.2, 0.2, 0.2, 0.2, 0.2, 0. ])},
  {'action_probs': array([0.2, 0.2, 0.2, 0.2, 0.2, 0. ])}],
 'sparse_r_by_agent': [0, 0],
 'shaped_r_by_agent': [0, 0],
 'phi_s': 30.633689117862914,
 'phi_s_prime': 30.633689117862914}

In [47]:
trajectories['ep_rewards'].shape

(1, 400)

In [48]:
reward_shaping_factor = 0.1
sparse_rewards = np.array(trajectories['ep_rewards']) # num episodes by num_steps 

In [None]:
dense_rewards = []
# num episodes, num steps, num agents = 2
for game in range(len(trajectories['ep_infos'])):
    rewards = []
    for d in trajectories['ep_infos'][game]:
        potential = d['phi_s_prime'] - d['phi_s']
        rewards.append(potential)
    dense_rewards.append(rewards)
dense_rewards = np.array(dense_rewards)
print(dense_rewards.shape)
reward1 =  sparse_rewards + dense_rewards * reward_shaping_factor

(1, 400)
(1, 400)


In [34]:
import numpy as np
for k, v in trajectories.items():
    if type(v) == np.ndarray:
        print(f"{k}: {v.shape}")

ep_actions: (1, 400)
ep_infos: (1, 400)
ep_dones: (1, 400)
env_params: (1,)
ep_lengths: (1,)
ep_returns: (1,)
ep_rewards: (1, 400)
ep_states: (1, 400)
mdp_params: (1,)


In [36]:
trajectories['ep_returns']

array([0])

In [None]:

agent_evaluator = AgentEvaluator.from_mdp(mdp, env_params={"horizon": 400})

In [13]:
random_trajectory = agent_evaluator.evaluate_random_pair(num_games=1, display=False)
StateVisualizer().display_rendered_trajectory(random_trajectory, ipython_display=True)

Avg rew: 0.00 (std: 0.00, se: 0.00); avg len: 400.00; : 100%|██████████| 1/1 [00:00<00:00, 20.03it/s]


interactive(children=(IntSlider(value=0, description='timestep', max=399), Output()), _dom_classes=('widget-in…

In [None]:
greedy_trajectory = agent_evaluator.evaluate_human_model_pair(num_games=1, display=False)
StateVisualizer().display_rendered_trajectory(greedy_trajectory, ipython_display=True)

Avg rew: 180.00 (std: 0.00, se: 0.00); avg len: 400.00; : 100%|██████████| 1/1 [00:00<00:00, 12.33it/s]


interactive(children=(IntSlider(value=0, description='timestep', max=399), Output()), _dom_classes=('widget-in…

In [None]:
greedy_trajectory = agent_evaluator.evaluate_human_model_pair(num_games=1, display=False)
StateVisualizer().display_rendered_trajectory(greedy_trajectory, ipython_display=True)

X       X       P       X       X       

O       ↑0              →1      O       

X                               X       

X       D       X       S       X       




In [None]:
agent = "something"
agent_trajectory = agent_evaluator.evaluate_agent_pair(agent, num_games=1, display=False)
StateVisualizer().display_rendered_trajectory(agent_trajectory, ipython_display=True)

  0%|          | 0/1 [00:00<?, ?it/s]


AttributeError: 'RandomAgent' object has no attribute 'joint_action'