In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import matplotlib.pyplot as plt
import re
import time

from src.mazeworld import LimitedCollectMazeWorld, AutoCollectMazeWord, WallAutoCollectMazeWorld
from src.mdp import MDP, LegibleTaskMDP, LearnerMDP
from tqdm import tqdm
from itertools import combinations
%matplotlib notebook

In [2]:
def get_goal_states(states, goal):
    """Return the positions of all states whose text contains ``goal``.

    Args:
        states: Iterable of state strings (e.g. ``'6 7 T'``).
        goal: Substring to look for in each state, typically an object label.

    Returns:
        list[int]: Zero-based positions, in iteration order, of the matching
        states. Empty when nothing matches.
    """
    # enumerate() yields each position directly, avoiding a linear
    # ``list.index`` search per match and reporting the true position even
    # when the same state string appears more than once.
    return [idx for idx, state in enumerate(states) if state.find(goal) != -1]

def simulate(mdp, pol, mdp_tasks, leg_pol, x0, n_trajs):
    """Roll out two policies ``n_trajs`` times and score both sets of rollouts.

    Each iteration samples one trajectory from ``mdp`` under ``pol`` and one
    from ``mdp_tasks`` under ``leg_pol``, both starting at ``x0``. Every
    collected set is then scored with ``trajectory_reward`` of both models.

    Returns:
        tuple: ``(mdp reward of mdp rollouts, mdp_tasks reward of mdp rollouts,
        mdp reward of mdp_tasks rollouts, mdp_tasks reward of mdp_tasks rollouts)``.
    """
    base_runs, task_runs = [], []

    for _ in tqdm(range(n_trajs), desc='Simulate Trajectories'):
        states, acts = mdp.trajectory(x0, pol)
        task_states, task_acts = mdp_tasks.trajectory(x0, leg_pol)
        base_runs.append([states, acts])
        task_runs.append([task_states, task_acts])

    # Tuple elements are evaluated left to right, so the four scoring calls
    # happen in the order they are listed.
    return (
        mdp.trajectory_reward(base_runs),
        mdp_tasks.trajectory_reward(base_runs),
        mdp.trajectory_reward(task_runs),
        mdp_tasks.trajectory_reward(task_runs),
    )

In [117]:
def create_world_view(n_rows, n_cols, obj_place, walls=None):
    """Draw an empty maze: grid, object labels and (optionally) walls.

    Args:
        n_rows: Number of grid rows (extent of the y axis).
        n_cols: Number of grid columns (extent of the x axis).
        obj_place: Iterable of ``(row, col, label)`` entries; each label is
            rendered as a mathtext marker at the centre of its cell.
        walls: Optional iterable of polylines, each a sequence of
            ``(row, col)`` points, drawn as thick black lines.

    Returns:
        The newly created 8x8 inch matplotlib figure.
    """
    fig = plt.figure(figsize=(8, 8))

    # One unit square per cell, with ticks (and therefore grid lines) on the
    # integer cell borders.
    plt.xlim(0, n_cols)
    plt.ylim(0, n_rows)
    plt.xticks(list(range(1, n_cols + 1)))
    plt.yticks(list(range(1, n_rows + 1)))
    plt.grid(True)

    for entry in obj_place:
        row, col, label = entry[0], entry[1], entry[2]
        # Shift by half a cell so the label sits in the middle of the cell.
        plt.plot(col - 0.5, row - 0.5, marker='$' + label + '$', color='k', markersize=10)

    if walls:
        for polyline in walls:
            points = np.array([list(point) for point in polyline])
            xs = points[:, 1] - 0.5
            ys = points[:, 0] - 0.5
            plt.plot(xs, ys, color='k', linewidth=5)

    return fig


def visualize_trajectory(x0, trajectory, figure, color):
    """Overlay a trajectory on a maze figure as a start marker plus arrows.

    Args:
        x0: Start step; only ``x0[0]`` (x) and ``x0[1]`` (y) are used, to place
            a gold 'x' marker at the centre of that cell.
        trajectory: Iterable of ``(x, y, action)`` steps, as produced by
            ``process_trajectory``. ``action`` must be one of
            ``'U', 'D', 'L', 'R', 'G', 'P', 'N'``.
        figure: Matplotlib figure to draw on. When ``None``, the current
            pyplot axes are used instead.
        color: Face and edge colour of the arrows.

    Raises:
        KeyError: If a step carries an action letter that is not listed above.
    """
    # Displacement (dx, dy) drawn for each action letter. 'G', 'P' and 'N'
    # produce zero-length arrows (presumably non-movement actions; confirm
    # against the maze world definition).
    deltas = {'U': (0, -1), 'D': (0, 1), 'L': (-1, 0), 'R': (1, 0),
              'G': (0, 0), 'P': (0, 0), 'N': (0, 0)}

    # Draw on the figure that was passed in rather than on whichever figure
    # pyplot currently considers active, so the overlay cannot land on an
    # unrelated plot.
    ax = figure.gca() if figure is not None else plt.gca()

    ax.plot(x0[0] - 0.5, x0[1] - 0.5, marker='x', markersize=15, color='gold')
    for step in trajectory:
        dx, dy = deltas[step[2]]
        # The -0.5 shift places the arrow tail at the centre of the cell.
        ax.arrow(step[0] - 0.5, step[1] - 0.5, dx, dy,
                 head_width=0.1, head_length=0.1, lw=1.5, fc=color, ec=color)

        
def process_trajectory(trajectory, actions):
    """Convert state strings and actions into plottable ``(x, y, action)`` steps.

    Each state is expected to look like ``"<row> <col> <label>"`` (two
    integers followed by letters). The final state of the trajectory is
    skipped, so the result has ``len(trajectory) - 1`` entries.

    Args:
        trajectory: Sequence of state strings.
        actions: Sequence of actions; ``actions[i]`` is paired with
            ``trajectory[i]``.

    Returns:
        list[tuple]: ``(col, row, action)`` for every state except the last,
        i.e. x first and y second.

    Raises:
        ValueError: If a state string does not match the expected format.
        IndexError: If ``actions`` has fewer entries than the states processed.
    """
    # Compiled once instead of on every step.
    state_pattern = re.compile(r"([0-9]+) ([0-9]+) ([a-z]+)", re.I)

    steps = []
    for i, state in enumerate(trajectory[:-1]):
        parsed = state_pattern.match(state)
        if parsed is None:
            raise ValueError('Malformed state at position %d: %r' % (i, state))
        # The state stores the row first; plotting wants x (column) first.
        row = int(parsed.group(1))
        col = int(parsed.group(2))
        steps.append((col, row, actions[i]))

    return steps

In [222]:
# Maze layout (8x8, task 'T'). Every layout cell in this notebook rebinds the
# same globals (n_rows, n_cols, objs_states, walls, x0, goals, goal), so the
# cells further down use whichever layout cell was executed last.
n_rows = 8
n_cols = 8
# Objects as (row, col, label); create_world_view draws each label at
# x = col - 0.5, y = row - 0.5.
objs_states = [(7, 2, 'P'), (4, 4, 'D'), (4, 1, 'C'), (8, 1, 'L'), (6, 7, 'T'), (8, 8, 'O')]
# Wall polylines: lists of (row, col) points lying on cell borders (hence the
# .5 offsets). The first four entries trace the outer boundary of the grid,
# the remaining ones are interior wall segments.
walls = [[(0.5, x + 0.5) for x in range(0, n_cols + 1)],
         [(n_rows + 0.5, x + 0.5) for x in range(0, n_cols + 1)], 
         [(x + 0.5, 0.5) for x in range(0, n_rows + 1)], 
         [(x + 0.5, n_cols + 0.5) for x in range(0, n_rows + 1)],
         [(1.5, 3.5), (2.5, 3.5)],
         [(3.5, 3.5), (4.5, 3.5)],
         [(2.5, 0.5), (2.5, 1.5)],
         [(2.5, 2.5), (2.5, 3.5)],
         [(1.5, x + 0.5) for x in range(4, 6)],
         [(1.5, 6.5), (1.5, 7.5)],
         [(7.5, x + 0.5) for x in range(0, 3)],
         [(7.5, x + 0.5) for x in range(5, 8)],
         [(x + 0.5, 4.5) for x in range(4, 6)],
         [(x + 0.5, 4.5) for x in range(6, 8)],
         [(4.5, x + 0.5) for x in range(3, 6)],
         [(4.5, x + 0.5) for x in range(6, 8)],
         [(x + 0.5, 7.5) for x in range(1, 4)],
         [(x + 0.5, 7.5) for x in range(5, 8)]]
# x0 = np.random.choice([x for x in X_a if 'N' in x])
# Start state in the "<row> <col> <label>" format that process_trajectory parses.
x0 = '1 1 N'
# Candidate task labels (one per object above) and the task under study.
goals = ['P', 'D', 'C', 'L', 'T', 'O']
goal = 'T'

# Render the empty maze to check the layout visually.
fig = create_world_view(n_rows, n_cols, objs_states, walls)
fig.show()

<IPython.core.display.Javascript object>

In [10]:
# Alternative 8x8 maze layout (task 'O'). Rebinds the same globals as the
# other layout cells; only the most recently executed layout is used later.
n_rows = 8
n_cols = 8
# Objects as (row, col, label).
objs_states = [(6, 1, 'P'), (1, 7, 'D'), (3, 2, 'C'), (8, 1, 'L'), (7, 7, 'T'), (8, 8, 'O')]
# Wall polylines of (row, col) border points: four outer-boundary entries
# first, then the interior wall segments.
walls = [[(0.5, x + 0.5) for x in range(0, n_cols + 1)],
         [(n_rows + 0.5, x + 0.5) for x in range(0, n_cols + 1)], 
         [(x + 0.5, 0.5) for x in range(0, n_rows + 1)], 
         [(x + 0.5, n_cols + 0.5) for x in range(0, n_rows + 1)],
         [(0.5, 3.5), (1.5, 3.5), (2.5, 3.5)],
         [(x + 0.5, 4.5) for x in range(4, 6)],
         [(x + 0.5, 4.5) for x in range(6, 8)],
         [(x + 0.5, 2.5) for x in range(2, 5)],
         [(x + 0.5, 2.5) for x in range(6, 8)],
         [(x + 0.5, 7.5) for x in range(1, 3)],
         [(x + 0.5, 7.5) for x in range(3, 8)],
         [(2.5, 0.5), (2.5, 1.5), (2.5, 2.5)],
         [(2.5, x + 0.5) for x in range(3, 5)],
         [(2.5, x + 0.5) for x in range(5, 7)],
         [(7.5, x + 0.5) for x in range(0, 2)],
         [(7.5, x + 0.5) for x in range(5, 8)],
         [(4.5, x + 0.5) for x in range(4, 6)],
         [(4.5, x + 0.5) for x in range(6, 8)]]
# Start state, "<row> <col> <label>".
x0 = '1 1 N'
# Candidate task labels and the task under study.
goals = ['P', 'D', 'C', 'L', 'T', 'O']
goal = 'O'

# Render the empty maze to check the layout visually.
fig = create_world_view(n_rows, n_cols, objs_states, walls)
fig.show()

In [42]:
# Alternative 8x8 maze layout (task 'O'). Rebinds the same globals as the
# other layout cells; only the most recently executed layout is used later.
n_rows = 8
n_cols = 8
# Objects as (row, col, label).
objs_states = [(8, 5, 'P'), (1, 5, 'D'), (4, 1, 'C'), (8, 1, 'L'), (4, 7, 'T'), (8, 8, 'O')]
# Wall polylines of (row, col) border points: four outer-boundary entries
# first, then the interior wall segments.
walls = [[(0.5, x + 0.5) for x in range(0, n_cols + 1)],
         [(n_rows + 0.5, x + 0.5) for x in range(0, n_cols + 1)], 
         [(x + 0.5, 0.5) for x in range(0, n_rows + 1)], 
         [(x + 0.5, n_cols + 0.5) for x in range(0, n_rows + 1)],
         [(x + 0.5, 2.5) for x in range(2, 6)],
         [(x + 0.5, 2.5) for x in range(6, 8)],
         [(x + 0.5, 3.5) for x in range(0, 2)],
         [(x + 0.5, 3.5) for x in range(6, 8)],
         [(x + 0.5, 5.5) for x in range(6, 9)],
         [(x + 0.5, 6.5) for x in range(3, 6)],
         [(x + 0.5, 6.5) for x in range(6, 8)],
         [(2.5, x + 0.5) for x in range(0, 3)],
         [(1.5, x + 0.5) for x in range(3, 6)],
         [(1.5, x + 0.5) for x in range(6, 9)],
         [(3.5, x + 0.5) for x in range(6, 8)],
         [(6.5, x + 0.5) for x in range(0, 3)],
         [(6.5, x + 0.5) for x in range(3, 5)],
         [(6.5, x + 0.5) for x in range(6, 8)],
         [(5.5, x + 0.5) for x in range(7, 9)]]
# Start state, "<row> <col> <label>".
x0 = '1 1 N'
# Candidate task labels and the task under study.
goals = ['P', 'D', 'C', 'L', 'T', 'O']
goal = 'O'

# Render the empty maze to check the layout visually.
fig = create_world_view(n_rows, n_cols, objs_states, walls)
fig.show()

<IPython.core.display.Javascript object>

In [82]:
# 10x10 maze layout (task 'O'). Rebinds the same globals as the other layout
# cells; only the most recently executed layout is used later.
n_rows = 10
n_cols = 10
# Objects as (row, col, label).
objs_states = [(9, 3, 'P'), (9, 1, 'D'), (1, 6, 'C'), (5, 4, 'L'), (10, 8, 'T'), (7, 9, 'O')]
# Wall polylines of (row, col) border points: four outer-boundary entries
# first, then the interior wall segments.
walls = [[(0.5, x + 0.5) for x in range(0, n_cols + 1)],
         [(n_rows + 0.5, x + 0.5) for x in range(0, n_cols + 1)], 
         [(x + 0.5, 0.5) for x in range(0, n_rows + 1)], 
         [(x + 0.5, n_cols + 0.5) for x in range(0, n_rows + 1)],
         [(0.5, 2.5), (1.5, 2.5)],
         [(3.5, 6.5), (3.5, 7.5)],
         [(3.5, 0.5), (3.5, 1.5)],
         [(x + 0.5, 2.5) for x in range(2, 6)],
         [(x + 0.5, 2.5) for x in range(6, 10)],
         [(x + 0.5, 6.5) for x in range(0, 3)],
         [(x + 0.5, 8.5) for x in range(3, 8)],
         [(x + 0.5, 8.5) for x in range(8, 11)],
         [(3.5, x + 0.5) for x in range(2, 6)],
         [(3.5, x + 0.5) for x in range(8, 10)],
         [(8.5, x + 0.5) for x in range(2, 4)],
         [(8.5, x + 0.5) for x in range(4, 8)]]
# x0 = np.random.choice([x for x in X_a if 'N' in x])
# Start state, "<row> <col> <label>".
x0 = '1 1 N'
# Candidate task labels and the task under study.
goals = ['P', 'D', 'C', 'L', 'T', 'O']
goal = 'O'

# Render the empty maze to check the layout visually.
fig = create_world_view(n_rows, n_cols, objs_states, walls)
fig.show()

<IPython.core.display.Javascript object>

In [83]:
# Alternative 10x10 maze layout (task 'O'). Rebinds the same globals as the
# other layout cells; only the most recently executed layout is used later.
n_rows = 10
n_cols = 10
# Objects as (row, col, label).
objs_states = [(1, 7, 'P'), (10, 10, 'D'), (6, 10, 'C'), (10, 2, 'L'), (10, 7, 'T'), (7, 1, 'O')]
# Wall polylines of (row, col) border points: four outer-boundary entries
# first, then the interior wall segments.
walls = [[(0.5, x + 0.5) for x in range(0, n_cols + 1)],
         [(n_rows + 0.5, x + 0.5) for x in range(0, n_cols + 1)], 
         [(x + 0.5, 0.5) for x in range(0, n_rows + 1)], 
         [(x + 0.5, n_cols + 0.5) for x in range(0, n_rows + 1)],
         [(0.5, 3.5), (1.5, 3.5)],
         [(x + 0.5, 3.5) for x in range(3, 6)],
         [(x + 0.5, 3.5) for x in range(6, 9)],
         [(x + 0.5, 4.5) for x in range(0, 4)],
         [(x + 0.5, 4.5) for x in range(9, 11)],
         [(x + 0.5, 7.5) for x in range(8, 11)],
         [(x + 0.5, 8.5) for x in range(0, 4)],
         [(x + 0.5, 8.5) for x in range(7, 10)],
         [(x + 0.5, 8.5) for x in range(5, 7)],
         [(2.5, x + 0.5) for x in range(0, 4)],
         [(2.5, x + 0.5) for x in range(9, 11)],
         [(8.5, x + 0.5) for x in range(0, 3)],
         [(8.5, x + 0.5) for x in range(4, 7)],
         [(5.5, x + 0.5) for x in range(8, 11)],
         [(3.5, x + 0.5) for x in range(0, 2)],
         [(3.5, x + 0.5) for x in range(2, 4)],
         [(3.5, x + 0.5) for x in range(4, 6)],
         [(3.5, x + 0.5) for x in range(6, 8)]]
# Start state, "<row> <col> <label>".
x0 = '1 1 N'
# Candidate task labels and the task under study.
goals = ['P', 'D', 'C', 'L', 'T', 'O']
goal = 'O'

# Render the empty maze to check the layout visually.
fig = create_world_view(n_rows, n_cols, objs_states, walls)
fig.show()

<IPython.core.display.Javascript object>

In [78]:
# Alternative 10x10 maze layout (task 'D'). Rebinds the same globals as the
# other layout cells; only the most recently executed layout is used later.
n_rows = 10
n_cols = 10
# Objects as (row, col, label).
objs_states = [(1, 7, 'P'), (10, 10, 'D'), (7, 10, 'C'), (9, 1, 'L'), (9, 5, 'T'), (5, 1, 'O')]
# Wall polylines of (row, col) border points: four outer-boundary entries
# first, then the interior wall segments.
walls = [[(0.5, x + 0.5) for x in range(0, n_cols + 1)],
         [(n_rows + 0.5, x + 0.5) for x in range(0, n_cols + 1)], 
         [(x + 0.5, 0.5) for x in range(0, n_rows + 1)], 
         [(x + 0.5, n_cols + 0.5) for x in range(0, n_rows + 1)], 
         [(x + 0.5, 6.5) for x in range(4, 7)], 
         [(x + 0.5, 4.5) for x in range(4, 7)],
         [(x + 0.5, 2.5) for x in range(3, 6)],
         [(x + 0.5, 2.5) for x in range(6, 8)],
         [(x + 0.5, 1.5) for x in range(8, 10)],
         [(x + 0.5, 3.5) for x in range(0, 2)],
         [(x + 0.5, 3.5) for x in range(8, 11)],
         [(x + 0.5, 4.5) for x in range(0, 3)],
         [(x + 0.5, 7.5) for x in range(0, 3)],
         [(x + 0.5, 7.5) for x in range(8, 11)],
         [(x + 0.5, 8.5) for x in range(0, 4)],
         [(x + 0.5, 8.5) for x in range(4, 7)],
         [(x + 0.5, 8.5) for x in range(8, 10)],
         [(2.5, x + 0.5) for x in range(0, 3)],
         [(2.5, x + 0.5) for x in range(4, 6)],
         [(2.5, x + 0.5) for x in range(6, 8)],
         [(3.5, x + 0.5) for x in range(0, 3)],
         [(3.5, x + 0.5) for x in range(9, 11)],
         [(4.5, x + 0.5) for x in range(9, 11)],
         [(4.5, x + 0.5) for x in range(4, 7)],
         [(6.5, x + 0.5) for x in range(4, 7)],
         [(7.5, x + 0.5) for x in range(0, 3)],
         [(7.5, x + 0.5) for x in range(8, 11)],
         [(8.5, x + 0.5) for x in range(0, 2)],
         [(8.5, x + 0.5) for x in range(3, 6)],
         [(8.5, x + 0.5) for x in range(6, 8)],
         [(8.5, x + 0.5) for x in range(8, 10)]
        ]
# Start state, "<row> <col> <label>".
x0 = '1 1 N'
# Candidate task labels and the task under study.
goals = ['P', 'D', 'C', 'L', 'T', 'O']
goal = 'D'

# Render the empty maze to check the layout visually.
fig = create_world_view(n_rows, n_cols, objs_states, walls)
fig.show()

<IPython.core.display.Javascript object>

In [223]:
# Build the maze world from the currently bound layout globals, create one
# task MDP and one legible MDP per goal, then solve both for the selected goal.
print('##########################################')
print('#####  Wall Auto Collect Maze World  #####')
print('##########################################')
wacmw = WallAutoCollectMazeWorld()
# NOTE(review): 'stochastic' / 0.15 presumably select noisy transitions and
# their noise level -- confirm against WallAutoCollectMazeWorld.generate_world.
X_w, A_w, P_w = wacmw.generate_world(n_rows, n_cols, objs_states, walls, 'stochastic', 0.15)

print('### Computing Costs and Creating Task MDPs ###')
mdps_w = {}       # 'mdp<k>' -> MDP for goals[k - 1]
q_mdps_w = []     # second value returned by policy_iteration(), in goal order
task_mdps_w = {}  # 'leg_mdp_<k>' -> LegibleTaskMDP for goals[k - 1]
costs = []        # per-goal cost structures, in goal order
for i in tqdm(range(len(goals)), desc='Single Task MDPs'):
    c = wacmw.generate_costs(goals[i], X_w, A_w)
    costs += [c]
    # 0.9 is presumably the discount factor -- confirm against MDP.__init__.
    mdp = MDP(X_w, A_w, P_w, c, 0.9, get_goal_states(X_w, goals[i]))
    # Solved here only to collect q; the policy itself is discarded.
    _, q = mdp.policy_iteration()
    q_mdps_w += [q]
    mdps_w['mdp' + str(i + 1)] = mdp
print('Legible task MDP')
leg_costs = []
for i in tqdm(range(len(goals)), desc='Legible Task MDPs'):
    # NOTE(review): the meaning of 10.0, -1 and 'leg_optimal' is not visible
    # in this notebook -- see LegibleTaskMDP.
    mdp = LegibleTaskMDP(X_w, A_w, P_w, 0.9, goals[i], objs_states, goals, 10.0, get_goal_states(X_w, goals[i]), -1, 
                         'leg_optimal',
                         # task_mdps=list(mdps_w.values()))
                         q_mdps=q_mdps_w)
    leg_costs += [mdp.costs]
    task_mdps_w['leg_mdp_' + str(i + 1)] = mdp
# Legible MDP for the goal selected in the layout cell.
task_mdp_w = task_mdps_w['leg_mdp_' + str(goals.index(goal) + 1)]

print('### Computing Optimal policy ###')
time1 = time.time()
# Runs policy iteration again for the selected goal's MDP (it was already
# solved once in the loop above, where only q was kept).
pol_w, Q1 = mdps_w['mdp' + str(goals.index(goal) + 1)].policy_iteration()
print('Took %.3f seconds to compute policy' % (time.time() - time1))

print('### Computing Legible policy ###')
time1 = time.time()
task_pol_w, task_Q = task_mdp_w.policy_iteration()
print('Took %.3f seconds to compute policy' % (time.time() - time1))

##########################################
#####  Wall Auto Collect Maze World  #####
##########################################



Single Task MDPs:   0%|                                                                                                                                                                                                            | 0/6 [00:00<?, ?it/s][A

### Computing Costs and Creating Task MDPs ###
Iteration 3


Single Task MDPs:  17%|████████████████████████████████▋                                                                                                                                                                   | 1/6 [00:11<00:58, 11.76s/it][A

N. iterations:  3
Iteration 3


Single Task MDPs:  33%|█████████████████████████████████████████████████████████████████▎                                                                                                                                  | 2/6 [00:21<00:44, 11.14s/it][A

N. iterations:  3
Iteration 3


Single Task MDPs:  50%|██████████████████████████████████████████████████████████████████████████████████████████████████                                                                                                  | 3/6 [00:31<00:32, 10.68s/it][A

N. iterations:  3
Iteration 3


Single Task MDPs:  67%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                                 | 4/6 [00:42<00:21, 10.92s/it][A

N. iterations:  3
Iteration 3


Single Task MDPs:  83%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                | 5/6 [00:52<00:10, 10.69s/it][A

N. iterations:  3
Iteration 3


Single Task MDPs: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [01:05<00:00, 10.84s/it][A

Legible Task MDPs:   0%|                                                                                                                                                                                                           | 0/6 [00:00<?, ?it/s][A

N. iterations:  3
Legible task MDP



Legible Task MDPs:  17%|████████████████████████████████▌                                                                                                                                                                  | 1/6 [00:00<00:03,  1.50it/s][A
Legible Task MDPs:  33%|█████████████████████████████████████████████████████████████████                                                                                                                                  | 2/6 [00:01<00:02,  1.55it/s][A
Legible Task MDPs:  50%|█████████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                                 | 3/6 [00:02<00:02,  1.45it/s][A
Legible Task MDPs:  67%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                                 | 4/6 [00:02<00:01,  

### Computing Optimal policy ###
N. iterations:  3
Took 10.400 seconds to compute policy
### Computing Legible policy ###
N. iterations:  5
Took 13.290 seconds to compute policy


In [224]:
# Compare the optimal and the legible policy for the selected goal: one
# sampled rollout of each (printed and plotted), then scores over 100 rollouts.
x0 = '1 1 N'
print('Initial State: ' + x0)
print('##########################################')
print('#####  Wall Auto Collect Maze World  #####')
print('##########################################')

print('Optimal trajectory for task: ' + goal)
t1, a1 = mdps_w['mdp' + str(goals.index(goal) + 1)].trajectory(x0, pol_w)
print('Trajectory: ' + str(t1))
# The same rollout is scored by both models: the task MDP ("Cost") and the
# legible MDP ("Legible Reward").
print('Cost: ' + str(mdps_w['mdp' + str(goals.index(goal) + 1)].trajectory_reward([[t1, a1]])))
print('Legible Reward: ' + str(task_mdp_w.trajectory_reward([[t1, a1]])))
# (x, y, action) steps for plotting.
t_opt = process_trajectory(t1, a1)

print('Legible trajectory for task: ' + goal)
task_traj, task_act = task_mdp_w.trajectory(x0, task_pol_w)
print('Trajectory: ' + str(task_traj))
print('Cost: ' + str(mdps_w['mdp' + str(goals.index(goal) + 1)].trajectory_reward([[task_traj, task_act]])))
print('Legible Reward: ' + str(task_mdp_w.trajectory_reward([[task_traj, task_act]])))
t_leg = process_trajectory(task_traj, task_act)

# Overlay both rollouts on a fresh maze: optimal in blue, legible in black.
fig = create_world_view(n_rows, n_cols, objs_states, walls)
visualize_trajectory(t_opt[0], t_opt, fig, 'b')
visualize_trajectory(t_leg[0], t_leg, fig, 'k')
fig.show()

print('Getting model performance!!')
clock_1 = time.time()
# 100 rollouts per policy, each set scored under both models (see simulate).
mdp_r, mdp_rl, leg_mdp_r, leg_mdp_rl = simulate(mdps_w['mdp' + str(goals.index(goal) + 1)], pol_w,
                                                task_mdp_w, task_pol_w, x0, 100)
time_simulation = time.time() - clock_1
print('Simulation length = %.3f' % time_simulation)
print('Optimal Policy performance:\nCost: %.3f\nLegible Reward: %.3f' % (mdp_r, mdp_rl))
print('Legible Policy performance:\nCost: %.3f\nLegible Reward: %.3f' % (leg_mdp_r, leg_mdp_rl))


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 999.83it/s][A

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 1001.98it/s][A

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<?, ?it/s][A

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 

Initial State: 1 1 N
##########################################
#####  Wall Auto Collect Maze World  #####
##########################################
Optimal trajectory for task: T
Trajectory: ['1 1 N' '1 2 N' '2 2 N' '2 2 N' '3 2 N' '4 2 N' '4 2 N' '5 2 N' '6 2 N'
 '6 2 N' '6 3 N' '6 4 N' '6 5 N' '6 6 N' '6 6 N' '6 7 T']
Cost: 7.941088679053512
Legible Reward: 1.2906551867336602
Legible trajectory for task: T
Trajectory: ['1 1 N' '1 2 N' '1 3 N' '1 4 N' '1 4 N' '1 5 N' '1 5 N' '1 6 N' '1 7 N'
 '1 8 N' '2 8 N' '2 8 N' '3 8 N' '4 8 N' '4 8 N' '5 8 N' '5 7 N' '5 7 N'
 '5 7 N' '6 7 T']
Cost: 8.649148282327012
Legible Reward: 3.051982462005765





<IPython.core.display.Javascript object>


Simulate Trajectories:   0%|                                                                                                                                                                                                     | 0/100 [00:00<?, ?it/s][A

Getting model performance!!



Simulate Trajectories:  13%|████████████████████████▎                                                                                                                                                                  | 13/100 [00:00<00:00, 119.27it/s][A
Simulate Trajectories:  26%|████████████████████████████████████████████████▌                                                                                                                                          | 26/100 [00:00<00:00, 121.94it/s][A
Simulate Trajectories:  41%|████████████████████████████████████████████████████████████████████████████▋                                                                                                              | 41/100 [00:00<00:00, 126.90it/s][A
Simulate Trajectories:  54%|████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                                                      | 54/100 [00:00<00:00, 12

Simulation length = 1.001
Optimal Policy performance:
Cost: 7.783
Legible Reward: 1.234
Legible Policy performance:
Cost: 8.661
Legible Reward: 3.184





In [None]:
# Plot every trajectory returned by all_trajectories() for the optimal policy,
# then do the same for the legible policy on a second figure.
# NOTE(review): all_trajectories presumably enumerates each distinct trajectory
# reachable under the policy -- confirm in src.mdp.
trajs, a_trajs = mdps_w['mdp' + str(goals.index(goal) + 1)].all_trajectories(x0, pol_w)
colors = ['blue', 'darkred', 'green', 'black', 'orange', 'pink', 'yellow', 'magenta', 'brown', 'cyan', 'khaki', 'olivedrab', 'lightcoral']
# i always equals j inside the loops below; it only serves as the colour index.
i = 0
fig = create_world_view(n_rows, n_cols, objs_states, walls)
for j in range(len(trajs)):
    traj = trajs[j]
    a_traj = a_trajs[j]
    p_traj = process_trajectory(traj, a_traj)
    # Once the palette is exhausted every further trajectory reuses its last colour.
    visualize_trajectory(p_traj[0], p_traj, fig, colors[min(i, len(colors) - 1)])
    i += 1
fig.show()
print('Optimal trajectories: %d' % len(trajs))
# leg_trajs holds raw state-string trajectories here; a later cell rebinds the
# same name to index arrays.
leg_trajs, leg_a_trajs = task_mdp_w.all_trajectories(x0, task_pol_w)
i = 0
fig = create_world_view(n_rows, n_cols, objs_states, walls)
for j in range(len(leg_trajs)):
    traj = leg_trajs[j]
    a_traj = leg_a_trajs[j]
    p_traj = process_trajectory(traj, a_traj)
    visualize_trajectory(p_traj[0], p_traj, fig, colors[min(i, len(colors) - 1)])
    i += 1
fig.show()
print('Legible trajectories: %d' % len(leg_trajs))

<IPython.core.display.Javascript object>

Optimal trajectories: 186


In [68]:
# Debug peek: state and action at step 10 of the first legible trajectory.
# Requires leg_trajs / leg_a_trajs as returned by all_trajectories().
print(leg_trajs[0][10], leg_a_trajs[0][10])

4 8 N D


In [73]:
# Sample n_trajs rollouts from each policy and encode every step as a
# [state index in X_w, action index in A_w] pair for the learner cells.
print('Creating demo trajectories')
n_trajs = 100
print('Optimal trajectories')
opt_trajs = []
opt_traj_len = 0  # length of the longest optimal rollout seen
for _ in range(n_trajs):
    x0 = '1 1 N'
    t1, a1 = mdps_w['mdp' + str(goals.index(goal) + 1)].trajectory(x0, pol_w)
    opt_traj_len = max(opt_traj_len, len(t1))
    traj = []
    for i in range(len(t1)):
        # Indexes a1 over the full length of t1, so trajectory() must return
        # one action per state.
        traj += [[list(X_w).index(t1[i]), list(A_w).index(a1[i])]]
    opt_trajs += [np.array(traj)]
# print(opt_trajs)
print('Legible trajectories')
# NOTE: this rebinds leg_trajs, which the all-trajectories cell bound to raw
# state-string trajectories; cells that pair leg_trajs with leg_a_trajs need
# that earlier binding, so execution order matters.
leg_trajs = []
leg_traj_len = 0  # length of the longest legible rollout seen
for _ in range(n_trajs):
    x0 = '1 1 N'
    traj = []
    task_traj, task_act = task_mdp_w.trajectory(x0, task_pol_w)
    leg_traj_len = max(leg_traj_len, len(task_traj))
    for i in range(len(task_traj)):
        traj += [[list(X_w).index(task_traj[i]), list(A_w).index(task_act[i])]]
    leg_trajs += [np.array(traj)]
# print(leg_trajs)

# for i in range(len(opt_trajs) - 1):
#     for j in range(i, len(opt_trajs)):
#         print((opt_trajs[i] == opt_trajs[j]).all(), end='\t')
#     print('\n')
    
# for i in range(len(leg_trajs) - 1):
#     for j in range(i, len(leg_trajs)):
#         print((leg_trajs[i] == leg_trajs[j]).all(), end='\t')
#     print('\n')

Creating demo trajectories
Optimal trajectories
Legible trajectories


In [78]:
# Evaluate a learner built from the per-goal task costs on the index-encoded
# legible demonstrations. The other evaluation variants are commented out.
print('######################################')
print('#####     IRL Agent Learning     #####')
print('######################################')

print('IRL Agent')
# NOTE(review): 0.9 is presumably the discount factor and -1 a sign
# convention -- confirm against LearnerMDP.
opt_learner = LearnerMDP(X_w, A_w, P_w, 0.9, costs, -1)
# leg_learner = LearnerMDP(X_w, A_w, P_w, 0.9, leg_costs, 1)


step = 1
print('Optimal trajectory eval')
# opt_learner_count, opt_avg_confidence = opt_learner.learner_eval(0.9, opt_trajs, opt_traj_len, step, goals.index(goal))
# leg_learner_count, leg_avg_confidence = leg_learner.learner_eval(0.9, opt_trajs, leg_traj_len, step, goals.index(goal))
# print(opt_learner_count/n_trajs, leg_learner_count/n_trajs)
# print(opt_avg_confidence, leg_avg_confidence)

print('Legible trajectory eval')
# NOTE(review): leg_trajs is evaluated with opt_traj_len, while the
# commented-out leg_learner call below uses leg_traj_len -- confirm which
# length learner_eval expects here.
opt_learner_count, opt_avg_confidence = opt_learner.learner_eval(0.9, leg_trajs, opt_traj_len, step, goals.index(goal))
# leg_learner_count, leg_avg_confidence = leg_learner.learner_eval(0.9, leg_trajs, leg_traj_len, step, goals.index(goal))
# print(opt_learner_count/n_trajs, leg_learner_count/n_trajs)
# print(opt_avg_confidence, leg_avg_confidence)

######################################
#####     IRL Agent Learning     #####
######################################
IRL Agent
N. iterations:  11
N. iterations:  8
N. iterations:  9
N. iterations:  10
N. iterations:  8
N. iterations:  8
Optimal trajectory eval
Legible trajectory eval
1 1 N R
[0.19908073 0.1988844  0.19897416 0.19897416 0.19889405 0.19889405]
1 2 N R
[0.03955173 0.0394563  0.03949992 0.03949992 0.03946099 0.03946099]
1 3 N D
[0.00783719 0.00780273 0.00781847 0.00781847 0.00780442 0.00780442]
2 3 N D
[0.00154839 0.00153755 0.0015425  0.0015425  0.00153808 0.00153808]
3 3 N R
[0.00031175 0.00031074 0.00031148 0.00031148 0.00031082 0.00031082]
3 4 N R
[6.20137166e-05 6.30398716e-05 6.24354247e-05 6.24354247e-05
 6.26802564e-05 6.26802564e-05]
3 5 N R
[1.22781393e-05 1.25766876e-05 1.23840855e-05 1.23815444e-05
 1.24646271e-05 1.24646271e-05]
3 6 N R
[2.42179861e-06 2.53897113e-06 2.44328274e-06 2.44064415e-06
 2.45390740e-06 2.45981474e-06]
3 7 N R
[4.88359591e-07 5.043729

In [16]:
# Run birl_inference on growing prefixes of every legible trajectory with two
# learners (task costs vs. legible costs) and print both inferred goal indices
# next to the true one.
print('######################################')
print('#####     IRL Agent Learning     #####')
print('######################################')

print('IRL Agent')
opt_learner = LearnerMDP(X_w, A_w, P_w, 0.9, costs, -1)
leg_learner = LearnerMDP(X_w, A_w, P_w, 0.9, leg_costs, 1)

print('Preparing Trajectories')
# Encode each step as [state index, action index]. This needs trajs, a_trajs,
# leg_trajs and leg_a_trajs as returned by all_trajectories() (raw states).
p_trajs = []
for i in range(len(trajs)):
    p_traj = []
    traj = trajs[i]
    a_traj = a_trajs[i]
    for j in range(len(traj)):
        p_traj += [[list(X_w).index(traj[j]), list(A_w).index(a_traj[j])]]
    p_trajs +=  [np.array(p_traj)]

p_leg_trajs = []
for i in range(len(leg_trajs)):
    p_traj = []
    traj = leg_trajs[i]
    a_traj = leg_a_trajs[i]
    for j in range(len(traj)):
        p_traj += [[list(X_w).index(traj[j]), list(A_w).index(a_traj[j])]]
    p_leg_trajs +=  [np.array(p_traj)]
    
print('Learning')
# Prefix lengths: every multiple of `step` up to traj_len, plus traj_len itself
# when it is not a multiple. traj_len comes from the FIRST optimal trajectory
# even though the loop below walks the legible ones; slicing past the end of a
# shorter trajectory simply yields the whole trajectory.
indexes = []
traj_len = len(p_trajs[0])
step = 2
for i in range(step, traj_len+1, step):
    indexes += [i]

if traj_len % step == 0:
    n_idx = traj_len // step
else:
    n_idx = traj_len // step + 1
    indexes += [traj_len]
    
#for traj in p_trajs:
for traj in p_leg_trajs:
    for i in range(n_idx):
        idx = indexes[i]
        # Only the second return value (used as the inferred goal index) is kept.
        r, o_idx = opt_learner.birl_inference(traj[:idx], 0.9)
        r, l_idx = leg_learner.birl_inference(traj[:idx], 0.9)
        print(o_idx, l_idx, goals.index(goal))

######################################
#####     IRL Agent Learning     #####
######################################
IRL Agent
N. iterations:  9
N. iterations:  10
N. iterations:  9
N. iterations:  11
N. iterations:  9
N. iterations:  11
N. iterations:  7
N. iterations:  6
N. iterations:  6
N. iterations:  6
N. iterations:  7
N. iterations:  7
Preparing Trajectories
Learning
[[0.20256820395118674, 0.20301073265900071, 0.20163574553769653, 0.203049314997532, 0.20269176442114917, 0.2029794840170114], [0.19849996631854322, 0.19824297652607725, 0.19854573103445547, 0.19822059108847415, 0.1984281674727827, 0.1982611093662127]]
[[0.11996787179347485, 0.2837385303607539, 0.14804434416818826, 0.12663912325137214, 0.251933764108495, 0.23820341896092703], [0.25450842881558733, 0.3370266544005124, 0.2236689237677673, 0.24774715199644676, 0.2936209464330597, 0.27100038443782576]]
1 1 4
[[0.20256820395118674, 0.20301073265900071, 0.20163574553769653, 0.203049314997532, 0.20269176442114917, 0.202979

In [9]:
def irl_trajectory(mdp, goal, pol, x0, max_steps=None, verbose=True):
    """Sample a trajectory from ``x0`` under ``pol`` until a goal state is reached.

    Parameters
    ----------
    mdp : sequence
        Indexable whose first three entries are the state labels ``X``, the action
        labels ``A`` and the transition model ``P``. ``P`` is indexed by action
        label and then by ``[state index, :]`` to obtain the next-state distribution.
    goal : str
        A state counts as terminal when its label contains this substring.
    pol : 2-D array
        ``pol[x, :]`` is the action distribution used in state index ``x``.
    x0 : str
        Label of the start state; must be an element of ``X``.
    max_steps : int, optional
        Upper bound on the number of sampled transitions. ``None`` (default) keeps
        the original unbounded behaviour.
    verbose : bool, optional
        When true (default, as before) the start state and every
        ``(action, next state)`` pair are printed.

    Returns
    -------
    tuple of numpy.ndarray
        ``(states, actions)``. ``actions`` has the same length as ``states``: after
        the goal is reached one extra action is sampled from the policy in the
        goal state and appended.

    Raises
    ------
    ValueError
        If ``x0`` is not found in ``X``.
    RuntimeError
        If ``max_steps`` transitions were sampled without reaching the goal.
    """
    X, A, P = mdp[0], mdp[1], mdp[2]

    nX = len(X)
    nA = len(A)

    traj = [x0]
    actions = []
    x = list(X).index(x0)
    n_steps = 0

    if verbose:
        print(x0)
    while True:
        if max_steps is not None and n_steps >= max_steps:
            # Guard against policies/dynamics under which the goal is never reached.
            raise RuntimeError('goal %r not reached within %d steps' % (goal, max_steps))

        # Sampling order (action, then next state) is kept so that a seeded
        # np.random stream yields the same trajectory as before.
        a = np.random.choice(nA, p=pol[x, :])
        x = np.random.choice(nX, p=P[A[a]][x, :])
        n_steps += 1

        if verbose:
            print(A[a], X[x])

        traj.append(X[x])
        actions.append(A[a])

        if goal in X[x]:
            # Pad the action sequence with the policy's action in the goal state
            # so that states and actions line up one-to-one.
            actions.append(A[np.random.choice(nA, p=pol[x, :])])
            break

    return np.array(traj), np.array(actions)

# Draw the grid, the objects and the walls of the current maze layout.
# NOTE(review): n_rows, n_cols, objs_states and walls are not assigned in this cell;
# they must already exist in the kernel from another cell — confirm the execution order.
fig = create_world_view(n_rows, n_cols, objs_states, walls)
fig.show()

<IPython.core.display.Javascript object>

In [210]:
# Small 4 x 3 maze with two objects.
n_rows, n_cols = 4, 3
objs_states = [(3, 3, 'P'), (3, 1, 'D')]

# Every wall is a list of (first, second) grid-line coordinates; create_world_view
# plots the second coordinate on the x axis and the first on the y axis.
# Spec format: (axis, fixed coordinate, start, stop)
#   'h' -> first coordinate fixed, second runs over k + 0.5 for k in range(start, stop)
#   'v' -> second coordinate fixed, first runs over k + 0.5 for k in range(start, stop)
walls = [
    [(fixed, k + 0.5) if axis == 'h' else (k + 0.5, fixed) for k in range(lo, hi)]
    for axis, fixed, lo, hi in (
        # outer border
        ('h', 0.5, 0, n_cols + 1),
        ('h', n_rows + 0.5, 0, n_cols + 1),
        ('v', 0.5, 0, n_rows + 1),
        ('v', n_cols + 0.5, 0, n_rows + 1),
        # inner walls
        ('h', 1.5, 0, 2),
        ('h', 1.5, 2, 4),
        # disabled inner walls:
        # ('v', 1.5, 1, 4),
        # ('v', 2.5, 1, 4),
    )
]

# Fixed start state; a random alternative would be
# np.random.choice([x for x in X_a if 'N' in x])
x0 = '1 1 N'
goals = ['P', 'D']
goal = 'P'

# Draw the grid, objects and walls of the layout configured above in this cell.
fig = create_world_view(n_rows, n_cols, objs_states, walls)
fig.show()

<IPython.core.display.Javascript object>

In [220]:
print('\n'.join(['##########################################',
                 '#####  Wall Auto Collect Maze World  #####',
                 '##########################################']))

# Generate states, actions and transition model for the current layout.
wacmw = WallAutoCollectMazeWorld()
X_w, A_w, P_w = wacmw.generate_world(n_rows, n_cols, objs_states, walls, 'stochastic', 0.1)

print('### Computing Costs and Creating Task MDPs ###')
mdps_w, task_mdps_w = {}, {}
q_mdps_w, costs = [], []

# One regular MDP per goal; its cost function and Q-values are kept for later use.
for i, task in enumerate(tqdm(goals, desc='Single Task MDPs')):
    c = wacmw.generate_costs(task, X_w, A_w)
    costs.append(c)
    mdp = MDP(X_w, A_w, P_w, c, 0.9, get_goal_states(X_w, task))
    _, q = mdp.policy_iteration()
    q_mdps_w.append(q)
    mdps_w['mdp{}'.format(i + 1)] = mdp

print('Legible task MDP')
leg_costs = []

# One legible MDP per goal, built from the Q-values of all single-task MDPs
# (passing task_mdps=list(mdps_w.values()) instead of q_mdps was a tried alternative).
for i, task in enumerate(tqdm(goals, desc='Legible Task MDPs')):
    mdp = LegibleTaskMDP(
        X_w, A_w, P_w, 0.9, task, objs_states, goals, 15.0,
        get_goal_states(X_w, task), -1, 'leg_optimal',
        q_mdps=q_mdps_w,
    )
    leg_costs.append(mdp.costs)
    task_mdps_w['leg_mdp_{}'.format(i + 1)] = mdp

# Dictionary keys are 1-based positions of the goal in `goals`.
goal_num = goals.index(goal) + 1
task_mdp_w = task_mdps_w['leg_mdp_{}'.format(goal_num)]

print('### Computing Optimal policy ###')
time1 = time.time()
pol_w, Q1 = mdps_w['mdp{}'.format(goal_num)].policy_iteration()
print('Took %.3f seconds to compute policy' % (time.time() - time1))

print('### Computing Legible policy ###')
time1 = time.time()
task_pol_w, task_Q = task_mdp_w.policy_iteration()
print('Took %.3f seconds to compute policy' % (time.time() - time1))


Single Task MDPs: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 125.01it/s][A

Legible Task MDPs: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 498.22it/s][A

##########################################
#####  Wall Auto Collect Maze World  #####
##########################################
### Computing Costs and Creating Task MDPs ###
Iteration 1Iteration 2Iteration 3N. iterations:  3
Iteration 1Iteration 2Iteration 3N. iterations:  3
Legible task MDP
### Computing Optimal policy ###
Iteration 1Iteration 2Iteration 3N. iterations:  3
Took 0.009 seconds to compute policy
### Computing Legible policy ###
Iteration 1Iteration 2Iteration 3N. iterations:  3
Took 0.006 seconds to compute policy





In [221]:
print('Initial State: ' + x0)
print('\n'.join(['##########################################',
                 '#####  Wall Auto Collect Maze World  #####',
                 '##########################################']))

# --- trajectory sampled with the optimal policy of the selected goal ---
print('Optimal trajectory for task: ' + goal)
opt_mdp = mdps_w['mdp' + str(goals.index(goal) + 1)]
t1, a1 = opt_mdp.trajectory(x0, pol_w)
print('Trajectory: ' + str(t1))
print('Cost: ' + str(opt_mdp.trajectory_reward([[t1, a1]])))
print('Legible Reward: ' + str(task_mdp_w.trajectory_reward([[t1, a1]])))
t_opt = process_trajectory(t1, a1)

# --- trajectory sampled with the legible policy of the same goal ---
print('Legible trajectory for task: ' + goal)
task_traj, task_act = task_mdp_w.trajectory(x0, task_pol_w)
print('Trajectory: ' + str(task_traj))
print('Cost: ' + str(opt_mdp.trajectory_reward([[task_traj, task_act]])))
print('Legible Reward: ' + str(task_mdp_w.trajectory_reward([[task_traj, task_act]])))
t_leg = process_trajectory(task_traj, task_act)

# For every visited state, print the legible policy row and the legible cost row.
goal_leg_costs = leg_costs[goals.index(goal)]
X_w_lst = list(X_w)
for ptr in task_traj:
    ptr_idx = X_w_lst.index(ptr)
    print(task_pol_w[ptr_idx])
    print(goal_leg_costs[ptr_idx])

# Overlay both trajectories on the maze: optimal in blue, legible in black.
fig = create_world_view(n_rows, n_cols, objs_states, walls)
for drawn, colour in ((t_opt, 'b'), (t_leg, 'k')):
    visualize_trajectory(drawn[0], drawn, fig, colour)
fig.show()

# Average both reward measures over 1000 sampled trajectories per policy.
print('Getting model performance!!')
clock_1 = time.time()
mdp_r, mdp_rl, leg_mdp_r, leg_mdp_rl = simulate(opt_mdp, pol_w, task_mdp_w, task_pol_w, x0, 1000)
time_simulation = time.time() - clock_1
print('Simulation length = %.3f' % time_simulation)
print('Optimal Policy performance:\nCost: %.3f\nLegible Reward: %.3f' % (mdp_r, mdp_rl))
print('legible Policy performance:\nCost: %.3f\nLegible Reward: %.3f' % (leg_mdp_r, leg_mdp_rl))


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<?, ?it/s][A

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 999.60it/s][A

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<?, ?it/s][A

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:

Initial State: 1 1 N
##########################################
#####  Wall Auto Collect Maze World  #####
##########################################
Optimal trajectory for task: P
Trajectory: ['1 1 N' '1 2 N' '2 2 N' '3 2 N' '3 3 P']
Cost: 3.439
Legible Reward: 2.740099999992011
Legible trajectory for task: P
Trajectory: ['1 1 N' '1 2 N' '2 2 N' '2 3 N' '2 3 N' '2 3 N' '2 3 N' '2 3 N' '3 3 P']
Cost: 5.6953279000000006
Legible Reward: 5.175795109830954
[0. 0. 0. 1. 0.]
[0.5 0.5 0.5 0.5 0.5]
[0. 1. 0. 0. 0.]
[0.5 0.5 0.5 0.5 0.5]
[0. 0. 0. 1. 0.]
[5.00000000e-01 5.00000000e-01 1.75586958e-10 1.00000000e+00
 5.00000000e-01]
[0.25 0.25 0.   0.25 0.25]
[1.         1.         0.92385223 1.         1.        ]
[0.25 0.25 0.   0.25 0.25]
[1.         1.         0.92385223 1.         1.        ]
[0.25 0.25 0.   0.25 0.25]
[1.         1.         0.92385223 1.         1.        ]
[0.25 0.25 0.   0.25 0.25]
[1.         1.         0.92385223 1.         1.        ]
[0.25 0.25 0.   0.25 0.25]
[1.    




<IPython.core.display.Javascript object>


Simulate Trajectories:   0%|                                                                                                                                                                                                    | 0/1000 [00:00<?, ?it/s][A
Simulate Trajectories:  10%|██████████████████▏                                                                                                                                                                       | 98/1000 [00:00<00:00, 970.37it/s][A

Getting model performance!!



Simulate Trajectories:  19%|███████████████████████████████████▉                                                                                                                                                     | 194/1000 [00:00<00:00, 967.24it/s][A
Simulate Trajectories:  30%|██████████████████████████████████████████████████████▉                                                                                                                                  | 297/1000 [00:00<00:00, 985.25it/s][A
Simulate Trajectories:  41%|███████████████████████████████████████████████████████████████████████████▎                                                                                                            | 409/1000 [00:00<00:00, 1022.14it/s][A
Simulate Trajectories:  51%|██████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                         | 514/1000 [00:00<00:00, 103

Simulation length = 1.050
Optimal Policy performance:
Cost: 3.744
Legible Reward: 3.106
legible Policy performance:
Cost: 5.221
Legible Reward: 4.655





In [23]:
print('######################################')
print('#####     IRL Agent Learning     #####')
print('######################################')

print('IRL Agent')
# Two learners over the same world: one built from the task costs, one from the legible costs.
opt_learner = LearnerMDP(X_w, A_w, P_w, 0.9, costs, -1)
leg_learner = LearnerMDP(X_w, A_w, P_w, 0.9, leg_costs, 1)

print('Preparing Trajectories')
# Encode the trajectory (t1, a1) as rows of [state index, action index].
# The lookup lists are built once instead of once per trajectory step.
X_w_lst = list(X_w)
A_w_lst = list(A_w)
p_traj = np.array([[X_w_lst.index(t1[j]), A_w_lst.index(a1[j])] for j in range(len(t1))])

print('Learning')
step = 2
traj_len = len(p_traj)
# Prefix lengths step, 2*step, ... plus the full length when it is not a multiple of step.
indexes = list(range(step, traj_len + 1, step))
if traj_len % step != 0:
    indexes.append(traj_len)
n_idx = len(indexes)

# Print, for each prefix, the index inferred by each learner and the true goal index.
for idx in tqdm(indexes):
    r, o_idx = opt_learner.birl_inference(p_traj[:idx], 0.9)
    r, l_idx = leg_learner.birl_inference(p_traj[:idx], 0.9)
    print(o_idx, l_idx, goals.index(goal))

100%|████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 41.67it/s]

######################################
#####     IRL Agent Learning     #####
######################################
IRL Agent
Iteration 1Iteration 2Iteration 3Iteration 4N. iterations:  4
Iteration 1Iteration 2Iteration 3Iteration 4N. iterations:  4
Iteration 1Iteration 2Iteration 3N. iterations:  3
Iteration 1Iteration 2Iteration 3N. iterations:  3
Preparing Trajectories
Learning
1 0 1
0 1 1
1 1 1
1 1 1





In [38]:
print('######################################')
print('#####     IRL Agent Learning     #####')
print('######################################')

print('IRL Agent')
# Two learners over the same world: one built from the task costs, one from the legible costs.
opt_learner = LearnerMDP(X_w, A_w, P_w, 0.9, costs, -1)
leg_learner = LearnerMDP(X_w, A_w, P_w, 0.9, leg_costs, 1)

print('Preparing Trajectories')
# Encode the trajectory (t1, a1) as rows of [state index, action index].
# The lookup lists are built once instead of once per trajectory step.
X_w_lst = list(X_w)
A_w_lst = list(A_w)
p_traj = np.array([[X_w_lst.index(t1[j]), A_w_lst.index(a1[j])] for j in range(len(t1))])

print('Learning')
step = 2
traj_len = len(p_traj)
# Prefix lengths step, 2*step, ... plus the full length when it is not a multiple of step.
indexes = list(range(step, traj_len + 1, step))
if traj_len % step != 0:
    indexes.append(traj_len)
n_idx = len(indexes)

# Print, for each prefix, the index inferred by each learner and the true goal index.
for idx in tqdm(indexes):
    r, o_idx = opt_learner.birl_inference(p_traj[:idx], 0.9)
    r, l_idx = leg_learner.birl_inference(p_traj[:idx], 0.9)
    print(o_idx, l_idx, goals.index(goal))

100%|██████████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 1346.05it/s]

######################################
#####     IRL Agent Learning     #####
######################################
IRL Agent
Iteration 1Iteration 2Iteration 3Iteration 4N. iterations:  4
Iteration 1Iteration 2Iteration 3Iteration 4N. iterations:  4
Iteration 1Iteration 2Iteration 3N. iterations:  3
Iteration 1Iteration 2Iteration 3N. iterations:  3
Preparing Trajectories
Learning
0 1 1
0 1 1
1 1 1
1 1 1





In [36]:
# 8 x 8 maze with six objects.
n_rows, n_cols = 8, 8
objs_states = [(8, 5, 'P'), (1, 5, 'D'), (4, 1, 'C'), (8, 1, 'L'), (4, 7, 'T'), (8, 8, 'O')]

# Every wall is a list of (first, second) grid-line coordinates; create_world_view
# plots the second coordinate on the x axis and the first on the y axis.
# Spec format: (axis, fixed coordinate, start, stop)
#   'h' -> first coordinate fixed, second runs over k + 0.5 for k in range(start, stop)
#   'v' -> second coordinate fixed, first runs over k + 0.5 for k in range(start, stop)
walls = [
    [(fixed, k + 0.5) if axis == 'h' else (k + 0.5, fixed) for k in range(lo, hi)]
    for axis, fixed, lo, hi in (
        # outer border
        ('h', 0.5, 0, n_cols + 1),
        ('h', n_rows + 0.5, 0, n_cols + 1),
        ('v', 0.5, 0, n_rows + 1),
        ('v', n_cols + 0.5, 0, n_rows + 1),
        # inner walls with a fixed second coordinate
        ('v', 2.5, 2, 6),
        ('v', 2.5, 6, 8),
        ('v', 3.5, 0, 2),
        ('v', 3.5, 6, 8),
        ('v', 5.5, 6, 9),
        ('v', 6.5, 3, 6),
        ('v', 6.5, 6, 8),
        # inner walls with a fixed first coordinate
        ('h', 2.5, 0, 3),
        ('h', 1.5, 3, 6),
        ('h', 1.5, 6, 9),
        ('h', 3.5, 6, 8),
        ('h', 6.5, 0, 3),
        ('h', 6.5, 3, 5),
        ('h', 6.5, 6, 8),
        ('h', 5.5, 7, 9),
    )
]

# Draw the grid, objects and walls of the layout configured above in this cell.
fig = create_world_view(n_rows, n_cols, objs_states, walls)
fig.show()
# Dump the raw wall coordinates; dtype=object is given explicitly because the
# walls are lists of different lengths.
print(np.array(walls, dtype=object))

<IPython.core.display.Javascript object>

In [84]:
# 10 x 10 maze with six objects.
n_rows, n_cols = 10, 10
objs_states = [(1, 7, 'P'), (10, 10, 'D'), (6, 10, 'C'), (9, 1, 'L'), (9, 5, 'T'), (5, 1, 'O')]

# Every wall is a list of (first, second) grid-line coordinates; create_world_view
# plots the second coordinate on the x axis and the first on the y axis.
# Spec format: (axis, fixed coordinate, start, stop)
#   'h' -> first coordinate fixed, second runs over k + 0.5 for k in range(start, stop)
#   'v' -> second coordinate fixed, first runs over k + 0.5 for k in range(start, stop)
walls = [
    [(fixed, k + 0.5) if axis == 'h' else (k + 0.5, fixed) for k in range(lo, hi)]
    for axis, fixed, lo, hi in (
        # outer border
        ('h', 0.5, 0, n_cols + 1),
        ('h', n_rows + 0.5, 0, n_cols + 1),
        ('v', 0.5, 0, n_rows + 1),
        ('v', n_cols + 0.5, 0, n_rows + 1),
        # inner walls with a fixed second coordinate
        ('v', 6.5, 4, 7),
        ('v', 4.5, 4, 7),
        ('v', 2.5, 3, 6),
        ('v', 2.5, 6, 8),
        ('v', 1.5, 8, 10),
        ('v', 3.5, 0, 2),
        ('v', 3.5, 8, 11),
        ('v', 4.5, 0, 3),
        ('v', 7.5, 0, 3),
        ('v', 7.5, 8, 11),
        ('v', 8.5, 0, 4),
        ('v', 8.5, 4, 6),
        ('v', 8.5, 6, 8),
        ('v', 8.5, 8, 10),
        # inner walls with a fixed first coordinate
        ('h', 2.5, 0, 3),
        ('h', 2.5, 4, 6),
        ('h', 2.5, 6, 8),
        ('h', 3.5, 0, 3),
        ('h', 3.5, 9, 11),
        ('h', 4.5, 9, 11),
        ('h', 4.5, 4, 7),
        ('h', 6.5, 4, 7),
        ('h', 7.5, 0, 3),
        ('h', 7.5, 8, 11),
        ('h', 8.5, 0, 2),
        ('h', 8.5, 3, 6),
        ('h', 8.5, 6, 8),
        ('h', 8.5, 8, 10),
    )
]

# Draw the grid, objects and walls of the layout configured above in this cell.
fig = create_world_view(n_rows, n_cols, objs_states, walls)
fig.show()
# Dump the raw wall coordinates; dtype=object is given explicitly because the
# walls are lists of different lengths.
print(np.array(walls, dtype=object))

<IPython.core.display.Javascript object>

[list([(0.5, 0.5), (0.5, 1.5), (0.5, 2.5), (0.5, 3.5), (0.5, 4.5), (0.5, 5.5), (0.5, 6.5), (0.5, 7.5), (0.5, 8.5), (0.5, 9.5), (0.5, 10.5)])
 list([(10.5, 0.5), (10.5, 1.5), (10.5, 2.5), (10.5, 3.5), (10.5, 4.5), (10.5, 5.5), (10.5, 6.5), (10.5, 7.5), (10.5, 8.5), (10.5, 9.5), (10.5, 10.5)])
 list([(0.5, 0.5), (1.5, 0.5), (2.5, 0.5), (3.5, 0.5), (4.5, 0.5), (5.5, 0.5), (6.5, 0.5), (7.5, 0.5), (8.5, 0.5), (9.5, 0.5), (10.5, 0.5)])
 list([(0.5, 10.5), (1.5, 10.5), (2.5, 10.5), (3.5, 10.5), (4.5, 10.5), (5.5, 10.5), (6.5, 10.5), (7.5, 10.5), (8.5, 10.5), (9.5, 10.5), (10.5, 10.5)])
 list([(4.5, 6.5), (5.5, 6.5), (6.5, 6.5)])
 list([(4.5, 4.5), (5.5, 4.5), (6.5, 4.5)])
 list([(3.5, 2.5), (4.5, 2.5), (5.5, 2.5)]) list([(6.5, 2.5), (7.5, 2.5)])
 list([(8.5, 1.5), (9.5, 1.5)]) list([(0.5, 3.5), (1.5, 3.5)])
 list([(8.5, 3.5), (9.5, 3.5), (10.5, 3.5)])
 list([(0.5, 4.5), (1.5, 4.5), (2.5, 4.5)])
 list([(0.5, 7.5), (1.5, 7.5), (2.5, 7.5)])
 list([(8.5, 7.5), (9.5, 7.5), (10.5, 7.5)])
 list([(0.