In [13]:
# import
import numpy as np
import gym
from gym import spaces
import random

import collections
import copy
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

In [14]:
class Grid(gym.Env):
    metadata = {'render.modes': ['console']}
    # action id
    XM = 0 # x minus
    XP = 1 # x plus
    YM = 2 # y minus
    YP = 3 # y plus
    
    def __init__(self, x_size=5):
        super(Grid, self).__init__()
        
        # size of 2D grid
        self.x_size = x_size
        
        # initialize the position of the agent
        self.init_agent()
        
        # define action space
        n_actions = 4 # LEFT, RIGHT, TOP, BOTTOM
        self.action_space = spaces.Discrete(n_actions)
        
        # define observation space (x and y coordinates)
        self.obs_low = np.zeros(2)
        self.obs_high = np.ones(2) * (self.x_size - 1)
        self.observation_space = spaces.Box(self.obs_low, self.obs_high)
    
    def init_agent(self, initial_pos=None):
        if initial_pos is not None:
            self.agent_pos = initial_pos
        else:
            self.agent_pos = [0, 0]
            while True:
                self.agent_pos[0] = random.randrange(0, self.x_size)
                self.agent_pos[1] = random.randrange(0, self.x_size)
                if self.agent_pos[0] != self.x_size -1 and self.agent_pos[1] != self.x_size - 1:
                    break

    def get_agent_obs(self):
        pos_x  = copy.deepcopy(self.agent_pos[0])
        pos_y  = copy.deepcopy(self.agent_pos[1])

        return [pos_x, pos_y]

    def reset(self, initial_pos=None):
        # initialize the position of the agent
        self.init_agent(initial_pos)

        return self.get_agent_obs()
        
    def step(self, action): # i: index of the drone
        # original position
        org_x  = copy.deepcopy(self.agent_pos[0])
        org_y  = copy.deepcopy(self.agent_pos[1])

        # move the agent
        if action == self.XM:
            self.agent_pos[0] -= 1
        elif action == self.XP:
            self.agent_pos[0] += 1
        elif action == self.YM:
            self.agent_pos[1] -= 1
        elif action == self.YP:
            self.agent_pos[1] += 1
        else:
            raise ValueError("Received invalid action={} which is not part of the action space".format(action))
        
        # account for the boundaries of the grid (-2: out of the grid)
        if self.agent_pos[0] > self.x_size - 1 or self.agent_pos[0] < 0 or self.agent_pos[1] > self.x_size - 1 or self.agent_pos[1] < 0:
            self.agent_pos[0] = org_x
            self.agent_pos[1] = org_y

        if self.agent_pos[0] == self.x_size - 1 and self.agent_pos[1] == self.x_size - 1:
            reward = 100
            done = True
        else:
            reward = -1
            done = False
        
        return self.get_agent_obs(), reward, done

    def close(self):
        pass

In [15]:
class QTables():
    def __init__(self, observation_space, action_space, eps_start=1, eps_end=0.1, gamma=0.9, r=0.99, lr=0.1):
        self.observation_space = observation_space
        self.observation_length = observation_space.shape[0]
        self.size = int(self.observation_space.high[0] - self.observation_space.low[0]) + 1

        self.action_space = action_space
        self.action_values = [0, 1, 2, 3] # corresponding to the column numbers in q table
        self.action_num = len(self.action_values) # 4

        self.eps = eps_start  # current epsilon
        self.eps_end = eps_end # epsilon lower bound
        self.r = r  # decrement rate of epsilon
        self.gamma = gamma  # discount rate
        self.lr = lr  # learning rate

        self.q_table = np.zeros([self.size**2, self.action_num])

    # support function: convert the fov to the unique row number in the q table
    def obs_to_row(self, obs_array):
        return obs_array[0] * self.size + obs_array[1]
    
    def get_action(self, obs):
        if np.random.rand() < self.eps:
            action = random.choice(self.action_values)
            greedy = False
        else:
            obs_row = self.obs_to_row(obs)
            action = np.argmax(self.q_table[obs_row])
            greedy = True
        
        return action, greedy
    
    def update_eps(self):
        # update the epsilon
        if self.eps > self.eps_end: # lower bound
            self.eps *= self.r

    def train(self, obs, obs_next, action, reward, done):
        obs_row = self.obs_to_row(obs)
        obs_next_row = self.obs_to_row(obs_next)

        q_current = self.q_table[obs_row][action] # current q value
        q_next_max = np.max(self.q_table[obs_next_row]) # the maximum q value in the next state

        # update the q value
        if done:
            self.q_table[obs_row][action] = q_current + self.lr * reward
        else:
            self.q_table[obs_row][action] = q_current + self.lr * (reward + self.gamma * q_next_max - q_current)

### Fixed Initial Position

#### 3 x 3

In [16]:
# records for each episode
time_steps = [] # number of time steps in total
epsilons = [] # epsilon at the end of each episode
greedy = [] # the ratio of greedy choices
trajectory = []

q_class = []

# parameters for training
train_episodes = 500
size = 3
max_steps = size * 5

# initialize the environment and the q tables
env = Grid(x_size=size)
q = QTables(observation_space=env.observation_space, action_space=env.action_space, eps_start=1, eps_end=0, gamma=0.5, r=0.99, lr=0.01)

# training
for episode in range(train_episodes):
    env.reset([0, 0])
    state = env.get_agent_obs()
    eps_tmp = q.eps

    greedy_count = 0
    epi_trajectory = []
    epi_trajectory.append(env.get_agent_obs())

    for step in range(max_steps):
        action, greedy_tf = q.get_action(obs=state)
        next_state, reward, done = env.step(action)
        q.train(state, next_state, action, reward, done)

        greedy_count += greedy_tf * 1
        epi_trajectory.append(env.get_agent_obs())

        if done:
            break
    
        # update the observation
        state = next_state

    # record
    time_steps.append(len(epi_trajectory)-1)
    epsilons.append(eps_tmp)
    greedy.append(greedy_count / (step + 1))
    q_class.append(copy.deepcopy(q))
    trajectory.append(epi_trajectory)

    # update epsilon
    q.update_eps()

    print(episode, time_steps[episode], epsilons[episode], greedy[episode], trajectory[episode])



0 15 1 0.0 [[0, 0], [0, 1], [1, 1], [2, 1], [2, 0], [1, 0], [2, 0], [2, 0], [2, 0], [2, 0], [2, 0], [1, 0], [0, 0], [1, 0], [1, 0], [1, 0]]
1 15 0.99 0.0 [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 1], [0, 1], [0, 0], [0, 1], [0, 0], [0, 1], [0, 2], [0, 1], [0, 0], [0, 1]]
2 8 0.9801 0.0 [[0, 0], [0, 1], [0, 1], [1, 1], [0, 1], [1, 1], [2, 1], [2, 1], [2, 2]]
3 6 0.9702989999999999 0.0 [[0, 0], [1, 0], [1, 0], [1, 0], [1, 1], [1, 2], [2, 2]]
4 15 0.96059601 0.0 [[0, 0], [1, 0], [0, 0], [0, 1], [0, 1], [0, 1], [0, 2], [0, 1], [0, 1], [0, 1], [0, 2], [1, 2], [0, 2], [0, 2], [0, 2], [0, 1]]
5 15 0.9509900498999999 0.06666666666666667 [[0, 0], [0, 0], [0, 0], [0, 0], [0, 1], [0, 2], [0, 2], [0, 2], [1, 2], [0, 2], [0, 2], [0, 1], [0, 0], [1, 0], [1, 1], [1, 2]]
6 15 0.9414801494009999 0.06666666666666667 [[0, 0], [0, 0], [0, 0], [0, 0], [0, 1], [0, 1], [0, 0], [1, 0], [2, 0], [1, 0], [1, 0], [2, 0], [2, 0], [2, 1], [2, 0], [2, 1]]
7 14 0.9320653479069899 0.07142857142857142 [[0, 0

In [17]:
idx3 = []
for i in range(3):
    for j in range(3):
        idx3.append((i, j))

idx4 = []
for i in range(4):
    for j in range(4):
        idx4.append((i, j))

idx5 = []
for i in range(5):
    for j in range(5):
        idx5.append((i, j))

In [24]:
df = pd.DataFrame(q_class[300].q_table, index=idx3)
df

Unnamed: 0,0,1,2,3
"(0, 0)",-0.616475,-0.574515,-0.628523,3.906466
"(0, 1)",-0.27468,20.841861,-0.285174,-0.07349
"(0, 2)",-0.127688,8.891365,-0.108486,-0.105287
"(1, 0)",-0.222064,-0.212303,-0.251628,1.827693
"(1, 1)",-0.090085,1.227912,-0.154109,79.288805
"(1, 2)",-0.027174,252.0,1.104406,5.636236
"(2, 0)",-0.089643,-0.07636,-0.076366,0.78458
"(2, 1)",-0.053394,0.060592,-0.064591,28.0
"(2, 2)",0.0,0.0,0.0,0.0


### 4 x 4

In [7]:
# records for each episode
time_steps = [] # number of time steps in total
epsilons = [] # epsilon at the end of each episode
greedy = [] # the ratio of greedy choices
trajectory = []

q_class = []

# parameters for training
train_episodes = 1000
size = 4
max_steps = size * 5

# initialize the environment and the q tables
env = Grid(x_size=size)
q = QTables(observation_space=env.observation_space, action_space=env.action_space, eps_start=1, eps_end=0, gamma=0.5, r=0.99, lr=0.1)

# training
for episode in range(train_episodes):
    env.reset([0, 0])
    state = env.get_agent_obs()
    eps_tmp = q.eps

    greedy_count = 0
    epi_trajectory = []
    epi_trajectory.append(env.get_agent_obs())

    for step in range(max_steps):
        action, greedy_tf = q.get_action(obs=state)
        next_state, reward, done = env.step(action)
        q.train(state, next_state, action, reward, done)

        greedy_count += greedy_tf * 1
        epi_trajectory.append(env.get_agent_obs())

        if done:
            break
    
        # update the observation
        state = next_state

    # record
    time_steps.append(len(epi_trajectory)-1)
    epsilons.append(eps_tmp)
    greedy.append(greedy_count / (step + 1))
    q_class.append(copy.deepcopy(q))
    trajectory.append(epi_trajectory)

    # update epsilon
    q.update_eps()

    print(episode, time_steps[episode], epsilons[episode], greedy[episode], trajectory[episode])



0 20 1 0.0 [[0, 0], [1, 0], [2, 0], [3, 0], [3, 0], [3, 1], [3, 0], [3, 0], [3, 0], [2, 0], [2, 1], [1, 1], [2, 1], [3, 1], [3, 2], [3, 1], [3, 0], [3, 0], [3, 0], [3, 0], [3, 0]]
1 20 0.99 0.0 [[0, 0], [0, 1], [1, 1], [1, 2], [2, 2], [3, 2], [3, 2], [3, 2], [3, 1], [3, 1], [3, 1], [3, 1], [2, 1], [2, 2], [2, 1], [1, 1], [1, 0], [0, 0], [0, 0], [0, 0], [0, 0]]
2 20 0.9801 0.0 [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 1], [0, 1], [1, 1], [1, 2], [1, 3], [0, 3], [1, 3], [1, 2], [2, 2], [2, 1]]
3 14 0.9702989999999999 0.07142857142857142 [[0, 0], [0, 0], [0, 0], [1, 0], [1, 0], [1, 1], [2, 1], [1, 1], [2, 1], [2, 0], [3, 0], [3, 1], [3, 1], [3, 2], [3, 3]]
4 20 0.96059601 0.0 [[0, 0], [0, 0], [0, 1], [1, 1], [0, 1], [0, 0], [0, 1], [0, 2], [0, 1], [0, 2], [0, 2], [0, 2], [0, 2], [0, 3], [0, 3], [0, 3], [1, 3], [1, 2], [1, 1], [1, 2], [1, 3]]
5 20 0.9509900498999999 0.0 [[0, 0], [1, 0], [2, 0], [2, 0], [3, 0], [3, 0], [3, 1], [3, 2], [3, 2

### 5 x 5

In [8]:
# records for each episode
time_steps = [] # number of time steps in total
epsilons = [] # epsilon at the end of each episode
greedy = [] # the ratio of greedy choices
trajectory = []

q_class = []

# parameters for training
train_episodes = 1000
size = 5
max_steps = size * 5

# initialize the environment and the q tables
env = Grid(x_size=size)
q = QTables(observation_space=env.observation_space, action_space=env.action_space, eps_start=1, eps_end=0, gamma=0.5, r=0.99, lr=0.1)

# training
for episode in range(train_episodes):
    env.reset([0, 0])
    state = env.get_agent_obs()
    eps_tmp = q.eps

    greedy_count = 0
    epi_trajectory = []
    epi_trajectory.append(env.get_agent_obs())

    for step in range(max_steps):
        action, greedy_tf = q.get_action(obs=state)
        next_state, reward, done = env.step(action)
        q.train(state, next_state, action, reward, done)

        greedy_count += greedy_tf * 1
        epi_trajectory.append(env.get_agent_obs())

        if done:
            break
    
        # update the observation
        state = next_state

    # record
    time_steps.append(len(epi_trajectory)-1)
    epsilons.append(eps_tmp)
    greedy.append(greedy_count / (step + 1))
    q_class.append(copy.deepcopy(q))
    trajectory.append(epi_trajectory)

    # update epsilon
    q.update_eps()

    print(episode, time_steps[episode], epsilons[episode], greedy[episode], trajectory[episode])



0 13 1 0.0 [[0, 0], [1, 0], [1, 0], [2, 0], [2, 1], [3, 1], [4, 1], [4, 1], [3, 1], [3, 2], [3, 3], [4, 3], [4, 3], [4, 4]]
1 25 0.99 0.04 [[0, 0], [0, 0], [1, 0], [0, 0], [0, 0], [1, 0], [1, 0], [1, 0], [1, 1], [2, 1], [2, 0], [2, 0], [3, 0], [2, 0], [2, 1], [2, 2], [3, 2], [4, 2], [3, 2], [3, 1], [2, 1], [1, 1], [0, 1], [0, 1], [0, 1], [0, 0]]
2 25 0.9801 0.0 [[0, 0], [1, 0], [2, 0], [2, 0], [2, 1], [2, 0], [3, 0], [2, 0], [2, 1], [2, 2], [2, 1], [2, 0], [1, 0], [0, 0], [0, 0], [1, 0], [2, 0], [1, 0], [1, 0], [1, 1], [1, 0], [0, 0], [0, 0], [0, 1], [1, 1], [1, 2]]
3 19 0.9702989999999999 0.0 [[0, 0], [0, 1], [0, 1], [0, 0], [1, 0], [2, 0], [3, 0], [4, 0], [4, 0], [4, 0], [4, 1], [4, 2], [3, 2], [3, 3], [3, 2], [3, 1], [3, 2], [3, 3], [3, 4], [4, 4]]
4 25 0.96059601 0.0 [[0, 0], [0, 0], [0, 0], [0, 1], [0, 1], [0, 0], [1, 0], [2, 0], [3, 0], [2, 0], [1, 0], [1, 0], [1, 1], [1, 0], [1, 0], [2, 0], [2, 0], [2, 0], [2, 0], [3, 0], [4, 0], [4, 0], [4, 0], [4, 0], [4, 0], [3, 0]]
5 25 0.95

### Random Initial Position

#### 3 x 3

In [10]:
# records for each episode
time_steps = [] # number of time steps in total
epsilons = [] # epsilon at the end of each episode
greedy = [] # the ratio of greedy choices
trajectory = []

q_class = []

# parameters for training
train_episodes = 1000
size = 3
max_steps = size * 5

# initialize the environment and the q tables
env = Grid(x_size=size)
q = QTables(observation_space=env.observation_space, action_space=env.action_space, eps_start=1, eps_end=0, gamma=0.5, r=0.99, lr=0.1)

# training
for episode in range(train_episodes):
    env.reset()
    state = env.get_agent_obs()
    eps_tmp = q.eps

    greedy_count = 0
    epi_trajectory = []
    epi_trajectory.append(env.get_agent_obs())

    for step in range(max_steps):
        action, greedy_tf = q.get_action(obs=state)
        next_state, reward, done = env.step(action)
        q.train(state, next_state, action, reward, done)

        greedy_count += greedy_tf * 1
        epi_trajectory.append(env.get_agent_obs())

        if done:
            break
    
        # update the observation
        state = next_state

    # record
    time_steps.append(len(epi_trajectory)-1)
    epsilons.append(eps_tmp)
    greedy.append(greedy_count / (step + 1))
    q_class.append(copy.deepcopy(q))
    trajectory.append(epi_trajectory)

    # update epsilon
    q.update_eps()

    print(episode, time_steps[episode], epsilons[episode], greedy[episode], trajectory[episode])



0 13 1 0.0 [[1, 1], [0, 1], [1, 1], [0, 1], [0, 0], [0, 0], [0, 0], [0, 0], [1, 0], [1, 1], [1, 2], [1, 1], [2, 1], [2, 2]]
1 15 0.99 0.0 [[1, 0], [2, 0], [2, 0], [2, 0], [2, 0], [2, 0], [2, 0], [1, 0], [1, 0], [0, 0], [0, 0], [0, 0], [0, 0], [1, 0], [1, 1], [0, 1]]
2 15 0.9801 0.0 [[1, 0], [1, 1], [1, 0], [2, 0], [2, 0], [2, 0], [2, 0], [2, 1], [2, 0], [2, 0], [2, 0], [1, 0], [2, 0], [2, 0], [1, 0], [1, 1]]
3 15 0.9702989999999999 0.0 [[1, 1], [1, 2], [1, 1], [0, 1], [0, 0], [0, 0], [1, 0], [1, 1], [1, 2], [1, 1], [1, 0], [2, 0], [2, 0], [2, 0], [2, 0], [2, 0]]
4 15 0.96059601 0.06666666666666667 [[1, 0], [0, 0], [0, 0], [0, 1], [0, 2], [0, 2], [0, 2], [0, 2], [0, 1], [0, 0], [0, 1], [1, 1], [1, 0], [2, 0], [2, 0], [2, 1]]
5 8 0.9509900498999999 0.5 [[1, 0], [1, 1], [2, 1], [2, 1], [1, 1], [1, 2], [0, 2], [1, 2], [2, 2]]
6 15 0.9414801494009999 0.06666666666666667 [[0, 1], [0, 0], [1, 0], [1, 1], [1, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 1], [0, 0], [0, 1], [0, 0], [1, 0], [

#### 4 x 4

In [11]:
# records for each episode
time_steps = [] # number of time steps in total
epsilons = [] # epsilon at the end of each episode
greedy = [] # the ratio of greedy choices
trajectory = []

q_class = []

# parameters for training
train_episodes = 1000
size = 4
max_steps = size * 5

# initialize the environment and the q tables
env = Grid(x_size=size)
q = QTables(observation_space=env.observation_space, action_space=env.action_space, eps_start=1, eps_end=0, gamma=0.5, r=0.99, lr=0.1)

# training
for episode in range(train_episodes):
    env.reset()
    state = env.get_agent_obs()
    eps_tmp = q.eps

    greedy_count = 0
    epi_trajectory = []
    epi_trajectory.append(env.get_agent_obs())

    for step in range(max_steps):
        action, greedy_tf = q.get_action(obs=state)
        next_state, reward, done = env.step(action)
        q.train(state, next_state, action, reward, done)

        greedy_count += greedy_tf * 1
        epi_trajectory.append(env.get_agent_obs())

        if done:
            break
    
        # update the observation
        state = next_state

    # record
    time_steps.append(len(epi_trajectory)-1)
    epsilons.append(eps_tmp)
    greedy.append(greedy_count / (step + 1))
    q_class.append(copy.deepcopy(q))
    trajectory.append(epi_trajectory)

    # update epsilon
    q.update_eps()

    print(episode, time_steps[episode], epsilons[episode], greedy[episode], trajectory[episode])



0 2 1 0.0 [[2, 2], [2, 3], [3, 3]]
1 20 0.99 0.0 [[1, 0], [1, 0], [2, 0], [2, 0], [2, 1], [2, 0], [2, 0], [2, 0], [1, 0], [0, 0], [0, 0], [0, 0], [0, 1], [0, 0], [0, 0], [0, 0], [1, 0], [0, 0], [0, 0], [1, 0], [0, 0]]
2 20 0.9801 0.05 [[0, 1], [0, 2], [0, 1], [0, 0], [0, 0], [0, 0], [1, 0], [2, 0], [3, 0], [2, 0], [2, 0], [2, 1], [2, 2], [2, 1], [2, 0], [1, 0], [2, 0], [2, 0], [3, 0], [3, 0], [3, 1]]
3 6 0.9702989999999999 0.16666666666666666 [[2, 2], [1, 2], [1, 3], [0, 3], [1, 3], [2, 3], [3, 3]]
4 20 0.96059601 0.0 [[0, 1], [1, 1], [1, 2], [0, 2], [0, 2], [1, 2], [1, 1], [2, 1], [2, 0], [2, 0], [2, 1], [1, 1], [1, 0], [1, 1], [0, 1], [0, 1], [0, 0], [0, 0], [0, 0], [1, 0], [1, 0]]
5 17 0.9509900498999999 0.0 [[1, 0], [1, 0], [2, 0], [3, 0], [3, 0], [3, 0], [3, 0], [2, 0], [1, 0], [1, 0], [0, 0], [1, 0], [2, 0], [3, 0], [3, 0], [3, 1], [3, 2], [3, 3]]
6 20 0.9414801494009999 0.0 [[1, 0], [0, 0], [0, 1], [1, 1], [1, 0], [2, 0], [3, 0], [3, 0], [3, 0], [3, 1], [3, 2], [2, 2], [2, 1], [

#### 5 x 5

In [12]:
# records for each episode
time_steps = [] # number of time steps in total
epsilons = [] # epsilon at the end of each episode
greedy = [] # the ratio of greedy choices
trajectory = []

q_class = []

# parameters for training
train_episodes = 1000
size = 5
max_steps = size * 5

# initialize the environment and the q tables
env = Grid(x_size=size)
q = QTables(observation_space=env.observation_space, action_space=env.action_space, eps_start=1, eps_end=0, gamma=0.5, r=0.99, lr=0.1)

# training
for episode in range(train_episodes):
    env.reset()
    state = env.get_agent_obs()
    eps_tmp = q.eps

    greedy_count = 0
    epi_trajectory = []
    epi_trajectory.append(env.get_agent_obs())

    for step in range(max_steps):
        action, greedy_tf = q.get_action(obs=state)
        next_state, reward, done = env.step(action)
        q.train(state, next_state, action, reward, done)

        greedy_count += greedy_tf * 1
        epi_trajectory.append(env.get_agent_obs())

        if done:
            break
    
        # update the observation
        state = next_state

    # record
    time_steps.append(len(epi_trajectory)-1)
    epsilons.append(eps_tmp)
    greedy.append(greedy_count / (step + 1))
    q_class.append(copy.deepcopy(q))
    trajectory.append(epi_trajectory)

    # update epsilon
    q.update_eps()

    print(episode, time_steps[episode], epsilons[episode], greedy[episode], trajectory[episode])



0 25 1 0.0 [[2, 0], [3, 0], [3, 1], [3, 0], [4, 0], [4, 1], [4, 2], [4, 1], [4, 1], [4, 1], [4, 2], [3, 2], [3, 3], [3, 2], [3, 3], [3, 2], [3, 1], [2, 1], [3, 1], [3, 0], [4, 0], [3, 0], [3, 0], [2, 0], [2, 0], [3, 0]]
1 25 0.99 0.0 [[1, 1], [1, 0], [0, 0], [0, 0], [0, 0], [0, 0], [1, 0], [1, 0], [1, 0], [1, 1], [0, 1], [1, 1], [2, 1], [1, 1], [1, 0], [2, 0], [3, 0], [3, 1], [2, 1], [3, 1], [3, 0], [2, 0], [2, 0], [1, 0], [2, 0], [3, 0]]
2 25 0.9801 0.04 [[0, 3], [0, 3], [1, 3], [1, 4], [0, 4], [0, 4], [0, 3], [1, 3], [1, 2], [2, 2], [2, 1], [2, 0], [2, 0], [2, 0], [2, 1], [1, 1], [1, 2], [1, 3], [2, 3], [2, 2], [1, 2], [1, 1], [1, 2], [2, 2], [2, 1], [2, 0]]
3 25 0.9702989999999999 0.04 [[0, 0], [0, 1], [0, 2], [0, 3], [1, 3], [2, 3], [1, 3], [2, 3], [2, 2], [2, 3], [2, 4], [2, 4], [1, 4], [1, 3], [0, 3], [1, 3], [1, 4], [2, 4], [2, 3], [2, 2], [3, 2], [4, 2], [3, 2], [2, 2], [1, 2], [1, 1]]
4 11 0.96059601 0.0 [[2, 1], [2, 2], [2, 3], [1, 3], [1, 4], [1, 4], [2, 4], [2, 4], [2, 3], 