Assumptions:
- The number of blocks in the world == number of blocks needed in the locations
- Blocks won't move in the map, unless picked up
- Once a block has been picked and placed on a location, it will not be picked up again
- A block picked up, will be dropped at a location
- The other locobot can observe the world same as us (no partial observability)
- The other locobot is also rational 
- The time taken to pick up and drop a block is proportional to the distance travelled -> modelling it as an SMDP (semi-Markov decision process)
- Robots will not collide with each other
- Belief update is instantaneous upon action selection
- Certainty is computed from the current configuration and the action selected; distance is not taken into account

In [1]:
import numpy as np
import itertools
import copy

In [2]:
# Candidate locations: one row per location, columns are x, y and radius.
station_locations_xy_rad = np.matrix([
    [-0.5, 1.0, 0.15],
    [0.5, 1.25, 0.2],
    [1.0, 0.0, 0.3],
    [0.0, -0.75, 0.15],
])

# Required cube counts: rows are colours (R, G, B, Y), columns are stations (A, B, C).
target_config_rows_rgby_cols_station_ABC = np.matrix([
    [2, 1, 0],
    [1, 0, 1],
    [1, 1, 1],
    [1, 1, 0],
])

# Column indicator vectors (RGBY): a 1 marks a colour the robot is responsible for.
robot_1_colors = np.matrix([1, 1, 0, 0]).T
robot_2_colors = np.matrix([0, 0, 1, 1]).T

# Build the labelled view first, then derive the parallel lists from it.
info_dictionary = {
    "station_locations": station_locations_xy_rad,
    "target_config": target_config_rows_rgby_cols_station_ABC,
    "robot_1_colors": robot_1_colors,
    "robot_2_colors": robot_2_colors,
}
dict_labels = list(info_dictionary)
info = list(info_dictionary.values())
print(info_dictionary)

{'station_locations': matrix([[-0.5 ,  1.  ,  0.15],
        [ 0.5 ,  1.25,  0.2 ],
        [ 1.  ,  0.  ,  0.3 ],
        [ 0.  , -0.75,  0.15]]), 'target_config': matrix([[2, 1, 0],
        [1, 0, 1],
        [1, 1, 1],
        [1, 1, 0]]), 'robot_1_colors': matrix([[1],
        [1],
        [0],
        [0]]), 'robot_2_colors': matrix([[0],
        [0],
        [1],
        [1]])}


In [3]:
# Based on the assumption that the world holds exactly as many cubes as the
# target configuration asks for (no extra cubes).
num_cubes = target_config_rows_rgby_cols_station_ABC.A1.sum()
field_limits = 10  # field is +/- 10 in each direction (x-y)
# The robots start on opposite edges of the field, on the x axis.
robot_1_starting_location = (-field_limits, 0)
robot_2_starting_location = (field_limits, 0)
# One uniformly random (x, y) position per cube.
cubes_location = np.random.uniform(-field_limits, field_limits, (num_cubes, 2))

In [4]:
class Cube(object):
  """A coloured cube lying somewhere in the world."""

  def __init__(self, location, color):
    # `location` is stored as given; `color` is the colour identifier.
    self.location, self.color = location, color
# Number of cubes required of each colour (sum over stations), in RGBY order.
cubes_per_color = target_config_rows_rgby_cols_station_ABC.sum(axis=1).A1

# Cumulative cube count up to and including each colour -> colour index
# (0 red, 1 green, 2 blue, 3 yellow).
num_color_dict = {
    cubes_per_color[:stop].sum(): color
    for color, stop in enumerate((1, 2, 3, None))
}

color_list = [0,1,2,3] # ['red', 'green','blue', 'yellow']

# Cube i gets the first colour whose cumulative count exceeds i, so cubes are
# coloured in consecutive runs: all reds first, then greens, and so on.
cumulative_counts = cubes_per_color.cumsum()
cubes = [
    Cube(color=color_list[np.where(i < cumulative_counts)[0][0]], location=loc)
    for i, loc in enumerate(cubes_location)
]

for i, cube in enumerate(cubes):
  print(i,cube.color, cube.location)

0 0 [0.25269175 0.09634199]
1 0 [-0.23704383 -4.66899869]
2 0 [-7.80990593  2.54056133]
3 1 [-5.18772992 -7.01582824]
4 1 [ 5.84017722 -2.80077935]
5 2 [ 1.01797033 -5.42664637]
6 2 [-0.82036645  1.55479731]
7 2 [6.02492188 2.25782726]
8 3 [7.35320455 2.38748904]
9 3 [-1.43870025  3.30008733]


### State: current config of the 4 locations

### Action: Block to be picked up. Once picked up, action is not available
### Belief: Probability of the location being base station

In [8]:
class Env(object):
    """MDP for a tree with a discrete actions."""

    def __init__(self, 
                 cubes,
                 robot_1_colors,
                 robot_2_colors, 
                 robot_1_starting_location, 
                 robot_2_starting_location, 
                 target_config_rows_rgby_cols_station_ABC, 
                 station_locations_xy_rad,
                 leader=None):
      self.cubes = cubes
      self.robot_1_colors = np.array(color_list)[robot_1_colors.A1 == 1]
      self.robot_2_colors = np.array(color_list)[robot_2_colors.A1 == 1]
      self.robot_1_starting_location = robot_1_starting_location
      self.robot_2_starting_location = robot_2_starting_location
      # Number of stations
      self.number_stations = target_config_rows_rgby_cols_station_ABC.shape[1]
      # NUmber of locations
      self.number_locations = station_locations_xy_rad.shape[0]
      # Creating a mapping of stations to number and vice-versa
      self.num2station = {}
      self.station2num = {}
      for i in range(self.number_stations):
        self.num2station[i] = chr(ord('A') + i)
        self.station2num[chr(ord('A') + i)] = i
      # Setting target config up
      self.target_config = target_config_rows_rgby_cols_station_ABC.T
      
      # Coordinates of the possible locations
      self.station_locations = station_locations_xy_rad

      # Creates a map of stations to locations based on current belief
      self.station_2_location_map = {}
      for i in self.num2station:
        self.station_2_location_map[self.num2station[i]] = None


      self.robot_1_current_location = self.robot_1_starting_location
      self.robot_2_current_location = self.robot_2_starting_location
      self.robot_1_actions = self.get_actions(self.robot_1_colors)
      self.robot_2_actions = self.get_actions(self.robot_2_colors)
      print("Robot 1 actions:", self.robot_1_actions)
      print("Robot 2 actions:", self.robot_2_actions)

      # Belief is the Categorical Distribution of Station being at locations
      self.belief = self.init_belief()
      print("Belief: ", self.belief)
      # self.print_belief()

      # State of the 4 locations, which is used to build belief
      self.state = self.init_state()

      self.current_config = np.matrix([np.zeros(4),
                                       np.zeros(4),
                                       np.zeros(4)])
      
      self.color2num = {'red': 0, 'green': 1, 'blue': 2, 'yellow': 3}
      self.num2color = {0: 'red', 1: 'green', 2: 'blue', 3:'yellow'}

      print("Init state:", self.state)
      print("Init config: ", self.current_config)
      self.leader = leader
      self.dt = 1

    def get_actions(self, color_assigned):
      actions = []
      for i, cube in enumerate(self.cubes):
        if(cube.color in color_assigned):
          actions.append(i)
      return actions
          
    def init_state(self):
      return np.zeros((self.number_locations, 4))
    
    def init_belief(self):
      # SUm of each column = 1
      belief = (1/self.number_locations) *np.ones((self.number_locations, self.number_stations))
      # for i in self.num2station:
      #   belief[self.num2station[i]] = Categorical([i for i in range(self.number_locations)])
      return belief
    
    def euclidean_distance(self, loc_1, loc_2):
      """Return the Euclidean distance between two points.

      Both points are converted to float arrays first, so any mix of tuples,
      lists and arrays is accepted (robot start locations are plain tuples,
      for which `tuple - tuple` would otherwise raise TypeError).
      """
      offset = np.asarray(loc_1, dtype=float) - np.asarray(loc_2, dtype=float)
      return np.linalg.norm(offset)
    
    def get_min_dist_cube_of_color(self, color, robot_location, possible_actions, goal_location):
      distances = {}
      for i in possible_actions:
        if self.cubes[i].color == color:
          distances[i] = self.euclidean_distance(robot_location, self.cubes[i].location) + self.euclidean_distance(goal_location, self.cubes[i].location)
      # print("Distances:", distances)
      dist_list = [distances[idx] for idx in distances]
      idx_list = [idx for idx in distances]
      return idx_list[np.argmin(dist_list)], np.min(dist_list)

    def possible_check(self, location_state, target_config):
      """Return True when no colour count in `location_state` exceeds `target_config`."""
      within_target = location_state <= target_config
      return within_target.all()

    def get_all_possible_action_station(self, robot_colors, station_2_location_map):
      possible_action_station_list = []
      for station in station_2_location_map.keys():
        for color in robot_colors:
          # print("Simulating placing a block of color {} in station {}".format(self.num2color[color], station))
          pseudo_state = self.state[station_2_location_map[station]].copy()
          pseudo_state[color] += 1
          if self.possible_check(pseudo_state, self.target_config[self.station2num[station],:].A1):
            possible_action_station_list.append((color, station))
      return possible_action_station_list
    
    def get_color_station_for_min_certainty(self, robot_colors, station_2_location_map):
      # print("station_2_location_map:", station_2_location_map)
      # print("Robot colors:", robot_colors)
      # print("state:", self.state)
      possible_action_station_list = self.get_all_possible_action_station(robot_colors, station_2_location_map)
      # Choose the action with the change in belief
      min_diff = np.inf
      min_diff_action = None
      for action in possible_action_station_list:
        # print("Action", action)
        color, station = action
        location = station_2_location_map[station]

        pseudo_belief = self.belief.copy()
        assert id(pseudo_belief) != id(self.belief)
        pseudo_state = self.state.copy()
        pseudo_state[location][color] += 1

        psuedo_belief = self.update_belief(pseudo_belief, pseudo_state)
        diff = psuedo_belief - self.belief
        diff_mag = np.linalg.norm(diff.flatten())
        # print("Diff:", diff_mag)

        if diff_mag < min_diff:
          min_diff = diff_mag
          min_diff_action = action
      return min_diff_action

    def get_color_station_for_max_certainty(self, robot_colors, station_2_location_map):
      # print("station_2_location_map:", station_2_location_map)
      # print("Robot colors:", robot_colors)
      # print("state:", self.state)
      possible_action_station_list = self.get_all_possible_action_station(robot_colors, station_2_location_map)
      # Choose the action with the change in belief
      max_diff = 0
      max_diff_action = None
      for action in possible_action_station_list:
        # print("Action", action)
        color, station = action
        location = station_2_location_map[station]

        pseudo_belief = self.belief.copy()
        assert id(pseudo_belief) != id(self.belief)
        pseudo_state = self.state.copy()
        pseudo_state[location][color] += 1
        # print("Pseudo state")
        # print(pseudo_state)
        # print("Current belief")
        # print(pseudo_belief)
        psuedo_belief = self.update_belief(pseudo_belief, pseudo_state)
        # print("Updated belief")
        # print(pseudo_belief)
        diff = psuedo_belief - self.belief
        diff_mag = np.linalg.norm(diff.flatten())
        # print("Diff:", diff_mag)

        if diff_mag > max_diff:
          max_diff = diff_mag
          max_diff_action = action
      return max_diff_action
    
    def column_lookup(self, column_data, disallowed_rows):
      """Find the most probable allowed row of a belief column.

      Returns (row_index, probability) of the first strictly largest positive
      entry whose index is not in `disallowed_rows`, or (None, 0) when every
      allowed entry is zero.
      """
      best_row, best_prob = None, 0
      for row, prob in enumerate(column_data):
        if row in disallowed_rows:
          continue
        if prob > best_prob:
          best_row, best_prob = row, prob
      return best_row, best_prob

    def stations_from_belief_one_pass(self, disallowed_locations):
      location_list = []
      location_prob_list = []
      station_list = []
      for station in range(self.belief.shape[1]):
        # print(station)
        # print(f"For station {station}, belief: {self.belief[:,station]}")
        location, location_prob = self.column_lookup(self.belief[:,station], disallowed_locations)
        location_list.append(location)
        location_prob_list.append(location_prob)
        station_list.append(station)
      # print(station_list)

      return location_list, location_prob_list, station_list

    def stations_from_belief(self):
      disallowed_locations = []
      location_list, location_prob_list, station_list = self.stations_from_belief_one_pass(disallowed_locations)      
      # print("location_list:", location_list)
      # print("location_prob_list:", location_prob_list)
      if len(set(location_list)) != self.number_stations: # There are repetitions
      # Sort the location list and station list in terms of probability. idx match each other
        sorted_locations = np.array([x for _, x in sorted(zip(location_prob_list, location_list))])
        sorted_stations = np.array([x for _, x in sorted(zip(location_prob_list, station_list))])
        # print("sorted_location:", sorted_locations)
        # print("sorted_stations:", sorted_stations)
        # Iteratively remove possibilities
        for i in range(1, len(sorted_locations)):
          disallowed_locations.append(sorted_locations[i-1])
          location, location_prob = self.column_lookup(self.belief[sorted_stations[i]], disallowed_locations)
          sorted_locations[i] = location

      else:
        sorted_stations = station_list
        sorted_locations = location_list  
      location_map = {}
      for i in range(len(sorted_locations)):
        location_map[self.num2station[sorted_stations[i]]] = sorted_locations[i]

      return location_map  

    def remove_action(self, action_list, action_taken):
      """Remove `action_taken` from `action_list` in place and return that same list."""
      remaining = action_list
      remaining.remove(action_taken)
      return remaining

    def update_belief(self, belief, state):
      for loc in range(self.number_locations):
        for station in range(self.number_stations):
          # print(f"Station: {station}, location: {loc}")
          likelihood = 1 if self.check_possible(state, loc, station) else 0
          # print(likelihood)
          belief[loc,station] = likelihood * belief[loc,station]
          # If any of the belief in a location is 0, then we need to normalize row-wise
          if (belief[loc,:] == 0).any():
            belief[loc,:] = belief[loc,:]/(sum(belief[loc,:])+1e-6)

      # There is a row which should be zero-ed out
      if sum([sum(state[loc]) > 0 for loc in range(self.number_locations)]) >= self.number_stations:
        for loc in range(self.number_locations):
          if sum(state[loc]) == 0:
            belief[loc,:] = np.zeros(self.number_stations)
      #  Normalize column-wise
      for station in range(self.number_stations):    
        belief[:,station] = belief[:,station]/(sum(belief[:,station]) +1e-6)
        # print("For station:", self.belief[:][station])
      # print(belief)
      return belief

      # for station in range(self.number_stations):
      #   print(sum(self.belief[:,station]))

    def check_possible(self, state, location, station):
      # print(self.target_config)
      location_info = state[location]
      target_info = self.target_config[station,:].A1
      # print(location_info)
      # print(target_info)
      return (location_info <= target_info).all()

    def psuedo_state_update(self, color, location):
      psuedo_state = copy.deepcopy(self.state)
      psuedo_state[location][color] += 1
      return psuedo_state
    
    def update_state(self, color, location):
      # print('color', color)
      # print("location:", location)
      self.state[location][color] +=1

    def update_config(self, color, station):
      self.current_config[self.station2num[station], color] += 1 

    def run(self):
      # In the beginning, coin toss on who's the leader: 0: robot_1, 1: robot_1
      if self.leader is None:
        self.leader = np.random.choice([0,1]) 
      print("Leader:", self.leader+1)

      t = 0
      robot_1_time_left = 0
      robot_2_time_left = 0
      while True:
        print(50*'*')
        print("Curr time:", t)
        print("Curr state:")
        print(self.state)
        print("Current belief:")
        print(self.belief)
        print("Current config: ")
        print(self.current_config)
        print("Target config: ")
        print(self.target_config)
        station_2_location_map = self.stations_from_belief() 
        print(20*'#')

        if robot_1_time_left == 0 and robot_2_time_left == 0: # Both robots are free
          print("Both robots are free")
          if len(self.robot_1_actions)> 0 and len(self.robot_2_actions) > 0 and self.leader == 0: # If robot 1 is leader
            print("Robot 1 is leader and both robots have actions remaining")
            robot_1_cube_color, robot_1_goal = self.get_color_station_for_max_certainty(self.robot_1_colors, station_2_location_map)
            robot_2_cube_color, robot_2_goal = self.get_color_station_for_min_certainty(self.robot_2_colors, station_2_location_map)
          elif len(self.robot_1_actions)> 0 and len(self.robot_2_actions) > 0 and self.leader == 1: # If robot 2 is leader
            print("Robot 2 is leader and both robots have actions remaining")
            robot_1_cube_color, robot_1_goal = self.get_color_station_for_min_certainty(self.robot_1_colors, station_2_location_map)
            robot_2_cube_color, robot_2_goal = self.get_color_station_for_max_certainty(self.robot_2_colors, station_2_location_map)
          elif len(self.robot_1_actions)> 0: # Robot 2 is done with it's task. Only robot 1 left
            print("Robot 2 is done with it's task. Only robot 1 left")
            robot_1_cube_color, robot_1_goal = self.get_color_station_for_max_certainty(self.robot_1_colors, station_2_location_map)
          elif len(self.robot_2_actions)> 0: # Robot 1 is done with it's task. Only robot 2 left
            print("Robot 1 is done with it's task. Only robot 2 left")
            robot_2_cube_color, robot_2_goal = self.get_color_station_for_max_certainty(self.robot_2_colors, station_2_location_map)

        elif robot_1_time_left == 0:
          print("Robot 1 is free")
          if len(self.robot_1_actions)> 0 : # Robot 2 still executing. Robot 1 can act like leader
            print("Robot 1 has actions left")
            robot_1_cube_color, robot_1_goal = self.get_color_station_for_max_certainty(self.robot_1_colors, station_2_location_map)
        elif robot_2_time_left == 0:
          print("Robot 2 is free")
          if len(self.robot_2_actions)> 0:
            print("Robot 2 has actions left")
            robot_2_cube_color, robot_2_goal = self.get_color_station_for_max_certainty(self.robot_2_colors, station_2_location_map)
        else:
          print("ERROR")

        
        station_2_location_map = self.stations_from_belief() 
        print("Station_2_location_map:", station_2_location_map)
        if robot_1_time_left == 0: 
          robot_1_goal_location = station_2_location_map[robot_1_goal]
          print("Possible actions for robot 1: ", self.robot_1_actions)
          # Compute distances of robot from each cube of color and selected goal location 
          robot_1_cube, robot_1_dist = self.get_min_dist_cube_of_color(robot_1_cube_color, self.robot_1_current_location, self.robot_1_actions, robot_1_goal_location)
          print(f"Robot_1 chooses to put cube {robot_1_cube} which is {self.num2color[robot_1_cube_color]} to goal {robot_1_goal} which it assumes is location {robot_1_goal_location}")
          robot_1_time = np.ceil(robot_1_dist)
          robot_1_time_left += robot_1_time

          self.robot_1_location = robot_1_goal_location
          self.robot_1_actions = self.remove_action(self.robot_1_actions, robot_1_cube)
          
          self.update_config(robot_1_cube_color, robot_1_goal)
          self.update_state(robot_1_cube_color, robot_1_goal_location)

        
        if robot_2_time_left == 0:
          robot_2_goal_location = station_2_location_map[robot_2_goal]
          print("Possible actions for robot 2: ", self.robot_2_actions)
          # Compute distances of robot from each cube of color and selected goal location 
          robot_2_cube, robot_2_dist = self.get_min_dist_cube_of_color(robot_2_cube_color, self.robot_2_current_location, self.robot_2_actions, robot_2_goal_location)
          print(f"Robot_2 chooses to put cube {robot_2_cube} which is {self.num2color[robot_2_cube_color]} to goal {robot_2_goal} which it assumes is location {robot_2_goal_location}")
          robot_2_time = np.ceil(robot_2_dist)
          robot_2_time_left += robot_2_time

          self.robot_2_location = robot_2_goal_location
          self.robot_2_location = robot_2_goal_location
          self.robot_2_actions = self.remove_action(self.robot_2_actions, robot_2_cube)
          self.update_config(robot_2_cube_color, robot_2_goal)
          self.update_state(robot_2_cube_color, robot_2_goal_location)

        # print("Updated config: ", self.current_config)
        # print("Updated state: ", self.state)
        self.belief = self.update_belief(self.belief, self.state)
        t_step = min(robot_1_time_left, robot_2_time_left)
        print("Time step: ", t_step)
        robot_1_time_left = max(0, robot_1_time_left - t_step)
        robot_2_time_left = max(0, robot_2_time_left - t_step)
        print("Time left for robot 1: ", robot_1_time_left)
        print("Time left for robot 2: ", robot_2_time_left)

        t += t_step
        if (self.current_config == self.target_config).all():# break condition
          print(50*'*')
          print(50*'*')
          print("Reached desired config")
          print(50*'*')
          print(50*'*')
          break
        
        if len(self.robot_1_actions) == 0 and len(self.robot_2_actions) == 0:
          print("All actions completed")
          break

      print("Final State:")
      print(self.state)
      print("Final config:", self.current_config)
      print("Final belief:", self.belief)


In [9]:
env = Env(cubes,
          robot_1_colors,
          robot_2_colors, 
          robot_1_starting_location, 
          robot_2_starting_location, 
          target_config_rows_rgby_cols_station_ABC, 
          station_locations_xy_rad,
          leader=1)

Robot 1 actions: [0, 1, 2, 3, 4]
Robot 2 actions: [5, 6, 7, 8, 9]
Belief:  [[0.25 0.25 0.25]
 [0.25 0.25 0.25]
 [0.25 0.25 0.25]
 [0.25 0.25 0.25]]
Init state: [[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
Init config:  [[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]


In [10]:
env.run()

Leader: 2
**************************************************
Curr time: 0
Curr state:
[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
Current belief:
[[0.25 0.25 0.25]
 [0.25 0.25 0.25]
 [0.25 0.25 0.25]
 [0.25 0.25 0.25]]
Current config: 
[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
Target config: 
[[2 1 1 1]
 [1 0 1 1]
 [0 1 1 0]]
####################
Both robots are free
Robot 2 is leader and both robots have actions remaining
Station_2_location_map: {'A': 0, 'B': 1, 'C': 2}
Possible actions for robot 1:  [0, 1, 2, 3, 4]
Robot_1 chooses to put cube 0 which is red to goal B which it assumes is location 1
Possible actions for robot 2:  [5, 6, 7, 8, 9]
Robot_2 chooses to put cube 8 which is yellow to goal A which it assumes is location 0
Time step:  12.0
Time left for robot 1:  0
Time left for robot 2:  0
**************************************************
Curr time: 12.0
Curr state:
[[0. 0. 0. 1.]
 [1. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
Current belief:
[[0.33333289 

In [None]:
def update_belief(self,):
  """Scratch version of the belief update: filter and renormalize column by column.

  For every station, entries of `self.belief` whose location is incompatible
  with the station (per `check_possible`) are zeroed, then the station's
  column is divided by its sum. The belief is modified in place and printed.
  """
  for station_idx in range(self.number_stations):
    for loc_idx in range(self.number_locations):
      feasible = check_possible(self, loc_idx, station_idx)
      self.belief[loc_idx, station_idx] = (1 if feasible else 0) * self.belief[loc_idx, station_idx]
    # Normalize the station's column
    column_total = sum(self.belief[:, station_idx])
    self.belief[:, station_idx] = self.belief[:, station_idx] / column_total
  print(self.belief)

  # for station in range(self.number_stations):
  #   print(sum(self.belief[:,station]))
  # Now check for each row, if there is only 

def check_possible(self, location, station):
  """Return True when the cubes at `location` fit within the target of `station`.

  Compares row `location` of `self.state` element-wise against row `station`
  of `self.target_config`; no count may exceed its target.
  """
  placed = self.state[location]
  required = self.target_config[station,:].A1
  fits = placed <= required
  return fits.all()
# Scratch call: runs the cell-local update_belief defined above on `env`,
# which rewrites env.belief in place and prints it.
update_belief(env)

In [None]:


# The helper takes the station -> location map as its third argument; derive
# it from the current belief, the same way Env.run does.
cc, station = get_color_station_for_min_certainty(env, env.robot_1_colors, env.stations_from_belief())
print(cc, station)
# def get_station_for_color(self, color_selected):
#   sequence = env.target_config[color_selected]-env.current_config[color_selected]
#   print(sequence)
# get_station_for_color(env, cc)

In [90]:
def get_color_station_for_min_certainty(self, robot_colors, station_2_location_map):
  """Debug version: return the feasible (color, station) that changes the belief least.

  Every feasible placement is simulated on copies of the state and belief of
  `self`; the change is the norm of the difference between the simulated and
  the current belief. Each candidate is printed. Ties go to the first
  candidate; None is returned when there is no feasible placement.
  """
  candidates = self.get_all_possible_action_station(robot_colors, station_2_location_map)
  least_informative = None
  smallest_change = np.inf
  for candidate in candidates:
    print("Action", candidate)
    color, station = candidate
    # Simulate the drop without touching the real state or belief.
    trial_state = self.state.copy()
    trial_state[station_2_location_map[station]][color] += 1
    trial_belief = self.update_belief(self.belief.copy(), trial_state)
    change = np.linalg.norm((trial_belief - self.belief).flatten())
    if change < smallest_change:
      smallest_change, least_informative = change, candidate
  return least_informative
  

def get_color_station_for_max_certainty(self, robot_colors, station_2_location_map):
  """Debug version: return the feasible (color, station) that changes the belief most.

  Every feasible placement is simulated on copies of the state and belief of
  `self`; the change is the norm of the difference between the simulated and
  the current belief. Each candidate and its change are printed. Ties go to
  the first candidate.

  Returns:
    The chosen (color, station) tuple, or None only when there is no feasible
    placement at all.
  """
  possible_action_station_list = self.get_all_possible_action_station(robot_colors, station_2_location_map)
  # Start below any attainable magnitude: starting at 0 with a strict
  # comparison would return None when no candidate changes the belief.
  max_diff = -np.inf
  max_diff_action = None
  for action in possible_action_station_list:
    print("Action", action)
    color, station = action
    location = station_2_location_map[station]

    # Simulate the drop without touching the real state or belief.
    pseudo_state = self.state.copy()
    pseudo_state[location][color] += 1
    pseudo_belief = self.update_belief(self.belief.copy(), pseudo_state)

    diff_mag = np.linalg.norm((pseudo_belief - self.belief).flatten())
    print("Diff:", diff_mag)

    if diff_mag > max_diff:
      max_diff = diff_mag
      max_diff_action = action
  return max_diff_action

# Hand-written station -> location assignment used for this experiment
# (not derived from env.belief).
station_2_location_map = {'A': 1, 'B': 0, 'C': 2}
# Put one cube of colour index 1 at location 2. This mutates env.state directly.
env.state[2][1] = 1
# Ask which (colour, station) placement robot 2 would make under this map.
cc, station = get_color_station_for_max_certainty(env, env.robot_2_colors, station_2_location_map)
print(cc, station)
# cc, station = get_color_station_for_min_certainty(env, env.robot_2_colors)
# print(cc, station)
# cc, station = get_color_station_for_max_certainty(env, env.robot_1_colors)
# print(cc, station)
# cc, station = get_color_station_for_min_certainty(env, env.robot_1_colors)
# print(cc, station)

Action (2, 'A')
[[0.2        0.33333333 0.2       ]
 [0.2        0.33333333 0.2       ]
 [0.4        0.         0.4       ]
 [0.2        0.33333333 0.2       ]]
Diff: 0.37859388972001823
Action (3, 'A')
[[0.16666667 0.25       0.25      ]
 [0.33333333 0.5        0.        ]
 [0.33333333 0.         0.5       ]
 [0.16666667 0.25       0.25      ]]
Diff: 0.5270462766947299
Action (2, 'B')
[[0.2        0.33333333 0.2       ]
 [0.2        0.33333333 0.2       ]
 [0.4        0.         0.4       ]
 [0.2        0.33333333 0.2       ]]
Diff: 0.37859388972001823
Action (3, 'B')
[[0.33333333 0.5        0.        ]
 [0.16666667 0.25       0.25      ]
 [0.33333333 0.         0.5       ]
 [0.16666667 0.25       0.25      ]]
Diff: 0.5270462766947299
Action (2, 'C')
[[0.2        0.33333333 0.2       ]
 [0.2        0.33333333 0.2       ]
 [0.4        0.         0.4       ]
 [0.2        0.33333333 0.2       ]]
Diff: 0.37859388972001823
3 A
