In [1]:
import numpy as np
import random
from collections import defaultdict
from fractions import Fraction

In [None]:
# map + 
# agents
# boxes
# step

In [2]:
class segment:
    #class for 1-d segment 
    def __init__(self, begin, end):
        self.begin = min(begin, end)
        self.end = max(end, begin)
    def __sub__(self, seg_b):
        if(seg_b.begin >= self.end or seg_b.end <= self.begin):
            return segment(self.begin, self.end)
        elif (self.begin >= seg_b.begin and self.end <= seg_b.end):
            return segment(0, 0)
        else:
            if seg_b.end <= self.end:
                return segment(seg_b.end, self.end)
            else:
                return segment(self.begin, seg_b.begin)
            return segment
    def __add__(self, seg_b):
        if(seg_b.begin >= self.end or seg_b.end <= self.begin):
            return segment(self.begin, self.end)
        else:
            return segment(min(self.begin, seg_b.begin), max(self.end, seg_b.end))
    def len(self):
        return self.end - self.begin

def intersect(seg_a, seg_b):
    #Returns True if two segments intersect and False otherwise
    #seg_a = segment
    #seg_b = segment
    if(seg_b.begin >= seg_a.end or seg_b.end <= seg_a.begin):
        return False
    return True

def diagonal_vision(map_array, player_coordinates, direction_array, length = 100):
    #Returns vision mask with same shape as map_array, where 1 stands if the cell is at least partly visible and 0 otherwise
    #This function is for diagonal directions only
    #map_array = np.array(n, n)
    #player_coordinates = (x, y) - tuple
    #direction_array = [x, y]
    obstacles = []
    vision_mask = np.zeros(map_array.shape, dtype=int)
    vision_mask[player_coordinates] = 1
    for sight in range(1, length + 1):
        for x in range(sight + 1):
            coordinates = (player_coordinates[0] + x * direction_array[0], player_coordinates[1] + (sight - x) * direction_array[1])
            if coordinates[0] >= len(map_array) or coordinates[1] >= len(map_array[0]) or coordinates[0] < 0 or coordinates[1] < 0:
                pass
            else:
                if len(obstacles) == 0:
                    vision_mask[coordinates] = 1
                else:
                    cur_seg = segment(Fraction(x, (sight + 1)), Fraction((x + 1), (sight + 1)))
                    for i in obstacles:
                        if intersect(cur_seg, i):
                            cur_seg = cur_seg - i
                    if cur_seg.len() > 0:
                        vision_mask[coordinates] = 1
                    else:
                        vision_mask[coordinates] = 0
                if(map_array[coordinates] != 0):
                    obstacles.append(segment(Fraction(x, (sight + 1)), Fraction((x + 1), (sight + 1))))
    return vision_mask

def vision(map_array, player_coordinates, direction_number, length = 100):
    #Returns vision mask with same shape as map_array, where 1 stands if the cell is at least partly visible and 0 otherwise
    #This function is for all directions
    #map_array = np.array(n, n)
    #player_coordinates = (x, y)
    #direction_number = number from 0 to 7
    direction_arrays = [(-1, 0), (-1, 1), (0, 1), (1, 1), (1, 0), (1, -1), (0, -1), (-1, -1)]

    direction_array = direction_arrays[direction_number]
    if direction_array[0] != 0 and direction_array[1] != 0:
        return diagonal_vision(map_array, player_coordinates, direction_array, length = 100)
    elif direction_array[0] != 0:
        diagonal_vision_mask1 = diagonal_vision(map_array, player_coordinates, (direction_array[0], 1), length = 100)
        diagonal_vision_mask2 = diagonal_vision(map_array, player_coordinates, (direction_array[0], -1), length = 100)
        linear_vision_mask = np.zeros(map_array.shape, dtype=int)
        
        linear_vision_mask[player_coordinates] = 1
        for x in range(1, length + 1):
            for y in range(x + 1):
                coordinates = (player_coordinates[0] + x * direction_array[0], player_coordinates[1] + y)
                if coordinates[0] >= len(map_array) or coordinates[1] >= len(map_array[0]) or coordinates[0] < 0 or coordinates[1] < 0 or x + y > length:
                    pass
                else:
                    linear_vision_mask[coordinates] = 1
                coordinates = (player_coordinates[0] + x * direction_array[0], player_coordinates[1] - y)
                if coordinates[0] >= len(map_array) or coordinates[1] >= len(map_array[0]) or coordinates[0] < 0 or coordinates[1] < 0 or x + y > length:
                    pass
                else:
                    linear_vision_mask[coordinates] = 1
        return np.maximum(diagonal_vision_mask1, diagonal_vision_mask2) * linear_vision_mask
    else:
        diagonal_vision_mask1 = diagonal_vision(map_array, player_coordinates, (1, direction_array[1]), length = 100)
        diagonal_vision_mask2 = diagonal_vision(map_array, player_coordinates, (-1, direction_array[1]), length = 100)
        linear_vision_mask = np.zeros(map_array.shape, dtype=int)
        
        linear_vision_mask[player_coordinates] = 1
        for x in range(1, length + 1):
            for y in range(x + 1):
                coordinates = (player_coordinates[0] + y, player_coordinates[1] + x * direction_array[1])
                if coordinates[0] >= len(map_array) or coordinates[1] >= len(map_array[0]) or coordinates[0] < 0 or coordinates[1] < 0 or x + y > length:
                    pass
                else:
                    linear_vision_mask[coordinates] = 1
                coordinates = (player_coordinates[0] - y, player_coordinates[1] + x * direction_array[1])
                if coordinates[0] >= len(map_array) or coordinates[1] >= len(map_array[0]) or coordinates[0] < 0 or coordinates[1] < 0 or x + y > length:
                    pass
                else:
                    linear_vision_mask[coordinates] = 1
        return np.maximum(diagonal_vision_mask1, diagonal_vision_mask2) * linear_vision_mask

In [55]:
class env:
    def __init__(self, 
                 map_version, 
                 map_size, 
                 step_limit = 240, 
                 preparation = 0.4,
                 agents_number = [2, 2] , 
                 boxes_number = 4, 
                 debug = 0, 
                 use_seed = False, 
                 seed = 42):
        
        # self.seed = seed
        # self.use_seed = use_seed
        # if self.use_seed:
        #     random.seed(self.seed)
            
        self.debug = debug
        self.map_size = map_size
        self.base_step_limit = step_limit
        self.step_limit = self.base_step_limit
        self.box_amount = boxes_number
        self.seakers_amount = agents_number[0]
        self.hiders_amount = agents_number[1]
        self.team_mask = {0: 'seekers', 1:'hiders'}
        
        # self.valid_rotations = ['-90 deg', '-45 deg', 'pass', '+45 deg', '+90 deg']
        self.valid_rotations = [-2, -1, 0, 1, 2]
        self.sights = [0,1,2,3,4,5,6,7]
        
        # self.valid_moves = ['backward', 'pass', 'forward']
        self.valid_moves = [-1, 0, 1]
        # self.grabull ['pass', 'pull/push']
        self.valid_handle = [0, 1]
        # self.box_interactions = ['pass', 'lock', 'unlock']
        self.box_interactions = [0, 1, 2]
        
        self.move_map = {0:[-1,0], 
                         1:[-1,1], 
                         2:[0,1], 
                         3:[1,1],
                         4:[1,0],
                         5:[1,-1],
                         6:[0,-1],
                         7:[-1,-1]}
        
        if map_version == 1:
            self.create_empty_map_v1()
        self.create_boxes()
        self.initialize_agents()

    def create_empty_map_v1_with_walls(self):
        if isinstance(self.map_size, int):
            if self.map_size >=6:
                # create empty map with aaditional place for external walls
                self.map = np.zeros((self.map_size+2, self.map_size+2), dtype=int)
                # add external walls
                self.map[0], self.map[-1] = 1, 1
                for i in range(self.map_size+2):
                    self.map[i][0], self.map[i][-1] = 1, 1

                # add room, with side of half of the map size
                self.room_size = self.map_size//2
                self.map[-(self.room_size+1)][-(self.room_size+1):] = 1
                for j in range(self.room_size):
                    self.map[-(j+1)][-(self.room_size+1)]=1

                # add doors in both walls of the room
                self.map[-(self.room_size+1)][-2:-1] = 0
                self.map[-(self.room_size-1)][-(self.room_size+1)] = 0
                #return(map_obj)
            elif self.map_size < 6 and self.map_size >= 0:
                print('Error: map size is too small, correct size is 6+')
        else:
            print('Error: incorrect map size type: expected int')
            
    def create_empty_map_v1(self):
        if isinstance(self.map_size, int):
            if self.map_size >=6:
                # create empty map with
                self.map = np.zeros((self.map_size, self.map_size), dtype=int)

                # add room, with side of half of the map size
                self.room_size = self.map_size//2
                self.map[-(self.room_size+1)][-(self.room_size+1):] = 1
                for j in range(self.room_size):
                    self.map[-(j+1)][-(self.room_size+1)]=1

                # add doors in both walls of the room
                rand_1 = random.randint(-(self.room_size), -1)
                rand_2 = random.randint(-(self.room_size), -1)
                self.map[-(self.room_size+1)][rand_1] = 0
                self.map[rand_2][-(self.room_size+1)] = 0
                #return(map_obj)
            elif self.map_size < 6 and self.map_size >= 0:
                print('Error: map size is too small, correct size is 6+')
        else:
            print('Error: incorrect map size type: expected int')
            
    def create_boxes(self):
        self.boxes = defaultdict()
        for i in range(self.box_amount):
            
            filt = (self.map == 0)
            
            for j in self.boxes.keys():
                coords = self.boxes[j]['coords']
                filt[coords[0], coords[1]] = 0
                
            filt = np.array(filt, dtype = 'int')
            rnd_p = random.choice(np.argwhere(filt))
            self.boxes[i] = defaultdict()
            self.boxes[i]['coords'] = list(rnd_p)
            self.boxes[i]['locked'] = False
            self.boxes[i]['locked_team'] = None
            
    def initialize_agents(self):
        self.agents = defaultdict()
        self.agents['seekers'] = defaultdict()
        self.agents['hiders'] = defaultdict()
        
        for s in range(self.seakers_amount):
            filt = (self.map == 0)
            for i in self.boxes.keys():
                coords = self.boxes[i]['coords']
                filt[coords[0], coords[1]] = 0
            filt = np.array(filt, dtype = 'int')
            
            for agent_id in self.agents['seekers'].keys():
                ag_coords = self.agents['seekers'][agent_id]['coords']
                filt[ag_coords[0], ag_coords[1]] = 0
            
            new_agent_coords = rnd_p = random.choice(np.argwhere(filt))
            new_agent = defaultdict()
            new_agent['coords'] = new_agent_coords
            new_agent['sight'] = random.randint(0, 8)
            self.agents['seekers'][s] = new_agent
            
        for h in range(self.hiders_amount):
            filt = (self.map == 0)
            for i in self.boxes.keys():
                coords = self.boxes[i]['coords']
                filt[coords[0], coords[1]] = 0
            filt = np.array(filt, dtype = 'int')
            
            for agent_id in self.agents['seekers'].keys():
                ag_coords = self.agents['seekers'][agent_id]['coords']
                filt[ag_coords[0], ag_coords[1]] = 0
                
            for agent_id in self.agents['hiders'].keys():
                ag_coords = self.agents['hiders'][agent_id]['coords']
                filt[ag_coords[0], ag_coords[1]] = 0
                
            new_agent_coords = rnd_p = random.choice(np.argwhere(filt))
            new_agent = defaultdict()
            new_agent['coords'] = new_agent_coords
            new_agent['sight'] = random.randint(0, 8)
            self.agents['hiders'][h] = new_agent
            
    def check_coords(self, coords):
        # check if it's outside the map
        if (coords[0] >= 0)&(coords[0] < self.map_size)&(coords[1] >= 0)&(coords[1] < self.map_size):
            # check if it is wall
            map_section = self.map[coords[0], coords[1]]
            # if it is free
            if map_section == 0:
                # check if agent there
                no_agent = True
                for team in ['hiders', 'seekers']:
                    for agent_key in list(self.agents[team].keys()):
                        if list(self.agents[team][agent_key]['coords']) == list(coords):
                            no_agent = False
                # if there is no agent
                if no_agent:
                    # check if there a box
                    no_box = True
                    
                    for b_ in range(self.box_amount):
                        # check collision
                        if list(self.boxes[b_]['coords']) == list(coords):
                            # if collision check if it's a free space
                            saved_box = b_
                            no_box = False
                    if no_box:
                        return [True]
                    
                    else:
                        return [False, 'box', saved_box]
                else:
                    return [False, 'agent']
            else:
                return [False, 'wall']
        else:
            return [False, 'map edge']
                
    def move(self, agent_teem_id, agent_id, rotation_code, move_code, handle, box_action_code):
        # check step limit
        if self.step_limit == 0:
            if self.debug:
                print('step limit reached, error')
        # if not reached
        else:
            # check imputs
            self.step_limit -= 1
            
            if self.debug:
                print(self.step_limit, 'steps left')

            if rotation_code not in self.valid_rotations:
                if self.debug:
                    print('icorrect rotation') 
            else:
                old_sight = self.agents[self.team_mask[agent_teem_id]][agent_id]['sight']
                new_sight = self.sights[(old_sight + rotation_code)%8]
                self.agents[self.team_mask[agent_teem_id]][agent_id]['sight'] = new_sight
                    
            if move_code not in self.valid_moves:
                if self.debug:
                    print('icorrect move')
    
            # MOVE FORWARD
            elif move_code == 1:
                
                agent_old_coords = self.agents[self.team_mask[agent_teem_id]][agent_id]['coords']
                direction = self.agents[self.team_mask[agent_teem_id]][agent_id]['sight']
                move = self.move_map[direction][0], self.move_map[direction][1]
                
                y_old, x_old = agent_old_coords[0], agent_old_coords[1]
                y_shift, x_shift = move[0], move[1]
                
                new_coords = [y_old + y_shift, x_old + x_shift]
                new_crds_check = self.check_coords(new_coords)
                if new_crds_check[0] == True:
                    self.agents[self.team_mask[agent_teem_id]][agent_id]['coords'] = np.array(new_coords)
                    if self.debug:
                        print('moving forward')
                    
                if new_crds_check[0] == False:
                    # if there is a box
                    if new_crds_check[1] == 'box':
                        if handle == 1:
                            # check if box can be moved
                            old_box_coords = self.boxes[new_crds_check[2]]['coords']
                            new_box_coords = [old_box_coords[0] + y_shift, old_box_coords[1] + x_shift]
                            new_box_crds_check = self.check_coords(new_box_coords)
                            if new_box_crds_check[0] == True:
                                self.agents[self.team_mask[agent_teem_id]][agent_id]['coords'] =  np.array(new_coords)
                                if self.debug:
                                    print('moving forward')
                                self.boxes[new_crds_check[2]]['coords'] =  np.array(new_box_coords)
                                if self.debug:
                                    print('pushing the box forward')
                                    print(old_box_coords)
                                    print(new_box_coords)
                            else:
                                if self.debug:
                                    print('box stuck')
                        elif handle == 0:
                            if self.debug:
                                print('box should be pushed')
                    else:
                        print("can't move, stuck: ", new_crds_check[1])
                        
            # MOVE BACKWARD        
            elif move_code == -1:

                agent_old_coords = self.agents[self.team_mask[agent_teem_id]][agent_id]['coords']

                direction_forward = self.agents[self.team_mask[agent_teem_id]][agent_id]['sight']
                direction_backward = (direction_forward + 4)%8

                move_forward = self.move_map[direction_forward][0], self.move_map[direction_forward][1]
                move_backward = self.move_map[direction_backward][0], self.move_map[direction_backward][1]

                y_old, x_old = agent_old_coords[0], agent_old_coords[1]

                y_shift_forw, x_shift_forw = move_forward[0], move_forward[1]
                y_shift_back, x_shift_back = move_backward[0], move_backward[1]

                new_coords_forw = [y_old + y_shift_forw, x_old + x_shift_forw]
                new_coords_back = [y_old + y_shift_back, x_old + x_shift_back]

                new_crds_check_forw = self.check_coords(new_coords_forw)
                new_crds_check_back = self.check_coords(new_coords_back)

                if new_crds_check_back[0] == True:
                    if handle == 1:
                        if new_crds_check_forw[1] == 'box':
                            self.agents[self.team_mask[agent_teem_id]][agent_id]['coords'] = np.array(new_coords_back)
                            self.boxes[new_crds_check_forw[2]]['coords'] =  np.array(agent_old_coords)
                            if self.debug:
                                print('moving backward and pulling the box')
                        else:
                            self.agents[self.team_mask[agent_teem_id]][agent_id]['coords'] = np.array(new_coords_back)
                            if self.debug:
                                print('tried to pull the box, no box, moving backward')
                    elif handle == 0:
                        self.agents[self.team_mask[agent_teem_id]][agent_id]['coords'] = np.array(new_coords_back)
                        if self.debug:
                            print('moving backward')

                elif new_crds_check_back[0] == False:
                    # if there is a box
                    if new_crds_check_back[1] == 'box':
                        if self.debug:
                            print("can't move backward, box should be pushed")
                    else:
                        print("can't move backward, stuck: ", new_crds_check_back[1])
                            
            if box_action_code not in self.box_interactions:
                if self.debug:
                    print('icorrect interaction')
                    
    def show_map(self):
        map_to_show = np.array(self.map, dtype=object)
        for team_ in list(self.agents.keys()):
            for agent_key in list(self.agents[team_].keys()):
                target = self.agents[team_][agent_key]['coords']
                y_t, x_t = target[0], target[1]
                if team_ == 'hiders':
                    map_to_show[y_t, x_t] = 2
                else:
                    map_to_show[y_t, x_t] = 3
        for box_id in list(self.boxes.keys()):
            target = self.boxes[box_id]['coords']
            y_t, x_t = target[0], target[1]
            map_to_show[y_t, x_t] = 8
        print(np.array2string(map_to_show, separator=', ', formatter={'str_kind': lambda x: x}))
        # print(map_to_show, sep =', ')
            
    # def observation(self, agent_team, agent_number):
    def show_map_mas(self):
        map_to_show = np.array(self.map, dtype=object)
        for team_ in list(self.agents.keys()):
            for agent_key in list(self.agents[team_].keys()):
                target = self.agents[team_][agent_key]['coords']
                y_t, x_t = target[0], target[1]
                if team_ == 'hiders':
                    map_to_show[y_t, x_t] = 2
                else:
                    map_to_show[y_t, x_t] = 3
        for box_id in list(self.boxes.keys()):
            target = self.boxes[box_id]['coords']
            y_t, x_t = target[0], target[1]
            map_to_show[y_t, x_t] = 8
        return map_to_show

                                
    def reward(self):
        # we need to define a reward
        reward = 0
        
        return reward

    def reset(self):
        
        if self.use_seed:
            random.seed(self.seed)
            
        self.step_limit = self.base_step_limit
        self.initialize_map()

    def observation(self, agent_teem_id, agent_id):
        agents = self.agents
        boxes = self.boxes
        #get visibility mask
        vision_map = vision(self.map, 
                            tuple(agents[agent_teem_id][agent_id]['coords'].tolist()), 
                            int(agents[agent_teem_id][agent_id]['sight']))
        
        observation_dict = dict()
        observation_dict['agents'] = dict()
        observation_dict['boxes'] = dict()
        
        names = []
        if agent_teem_id == 0:
            names = ['hiders', 'seekers']
        else:
            names = ['seekers', 'hiders']
        
        i1 = 0
        for team_ in names:
            for j in agents[team_]:
                if(team_ != names[0] or j != agent_id):
                    observation_dict['agents'][i1] = dict()
                    observation_dict['agents'][i1]['coords'] = agents[team_][j]['coords']
                    agent_visibility = bool(vision_map[agents[team_][j]['coords'][0]][agents[team_][j]['coords'][1]])
                    observation_dict['agents'][i1]['visible'] = agent_visibility
                    i1 += 1
        for i in boxes:
            observation_dict['boxes'][i] = dict()
            observation_dict['boxes'][i]['coords'] = boxes[i]['coords']
            observation_dict['boxes'][i]['visible'] = bool(vision_map[boxes[i]['coords'][0]][boxes[i]['coords'][1]])
        return observation_dict
    
    def show_vision(self, agent_teem_id, agent_id):
        agent_teem = self.team_mask[agent_teem_id]
        return vision(self.map, 
                      tuple(self.agents[agent_teem][agent_id]['coords'].tolist()), 
                      int(self.agents[agent_teem][agent_id]['sight']))
    
    def reward(self):
        #Returns reward for hiders team
        reward = 0
        hiders_hidden = True

        for i in self.agents['seekers'].keys():
            obs = self.observation('seekers', i)['agents']
            #(len(list(obs.keys())) + 1) // 2 - 1
            #This is number of the first agent from the enemy team.
            #So this is number of the first hider in the observation.
            for j in range((len(list(obs.keys())) + 1) // 2 - 1, len(list(obs.keys()))):
                if obs[j]['visible']:
                    hiders_hidden = False
                    break
            if not hiders_hidden:
                break


        if hiders_hidden:
            reward = 1
        else:
            reward = -1
        
        return reward

In [21]:
from random import randint

In [27]:
randint(0, 1)

1

In [11]:
def draw(map, probs1, probs2, probs3, probs4, i1):
    fig = plt.figure(figsize=(16, 8))
    axs = [0, 0, 0, 0, 0]
    axs[0] = plt.subplot2grid((1, 5), (0, 0), colspan=2)
    axs[0].set_xlim(-0.5, 19.5)
    axs[0].set_ylim(-0.5, 19.5)
    axs[1] = plt.subplot2grid((4, 5), (0, 2))
    axs[1].set_ylim(0, 1)
    axs[2] = plt.subplot2grid((4, 5), (1, 2))
    axs[2].set_ylim(0, 1)
    axs[3] = plt.subplot2grid((4, 5), (2, 2))
    axs[3].set_ylim(0, 1)
    axs[4] = plt.subplot2grid((4, 5), (3, 2))
    axs[4].set_ylim(0, 1)
    for i in range(20):
        for j in range(20):
            if map[i][j] == 1:
                axs[0].scatter(i, j, marker='s', c='yellow')
    for i in range(20):
        for j in range(20):
            if map[i][j] == 8:
                axs[0].scatter(i, j, marker='s', c='blue')

    for i in range(20):
        for j in range(20):
            if map[i][j] == 2:
                axs[0].scatter(i, j, marker='^', c='green')
    for i in range(20):
        for j in range(20):
            if map[i][j] == 3:
                axs[0].scatter(i, j, marker='^', c='red')

    axs[1].bar([0, 1, 2, 3, 4], probs1)
    axs[2].bar([0, 1, 2, 3, 4], probs2)
    axs[3].bar([0, 1, 2, 3, 4], probs3)
    axs[4].bar([0, 1, 2, 3, 4], probs4)
    fig.savefig('/' + str(i1) + '.png')
    fig.clf()
    

In [59]:
def make_move(steps=40):
    new_env = env(1,20)
    for i1 in range(steps):
        for i in range(2):
            for j in range(2):
                #print(i1, i, j)
                new_env.move(i, j, randint(-2, 2), randint(-1, 1), 0, 0)
        draw(new_env.show_map_mas(), np.zeros(5), np.zeros(5), np.zeros(5), np.zeros(5), i1)

In [60]:
make_move()

can't move, stuck:  map edge
can't move, stuck:  agent
can't move backward, stuck:  wall
can't move backward, stuck:  map edge


  


can't move backward, stuck:  map edge
can't move backward, stuck:  agent


<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

In [61]:
import imageio
images = []
for i in range(40):
    images.append(imageio.imread('/' + str(i) + '.png'))
imageio.mimsave('/movie.gif', images)

In [9]:
import matplotlib.pyplot as plt


In [20]:
draw(new_env.show_map_mas(), np.zeros(5), np.zeros(5), np.zeros(5), np.zeros(5), 77)

<Figure size 1152x576 with 0 Axes>

In [None]:
def play_ep():
    new_env = env(1,20)
    new_env.self.map()

In [37]:
#new_env = env(1,20)
new_env.show_map_mas()

array([[8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 8, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 3, 0, 0, 0, 0, 0,

In [17]:
np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 8, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1],
 [0, 0, 0, 0, 0, 0, 3, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 3, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 8, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1],
       [0, 0, 0, 0, 0, 0, 3, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 3, 0, 0, 0, 0, 0,

In [14]:
new_env.show_map

<bound method env.show_map of <__main__.env object at 0x7f356e7b7c88>>

In [None]:
new_env.debug = 1

In [None]:
new_env.boxes

defaultdict(None,
            {0: defaultdict(None,
                         {'coords': [11, 15],
                          'locked': False,
                          'locked_team': None}),
             1: defaultdict(None,
                         {'coords': [17, 16],
                          'locked': False,
                          'locked_team': None}),
             2: defaultdict(None,
                         {'coords': [3, 1],
                          'locked': False,
                          'locked_team': None}),
             3: defaultdict(None,
                         {'coords': [15, 4],
                          'locked': False,
                          'locked_team': None})})

In [None]:
new_env.agents

defaultdict(None,
            {'seekers': defaultdict(None,
                         {0: defaultdict(None,
                                      {'coords': array([10, 18], dtype=int64),
                                       'sight': 3}),
                          1: defaultdict(None,
                                      {'coords': array([4, 7], dtype=int64),
                                       'sight': 1})}),
             'hiders': defaultdict(None,
                         {0: defaultdict(None,
                                      {'coords': array([ 1, 10], dtype=int64),
                                       'sight': 0}),
                          1: defaultdict(None,
                                      {'coords': array([12, 15], dtype=int64),
                                       'sight': 0})})})

In [None]:
new_env.show_map()

[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 1 0 1 1 1 1 1 1 1 1 1]
 [0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 3 0]
 [0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 8 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 2 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 8 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 0 0 0]
 [0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0]]


In [None]:
new_env.check_coords([2, 12])[1] == 'box'

True

In [None]:
# def move(self, agent_teem_id, agent_id, rotation_code, move_code, handle, box_action_code):
# self.team_mask = {0: 'seekers', 1:'hiders'}
new_env.move(1, 1, 0, -1, 1, 0)
new_env.show_map()

231 steps left
moving backward and pulling the box
[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 1 0 1 1 1 1 1 1 1 1 1]
 [0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 3 0]
 [0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 8 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 2 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 8 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 0 0 0]
 [0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0]]


In [None]:
new_env.agents

defaultdict(None,
            {'hiders': defaultdict(None,
                         {0: defaultdict(None,
                                      {'coords': array([0, 0]), 'sight': 3}),
                          1: defaultdict(None,
                                      {'coords': array([1, 5]), 'sight': 7})}),
             'seekers': defaultdict(None,
                         {0: defaultdict(None,
                                      {'coords': array([5, 0]), 'sight': 0}),
                          1: defaultdict(None,
                                      {'coords': array([6, 0]),
                                       'sight': 1})})})

In [None]:
new_env.boxes[0]['coords']

[7, 0]

In [None]:
new_env.boxes

defaultdict(None,
            {0: defaultdict(None,
                         {'coords': [7, 0],
                          'locked': False,
                          'locked_team': None}),
             1: defaultdict(None,
                         {'coords': array([0, 4]),
                          'locked': False,
                          'locked_team': None}),
             2: defaultdict(None,
                         {'coords': [2, 1],
                          'locked': False,
                          'locked_team': None}),
             3: defaultdict(None,
                         {'coords': [2, 0],
                          'locked': False,
                          'locked_team': None})})

In [None]:
def move(teem, agent, rotation, move, interaction):
    # self.valid_rotations = ['-90 deg', '-45 deg', 'pass', '+45 deg', '+90 deg']
    self.valid_rotations = [-2, -1, 0, 1, 2]
    
    # self.valid_moves = ['backward', 'pass', 'forward']
    self.valid_moves = [0, 1, 2]
    
    # self.valid_interactions = ['pass', 'lock', 'unlock']
    self.valid_interactions = [0, 1, 2]

In [None]:
        # self.valid_rotations = ['-90 deg', '-45 deg', 'pass', '+45 deg', '+90 deg']
        self.valid_rotations = [-2, -1, 0, 1, 2]
        
        # self.valid_moves = ['backward', 'pass', 'forward']
        self.valid_moves = [0, 1, 2]
        
        # self.valid_interactions = ['pass', 'lock', 'unlock']
        self.valid_interactions = [0, 1, 2]

In [None]:
test = [0,1,2,3,4,5,6,7]

In [None]:
move = 1
look = 7

In [None]:
test[(look + move)%8]

0

In [None]:
len(test)

9

In [None]:
test_aaa = np.array([[1,2,3],[4,5,6],[7,8,9]])

In [None]:
test_aaa[0]

array([1, 2, 3])

In [None]:
#1 row
#2 column

In [None]:
#0 pass [0, 0]
#1 up [-1, 0]
#2 up + right [-1, 1]
#3 right [0, 1]
#4 bottom + right [1, 1]
#5 bottom [1, 0]
#6 bottom + left [1, -1]
#7 left [0, -1]
#8 up + lef [-1, -1]

self.move_map = {0:[0,0], 
                 1:[-1,0], 
                 2:[-1,1], 
                 3:[0,1], 
                 4:[1,1],
                 5:[1,0],
                 6:[1,-1],
                 7:[0,-1],
                 8:[-1,-1]}

NameError: ignored