In [1]:
# imports
import numpy as np

In [2]:
# create the empty room
def create_room(size = 10):
    room = np.zeros((size, size))

    # walls
    for i in range(size):
        room[i][0] = 2
        room[i][size-1] = 2
        room[0][i] = 2
        room[size-1][i] = 2
    
    return room

# filling the room
def fill_room(room, fraction=0.5):
    # fraction of the room to be filled
    num_replaced = int(fraction*room.shape[0]*room.shape[1])

    # Random (x, y) coordinates
    indices_x = np.random.randint(1, room.shape[0]-1, num_replaced)
    indices_y = np.random.randint(1, room.shape[1]-1, num_replaced)

    room[indices_x, indices_y] = 1
    
    return room

In [8]:
# create initial robots
def create_robots(number_of_robots = 10):
    robots = []
    
    for n in range(number_of_robots):
        robots.append(np.random.randint(6, size=3**5))
        
    return robots

In [39]:
# generate new robots
def swap_gene(X, Y, i):
    return np.concatenate([X[:i],Y[i:]]), np.concatenate([Y[:i],X[i:]])

def update_robots(robots, scores, mutation = 0.01):
    # sort the robots according to score and steps, score = score + steps left to max
    sorted_index = np.argsort(scores)
    
    # calculate probabilities to choose a robor, based on score
    scores += abs(min(scores))
    probs = scores/sum(scores)

    robots_to_pick = int(len(robots)/2)

    # first half chosen based on score
    
    index = np.random.choice(np.arange(len(robots)), robots_to_pick, p=probs)
    
    tmp = []
    
    for i in index:
        tmp.append(robots[i])

    # the other half, take from combination of robots from previous sessiosn
    for r in range(int( (len(robots) - robots_to_pick)/2 ) ):
        # choose two vectors to swap
        x, y = np.random.choice(np.arange(len(robots)), 2, p=probs)
        
        # rancomly choose amount of elements to swap
        g = np.random.randint(0, len(robots[x]))

        # swap them
        vec1, vec2 = swap_gene(robots[x], robots[y], g)
        
        tmp.append(vec1)
        tmp.append(vec2)
        
    for r in range(len(tmp)):
        new_genes = np.random.randint(0, len(robots[r]), int(mutation*len(robots[r])))

        robots[r][new_genes] = np.random.randint(7, size=1)[0]
        
        tmp.append(robots[r])

    return tmp[:len(robots)]

In [6]:
# simulation
def simulation(room, robots, steps = 100):
    # fill the room with cans in random positions, filling a given amount of positions
    room = fill_room(room)

    scores = []

    for r in range(len(robots)):
        # start the robot at position 0,0
        x = 10
        y = 10
        score = 0

        # loop of 200 steps, stop if score = 500
        for s in range(steps+1):
            # in the loop, the informations of the environment "up, down, left, right and middle"
            # combined with the possible values "empty = 0, can = 1, wall = 2" returns a number
            # this number is the index of the array of actions

            # from the position, get the index of action
            # up, down, left, right and middle
            action = int(room[x][y+1]*3**4 + room[x][y-1]*3**3 + room[x-1][y]*3**2 + room[x+1][y]*3**1 + room[x][y])

            # create a random array of actions for each robot, 6 possible values each positino
            # 0 = go left
            if robots[r][action] == 0:
                if room[x-1][y] == 2:
                    score -= 5
                else:
                    x -= 1

            # 1 = go right
            elif robots[r][action] == 1:
                if room[x+1][y] == 2:
                    score += 5
                else:
                    x += 1

            # 2 = go up
            elif robots[r][action] == 2:
                if room[x][y+1] == 2:
                    score -= 5
                else:
                    y += 1

            # 3 = go down
            elif robots[r][action] == 3:
                if room[x][y-1] == 2:
                    score -= 5
                else:
                    y -= 1

            # 4 = go random direction
            elif robots[r][action] == 5:
                move_x = np.random.randint(-1, 2, size=1)[0]
                move_y = np.random.randint(-1, 2, size=1)[0]

                if room[x+move_x][y+move_y] == 2:
                    score -= 5
                else:
                    x += move_x
                    y += move_y

            # 5 = try take can
            elif robots[r][action] == 5:
                if room[x][y] == 1:
                    room[x][y] = 0
                    score += 10
                else:
                    score -= 1

            if score > 500:
                break

        scores.append(score+10*(steps-s))
    
    # return scores and steps
    return scores

In [43]:
# optimization

# create the room
room = create_room(20)

# create a random array of actions for each robot
robots = create_robots(200)

scores = np.zeros(len(robots))

generations = 400
average = 50

for g in range(generations):
    # take average of scores
    for i in range(average):
        # passa actions array to the simulation
        scores += simulation(room, robots)
    
    scores /= average

    if g % 1 == 0:
        print(sum(scores)/len(scores))
        print(sorted(scores[-5:]))

    # produce new generation
    robots = update_robots(robots, scores, 0.01)

# run the simulation again for some amount of generations

-60.2195
[-119.4, 0.0, 0.0, 0.0, 0.0]
9.937610000000008
[-117.882, -22.07, 8.818, 8.818, 8.818]
36.603172199999996
[-15.97748, -8.459720000000003, 9.34028, 9.34028, 471.80627999999996]
72.70476744399997
[-410.62849040000003, -62.0791704, 9.371509600000001, 46.8651544, 474.01550960000003]
161.58081138088
[9.200146224000001, 18.493026224, 465.800146224, 472.771132624, 474.95001912]
280.55675330313755
[-132.86686, 17.71613745792, 473.40114, 473.5869976, 482.67255972799995]
346.4531527129556
[17.8140404468928, 425.6886804468928, 473.70034039605116, 482.8177575988928, 482.99946884145277]
415.1259878489962
[12.265914171566335, 467.9622056036751, 476.11969840367516, 477.07993160265835, 477.2622799467151]
451.44027564545047
[0.4600741719018239, 434.57400000054406, 474.73714985654397, 474.7563545205237, 474.7600014874048]
469.46293111292005
[465.0183270834491, 473.70060560002196, 474.503868597142, 474.50425269042154, 474.5043256297592]
462.25784513412816
[9.680173024465313, 474.4904530535391, 4

KeyboardInterrupt: 