### Imports

In [3]:
import numpy as np
import random
# import to do training
from tpg.trainer import Trainer
# import to run an agent (always needed)
from tpg.agent import Agent
# faster training
import multiprocessing as mp
# visual tools
from IPython.display import clear_output
import time
import matplotlib.pyplot as plt

In [4]:
def animate_path(sequence, delay=0.5):
    """Animate an agent's path by reprinting a 10x10 grid once per step.

    Args:
        sequence: ordered (row, col) tuples of the cells the agent occupied.
        delay: seconds to sleep before drawing each frame (default 0.5,
            the pause this function has always used).

    Every frame clears the previous cell output and prints the step index
    followed by the grid. In the grid, 1 marks the agent's current cell,
    5 marks the barrier cells (4, 0) through (4, 4), and 0 is everything
    else. Nothing is returned.
    """
    current_map = np.zeros((10, 10))
    # add barrier
    for col in range(5):
        current_map[(4, col)] = 5
    # animate the run!
    previous = None
    for step, position in enumerate(sequence):
        time.sleep(delay)
        # Erase the agent from the cell it occupied in the previous frame;
        # the very first frame has nothing to erase.
        if previous is not None:
            current_map[previous] = 0
        current_map[position] = 1
        previous = position
        # wait=True defers clearing until the new frame is ready to print.
        clear_output(wait=True)
        print(step)
        print(current_map)

In [5]:
# Example: animate a short five-step path with animate_path.
seq = [(0, 1), (0, 2), (0, 3), (1, 3), (2, 4)]
animate_path(seq)

4
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [5. 5. 5. 5. 5. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]


In [6]:
def heat_map(sequence):
    """Draw a 2-D array as a heat map on the current matplotlib figure.

    Args:
        sequence: 2-D array-like; each entry is drawn as one coloured cell
            using the 'cool' colormap.

    The axis ticks are derived from the array's shape, so any grid size
    is labelled with one tick per row and per column (0..9 on both axes
    for a 10x10 grid).
    """
    grid = np.asarray(sequence)
    n_rows, n_cols = grid.shape[:2]
    # Plain Python ints are used as tick labels, one per column / row.
    plt.xticks(ticks=np.arange(n_cols), labels=list(range(n_cols)))
    plt.yticks(ticks=np.arange(n_rows), labels=list(range(n_rows)))
    plt.imshow(grid, cmap='cool')

### Grid World

In [7]:
# based (for now) on Downing's Figure 4.1
class GridWorldv1:
    """Rectangular grid world with a wall, a start cell and a goal cell.

    States are (row, col) tuples. Actions are integers:
    0 = north (row - 1), 1 = south (row + 1), 2 = east (col + 1),
    and any other value = west (col - 1).
    """

    # Wall used when no barriers are supplied: cells (4, 0) through (4, 4).
    DEFAULT_BARRIERS = frozenset((4, col) for col in range(5))

    def __init__(self, rows, cols, win_state, starting_state, barriers=None):
        """Create the world and place the agent on ``starting_state``.

        Args:
            rows: number of grid rows; valid row indices are 0..rows-1.
            cols: number of grid columns; valid column indices are 0..cols-1.
            win_state: (row, col) cell that ends the episode when reached.
            starting_state: (row, col) cell the agent starts on and returns
                to on reset/close.
            barriers: optional iterable of (row, col) cells the agent may
                not enter. Defaults to the wall (4, 0)..(4, 4).
        """
        self.rows = rows
        self.cols = cols
        self.starting_state = starting_state
        self.win_state = win_state
        self.current_state = self.starting_state
        if barriers is None:
            self.barriers = self.DEFAULT_BARRIERS
        else:
            self.barriers = frozenset(tuple(cell) for cell in barriers)

    def sample_action(self):
        """Return one of the actions 0, 1, 2, 3 chosen uniformly at random."""
        rand = random.uniform(0, 1)
        # Quarter-width buckets: [0, .25) -> 0, [.25, .5) -> 1,
        # [.5, .75) -> 2, everything else -> 3. min() folds the
        # rand == 1.0 endpoint into the last bucket.
        return min(int(rand * 4), 3)

    def reset(self):
        """Move the agent back to the starting cell and return that cell."""
        self.current_state = self.starting_state
        return self.current_state

    def check_win(self):
        """Return True when the agent is on the goal cell, else False."""
        return self.current_state == self.win_state

    # just reset for now...
    def close(self):
        """Reset the agent to the starting cell; always returns 1."""
        self.current_state = self.starting_state
        return 1

    def step(self, action):
        """Apply one action and return ``(state, reward, terminate)``.

        A legal move updates the agent's cell and costs 0.01. A move that
        would leave the grid or enter a barrier cell leaves the agent in
        place and costs 1. If the agent is on the goal cell after the
        move is resolved, 100 is added to the reward and ``terminate``
        is True.
        """
        row, col = self.current_state[0], self.current_state[1]
        # north
        if action == 0:
            target = (row - 1, col)
        # south
        elif action == 1:
            target = (row + 1, col)
        # east
        elif action == 2:
            target = (row, col + 1)
        # west
        else:
            target = (row, col - 1)

        reward = 0
        # Bounds come from the constructor arguments rather than a fixed
        # 10x10 assumption.
        in_bounds = (0 <= target[0] < self.rows) and (0 <= target[1] < self.cols)
        if in_bounds and target not in self.barriers:
            self.current_state = target
            reward -= 0.01
        else:
            # out of bounds or blocked by the wall: stay put, pay the penalty
            reward -= 1

        terminate = self.check_win()
        if terminate:
            reward += 100

        return self.current_state, reward, terminate

### GridWorld Test

In [8]:
# Smoke test: take one randomly sampled action from the start cell and
# print the resulting state, reward and done flag (one per line).
env = GridWorldv1(10, 10, (9, 0), (0, 0))
state = env.reset()
action = env.sample_action()  # swap in agent.act(state) to drive it with an agent
state, reward, isDone = env.step(action)
print(state, reward, isDone, sep='\n')

(0, 0)
-1
False


### TPG Agent

In [11]:
def runAgent(args):
    """Evaluate one agent on GridWorldv1 and record its averaged score.

    Args:
        args: positional bundle
            [agent, envName, scoreList, numEpisodes, numFrames, gen]
            agent       -- agent to evaluate (uses taskDone, act, reward,
                           agentNum and team.id / team.outcomes)
            envName     -- task name passed to agent.taskDone / agent.reward
            scoreList   -- list that receives (team id, team outcomes)
            numEpisodes -- number of episodes to average over
            numFrames   -- maximum number of steps per episode
            gen         -- generation index; accepted but not used here

    Returns:
        None. The mean episode score is reported through agent.reward and
        the agent's (team id, outcomes) pair is appended to scoreList.
    """
    agent, envName, scoreList, numEpisodes, numFrames = args[:5]

    # skip if task already done by agent
    if agent.taskDone(envName):
        print('Agent #' + str(agent.agentNum) + ' can skip.')
        scoreList.append((agent.team.id, agent.team.outcomes))
        return

    env = GridWorldv1(10, 10, (9, 0), (0, 0))

    # Hard cap on the frame index; only matters if numFrames exceeds it.
    limit = 100000000

    scoreTotal = 0
    for ep in range(numEpisodes):
        state = env.reset()
        scoreEp = 0
        states = []
        # Per-cell visit counts for a heat map; tallied here but not
        # displayed or returned by this function.
        step_map = np.zeros((env.rows, env.cols))

        # Random warm-up steps are only used when several episodes are
        # averaged. The frame loop itself runs for any episode count, so
        # a single-episode evaluation is still actually played.
        numRandFrames = 0
        if numEpisodes > 1:
            numRandFrames = random.randint(0, 30)

        for i in range(numFrames):  # frame loop
            if i < numRandFrames:
                # Keep `state` in sync so the agent's first real decision
                # is based on where the warm-up actually left it.
                state, _, _ = env.step(env.sample_action())
                continue

            # action selection
            act = agent.act(state)

            state, reward, isDone = env.step(act)
            # for heatmap
            step_map[state] += 1
            states.append(state)
            scoreEp += reward

            limitReached = i > limit
            if limitReached:
                print('Limit Reached!')

            # animate the winners to see what they are doing
            if isDone:
                print('win!')
                animate_path(states)

            if isDone or limitReached:
                break

        print('Agent #' + str(agent.agentNum) +
              ' | Ep #' + str(ep) + ' | Score: ' + str(scoreEp))

        scoreTotal += scoreEp
    scoreTotal /= numEpisodes
    env.close()
    agent.reward(scoreTotal, envName)
    scoreList.append((agent.team.id, agent.team.outcomes))

### All Together Now!

In [12]:
# Evolve a population of TPG teams on GridWorldv1 for 100 generations,
# logging (min, max, average) fitness after every generation.
acts = 4
# pActAtom=1.0 so teams are single nodes: starting off, teams don't
# reference other teams
trainer = Trainer(actions=acts, teamPopSize=10, pActAtom=1.0)

envName = 'GridWorldv1'
allScores = []
for gen in range(100):
    scoreList = []

    # evaluate every agent of this generation: 10 episodes of up to
    # 1000 frames each
    agents = trainer.getAgents()
    for agent in agents:
        runAgent([agent, envName, scoreList, 10, 1000, gen])

    # hand the collected (team id, outcomes) pairs back to the trainer
    teams = trainer.applyScores(scoreList)

    # go into next gen
    trainer.evolve(tasks=[envName])

    # fitness summary exposed by the trainer after evolving
    scoreStats = trainer.fitnessStats
    allScores.append(tuple(scoreStats[key] for key in ('min', 'max', 'average')))

    print('Gen: ' + str(gen))
    print('Results so far: ' + str(allScores))

print('Results:\nMin, Max, Avg')
for score in allScores:
    print(*score)

31
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [5. 5. 5. 5. 5. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
Agent #5 | Ep #5 | Score: 99.67999999999999
Agent #5 | Ep #6 | Score: -83.03000000000058
Agent #5 | Ep #7 | Score: -83.0500000000006
Agent #5 | Ep #8 | Score: -82.98000000000056
Agent #5 | Ep #9 | Score: -85.22000000000068
Agent #6 | Ep #0 | Score: -85.12000000000064
Agent #6 | Ep #1 | Score: -82.96000000000056
Agent #6 | Ep #2 | Score: -77.10000000000026
Agent #6 | Ep #3 | Score: -86.09000000000076
Agent #6 | Ep #4 | Score: -85.21000000000068
Agent #6 | Ep #5 | Score: -85.22000000000068
Agent #6 | Ep #6 | Score: -82.05000000000058
Agent #6 | Ep #7 | Score: -83.18000000000065
Agent #6 | Ep #8 | Score: -85.03000000000064
Agent #6 | Ep #9 | Score: -82.07000000

Agent #5 | Ep #6 | Score: -84.22000000000068
Agent #5 | Ep #7 | Score: -81.98000000000056
Agent #5 | Ep #8 | Score: -82.1300000000006
Agent #5 | Ep #9 | Score: -84.0500000000006
Agent #6 | Ep #0 | Score: -82.11000000000061
Agent #6 | Ep #1 | Score: -85.06000000000067
Agent #6 | Ep #2 | Score: -84.94000000000065
Agent #6 | Ep #3 | Score: -83.16000000000062
Agent #6 | Ep #4 | Score: -83.16000000000064
Agent #6 | Ep #5 | Score: -82.1000000000006
Agent #6 | Ep #6 | Score: -79.08000000000035
Agent #6 | Ep #7 | Score: -84.13000000000063
Agent #6 | Ep #8 | Score: -84.05000000000062
Agent #6 | Ep #9 | Score: -82.0700000000006
Agent #7 | Ep #0 | Score: -971.01
Agent #7 | Ep #1 | Score: -995
Agent #7 | Ep #2 | Score: -997
Agent #7 | Ep #3 | Score: -974.02
Agent #7 | Ep #4 | Score: -975.02
Agent #7 | Ep #5 | Score: -986.01
Agent #7 | Ep #6 | Score: -990
Agent #7 | Ep #7 | Score: -986.03
Agent #7 | Ep #8 | Score: -984.03
Agent #7 | Ep #9 | Score: -976
Agent #8 | Ep #0 | Score: -85.16000000000066
A

Agent #7 | Ep #5 | Score: -85.10000000000068
Agent #7 | Ep #6 | Score: -83.01000000000056
Agent #7 | Ep #7 | Score: -84.15000000000063
Agent #7 | Ep #8 | Score: -84.04000000000059
Agent #7 | Ep #9 | Score: -84.22000000000068
Agent #8 | Ep #0 | Score: -77.13000000000024
Agent #8 | Ep #1 | Score: -83.23000000000067
Agent #8 | Ep #2 | Score: -85.05000000000067
Agent #8 | Ep #3 | Score: -84.06000000000064
Agent #8 | Ep #4 | Score: -86.03000000000073
Agent #8 | Ep #5 | Score: -84.25000000000068
Agent #8 | Ep #6 | Score: -84.17000000000066
Agent #8 | Ep #7 | Score: -79.00000000000036
Agent #8 | Ep #8 | Score: -83.10000000000062
Agent #8 | Ep #9 | Score: -84.11000000000062
Agent #9 | Ep #0 | Score: -999
Agent #9 | Ep #1 | Score: -991.02
Agent #9 | Ep #2 | Score: -997.01
Agent #9 | Ep #3 | Score: -968.02
Agent #9 | Ep #4 | Score: -980
Agent #9 | Ep #5 | Score: -974.02
Agent #9 | Ep #6 | Score: -974.03
Agent #9 | Ep #7 | Score: -993
Agent #9 | Ep #8 | Score: -981.01
Agent #9 | Ep #9 | Score: -9

Agent #8 | Ep #4 | Score: -972.03
Agent #8 | Ep #5 | Score: -1000
Agent #8 | Ep #6 | Score: -978.03
Agent #8 | Ep #7 | Score: -977
Agent #8 | Ep #8 | Score: -992.01
Agent #8 | Ep #9 | Score: -973.03
Agent #9 | Ep #0 | Score: -955.1700000000001
Agent #9 | Ep #1 | Score: -973.14
Agent #9 | Ep #2 | Score: -980.16
Agent #9 | Ep #3 | Score: -978.1700000000001
Agent #9 | Ep #4 | Score: -965.12
Agent #9 | Ep #5 | Score: -954.16
Agent #9 | Ep #6 | Score: -972.1700000000001
Agent #9 | Ep #7 | Score: -967.15
Agent #9 | Ep #8 | Score: -978.1800000000001
Agent #9 | Ep #9 | Score: -980.1700000000001
Adding dffdec26-861c-4cb4-9444-a69599b68a42 to trainer learners
Adding 55dc8f0c-8a16-4293-8cdf-ce97651f4c21 to trainer learners
Adding afbc768a-d829-41e0-bed3-e2ef99dcb053 to trainer learners
Adding dbd1faf9-36bb-48d8-b59f-b67d6ff1d7c0 to trainer learners
Adding ea142b59-808e-4bb2-9712-23e423b21dd2 to trainer learners
Adding dc802921-dcd8-48f9-924e-5f623d4cdc1d to trainer learners
Adding 59fb3c5c-234b-4

Agent #5 | Ep #0 | Score: -85.14000000000064
Agent #5 | Ep #1 | Score: -84.11000000000064
Agent #5 | Ep #2 | Score: -83.05000000000061
Agent #5 | Ep #3 | Score: -84.20000000000067
Agent #5 | Ep #4 | Score: -83.98000000000062
Agent #5 | Ep #5 | Score: -84.94000000000065
Agent #5 | Ep #6 | Score: -85.11000000000065
Agent #5 | Ep #7 | Score: -83.08000000000061
Agent #5 | Ep #8 | Score: -84.07000000000059
Agent #5 | Ep #9 | Score: -84.14000000000064
Agent #6 | Ep #0 | Score: -956.14
Agent #6 | Ep #1 | Score: -980.16
Agent #6 | Ep #2 | Score: -960.14
Agent #6 | Ep #3 | Score: -969.11
Agent #6 | Ep #4 | Score: -964.0699999999999
Agent #6 | Ep #5 | Score: -966.0799999999999
Agent #6 | Ep #6 | Score: -969.12
Agent #6 | Ep #7 | Score: -962.15
Agent #6 | Ep #8 | Score: -959.13
Agent #6 | Ep #9 | Score: -980.16
Agent #7 | Ep #0 | Score: -986.03
Agent #7 | Ep #1 | Score: -979.03
Agent #7 | Ep #2 | Score: -976.03
Agent #7 | Ep #3 | Score: -974
Agent #7 | Ep #4 | Score: -989.02
Agent #7 | Ep #5 | Sc

Agent #5 | Ep #2 | Score: -84.04000000000063
Agent #5 | Ep #3 | Score: -84.16000000000065
Agent #5 | Ep #4 | Score: -83.26000000000067
Agent #5 | Ep #5 | Score: -85.1300000000007
Agent #5 | Ep #6 | Score: -84.19000000000068
Agent #5 | Ep #7 | Score: -85.01000000000069
Agent #5 | Ep #8 | Score: -85.13000000000068
Agent #5 | Ep #9 | Score: -82.0600000000006
Agent #6 | Ep #0 | Score: -958.16
Agent #6 | Ep #1 | Score: -955.16
Agent #6 | Ep #2 | Score: -962.16
Agent #6 | Ep #3 | Score: -963.15
Agent #6 | Ep #4 | Score: -969.13
Agent #6 | Ep #5 | Score: -970.16
Agent #6 | Ep #6 | Score: -966.14
Agent #6 | Ep #7 | Score: -973.1800000000001
Agent #6 | Ep #8 | Score: -957.1700000000001
Agent #6 | Ep #9 | Score: -972.13
Agent #7 | Ep #0 | Score: -78.11000000000028
Agent #7 | Ep #1 | Score: -81.01000000000056
Agent #7 | Ep #2 | Score: -84.15000000000066
Agent #7 | Ep #3 | Score: -76.06000000000022
Agent #7 | Ep #4 | Score: -84.21000000000069
Agent #7 | Ep #5 | Score: -83.09000000000061
Agent #7 |

Agent #6 | Ep #1 | Score: -661.159999999998
Agent #6 | Ep #2 | Score: -678.1399999999978
Agent #6 | Ep #3 | Score: -673.129999999998
Agent #6 | Ep #4 | Score: -672.1699999999979
Agent #6 | Ep #5 | Score: -666.1499999999979
Agent #6 | Ep #6 | Score: -677.1599999999979
Agent #6 | Ep #7 | Score: -668.189999999998
Agent #6 | Ep #8 | Score: -666.119999999998
Agent #6 | Ep #9 | Score: -679.239999999998
Agent #7 | Ep #0 | Score: -962.0799999999999
Agent #7 | Ep #1 | Score: -976.09
Agent #7 | Ep #2 | Score: -991.09
Agent #7 | Ep #3 | Score: -972.09
Agent #7 | Ep #4 | Score: -985.04
Agent #7 | Ep #5 | Score: -981.0699999999999
Agent #7 | Ep #6 | Score: -986.06
Agent #7 | Ep #7 | Score: -966.05
Agent #7 | Ep #8 | Score: -968.05
Agent #7 | Ep #9 | Score: -974.09
Agent #8 | Ep #0 | Score: -982.01
Agent #8 | Ep #1 | Score: -971
Agent #8 | Ep #2 | Score: -973.02
Agent #8 | Ep #3 | Score: -976.02
Agent #8 | Ep #4 | Score: -980
Agent #8 | Ep #5 | Score: -982.02
Agent #8 | Ep #6 | Score: -969.02
Agent 

Agent #6 | Ep #8 | Score: -973.13
Agent #6 | Ep #9 | Score: -958.13
Agent #7 | Ep #0 | Score: -957.15
Agent #7 | Ep #1 | Score: -970.16
Agent #7 | Ep #2 | Score: -963.15
Agent #7 | Ep #3 | Score: -966.16
Agent #7 | Ep #4 | Score: -977.1700000000001
Agent #7 | Ep #5 | Score: -969.15
Agent #7 | Ep #6 | Score: -977.1700000000001
Agent #7 | Ep #7 | Score: -966.15
Agent #7 | Ep #8 | Score: -977.14
Agent #7 | Ep #9 | Score: -965.15
Agent #8 | Ep #0 | Score: -969.01
Agent #8 | Ep #1 | Score: -1000
Agent #8 | Ep #2 | Score: -999
Agent #8 | Ep #3 | Score: -993.02
Agent #8 | Ep #4 | Score: -980.03
Agent #8 | Ep #5 | Score: -972.01
Agent #8 | Ep #6 | Score: -975.01
Agent #8 | Ep #7 | Score: -984.01
Agent #8 | Ep #8 | Score: -971
Agent #8 | Ep #9 | Score: -971.01
Agent #9 | Ep #0 | Score: -968.1700000000001
Agent #9 | Ep #1 | Score: -959.1700000000001
Agent #9 | Ep #2 | Score: -968.16
Agent #9 | Ep #3 | Score: -976.1700000000001
Agent #9 | Ep #4 | Score: -962.1
Agent #9 | Ep #5 | Score: -961.16
Ag

Agent #6 | Ep #8 | Score: -974.14
Agent #6 | Ep #9 | Score: -978.1700000000001
Agent #7 | Ep #0 | Score: -977.01
Agent #7 | Ep #1 | Score: -986.01
Agent #7 | Ep #2 | Score: -974.01
Agent #7 | Ep #3 | Score: -975.01
Agent #7 | Ep #4 | Score: -980.02
Agent #7 | Ep #5 | Score: -992.02
Agent #7 | Ep #6 | Score: -968.03
Agent #7 | Ep #7 | Score: -983.01
Agent #7 | Ep #8 | Score: -993.01
Agent #7 | Ep #9 | Score: -996
Agent #8 | Ep #0 | Score: -85.0100000000007
Agent #8 | Ep #1 | Score: -85.18000000000066
Agent #8 | Ep #2 | Score: -82.0900000000006
Agent #8 | Ep #3 | Score: -85.1300000000007
Agent #8 | Ep #4 | Score: -83.02000000000059
Agent #8 | Ep #5 | Score: -84.24000000000069
Agent #8 | Ep #6 | Score: -83.02000000000058
Agent #8 | Ep #7 | Score: -85.21000000000069
Agent #8 | Ep #8 | Score: -84.12000000000062
Agent #8 | Ep #9 | Score: -84.13000000000063
Agent #9 | Ep #0 | Score: -997.01
Agent #9 | Ep #1 | Score: -980.03
Agent #9 | Ep #2 | Score: -992
Agent #9 | Ep #3 | Score: -988
Agent #

Agent #7 | Ep #3 | Score: -83.09000000000063
Agent #7 | Ep #4 | Score: -85.22000000000068
Agent #7 | Ep #5 | Score: -82.1000000000006
Agent #7 | Ep #6 | Score: -84.08000000000061
Agent #7 | Ep #7 | Score: -85.22000000000068
Agent #7 | Ep #8 | Score: -82.05000000000058
Agent #7 | Ep #9 | Score: -85.18000000000069
Agent #8 | Ep #0 | Score: -985.03
Agent #8 | Ep #1 | Score: -970.03
Agent #8 | Ep #2 | Score: -981.02
Agent #8 | Ep #3 | Score: -969.02
Agent #8 | Ep #4 | Score: -976.01
Agent #8 | Ep #5 | Score: -985.03
Agent #8 | Ep #6 | Score: -975.03
Agent #8 | Ep #7 | Score: -972
Agent #8 | Ep #8 | Score: -978.02
Agent #8 | Ep #9 | Score: -996
Agent #9 | Ep #0 | Score: -963.13
Agent #9 | Ep #1 | Score: -979.14
Agent #9 | Ep #2 | Score: -969.13
Agent #9 | Ep #3 | Score: -961.12
Agent #9 | Ep #4 | Score: -976.16
Agent #9 | Ep #5 | Score: -982.1800000000001
Agent #9 | Ep #6 | Score: -981.1700000000001
Agent #9 | Ep #7 | Score: -961.1800000000001
Agent #9 | Ep #8 | Score: -957.15
Agent #9 | Ep

Agent #8 | Ep #3 | Score: -666.089999999998
Agent #8 | Ep #4 | Score: -671.129999999998
Agent #8 | Ep #5 | Score: -671.1699999999979
Agent #8 | Ep #6 | Score: -678.209999999998
Agent #8 | Ep #7 | Score: -672.119999999998
Agent #8 | Ep #8 | Score: -667.119999999998
Agent #8 | Ep #9 | Score: -678.1799999999979
Agent #9 | Ep #0 | Score: -82.03000000000058
Agent #9 | Ep #1 | Score: -83.14000000000065
Agent #9 | Ep #2 | Score: -86.12000000000076
Agent #9 | Ep #3 | Score: -84.18000000000067
Agent #9 | Ep #4 | Score: -84.18000000000066
Agent #9 | Ep #5 | Score: -84.11000000000062
Agent #9 | Ep #6 | Score: -84.25000000000068
Agent #9 | Ep #7 | Score: -82.03000000000057
Agent #9 | Ep #8 | Score: -84.22000000000067
Agent #9 | Ep #9 | Score: -83.95000000000063
Adding 3f4539c4-e346-4bed-a385-f7cb0e1904e4 to trainer learners
Adding 18ad3cfc-a4df-4102-8d5d-0d029e3c7a33 to trainer learners
Adding b5303f59-452f-4997-94a0-6d079e287f3c to trainer learners
Adding 0a7f2b09-f277-4916-89c1-f75a21f193c0 to t

Agent #8 | Ep #4 | Score: -80.98000000000053
Agent #8 | Ep #5 | Score: -86.05000000000071
Agent #8 | Ep #6 | Score: -83.25000000000068
Agent #8 | Ep #7 | Score: -85.00000000000064
Agent #8 | Ep #8 | Score: -83.16000000000062
Agent #8 | Ep #9 | Score: -82.05000000000058
Agent #9 | Ep #0 | Score: -83.00000000000057
Agent #9 | Ep #1 | Score: -85.24000000000069
Agent #9 | Ep #2 | Score: -84.20000000000067
Agent #9 | Ep #3 | Score: -83.03000000000058
Agent #9 | Ep #4 | Score: -81.00000000000054
Agent #9 | Ep #5 | Score: -84.10000000000062
Agent #9 | Ep #6 | Score: -85.21000000000068
Agent #9 | Ep #7 | Score: -84.08000000000064
Agent #9 | Ep #8 | Score: -85.24000000000069
Agent #9 | Ep #9 | Score: -84.2300000000007
Adding 9e0d4911-8ea7-4e38-a18e-af5c0cfaa1a1 to trainer learners
Adding 185c50a8-2b7e-4827-be3a-e8835311de25 to trainer learners
Adding 68863215-c422-4839-a877-ca357c4d8c24 to trainer learners
Adding 6c3a2b6a-5133-4f0b-b753-b6fa2a42ad09 to trainer learners
Adding 345493ef-7f95-4408

Agent #7 | Ep #2 | Score: -83.21000000000065
Agent #7 | Ep #3 | Score: -84.18000000000066
Agent #7 | Ep #4 | Score: -82.00000000000058
Agent #7 | Ep #5 | Score: -85.16000000000065
Agent #7 | Ep #6 | Score: -83.19000000000065
Agent #7 | Ep #7 | Score: -84.18000000000066
Agent #7 | Ep #8 | Score: -83.98000000000063
Agent #7 | Ep #9 | Score: -86.13000000000076
Agent #8 | Ep #0 | Score: -981.16
Agent #8 | Ep #1 | Score: -967.1800000000001
Agent #8 | Ep #2 | Score: -957.14
Agent #8 | Ep #3 | Score: -971.14
Agent #8 | Ep #4 | Score: -969.15
Agent #8 | Ep #5 | Score: -980.1700000000001
Agent #8 | Ep #6 | Score: -969.13
Agent #8 | Ep #7 | Score: -974.14
Agent #8 | Ep #8 | Score: -963.1
Agent #8 | Ep #9 | Score: -972.15
Agent #9 | Ep #0 | Score: -973.01
Agent #9 | Ep #1 | Score: -980.01
Agent #9 | Ep #2 | Score: -975.01
Agent #9 | Ep #3 | Score: -989
Agent #9 | Ep #4 | Score: -988
Agent #9 | Ep #5 | Score: -991
Agent #9 | Ep #6 | Score: -975.03
Agent #9 | Ep #7 | Score: -986.01
Agent #9 | Ep #8

Agent #8 | Ep #1 | Score: -84.0600000000006
Agent #8 | Ep #2 | Score: -84.0100000000006
Agent #8 | Ep #3 | Score: -77.17000000000021
Agent #8 | Ep #4 | Score: -80.98000000000053
Agent #8 | Ep #5 | Score: -85.06000000000066
Agent #8 | Ep #6 | Score: -82.06000000000058
Agent #8 | Ep #7 | Score: -82.97000000000055
Agent #8 | Ep #8 | Score: -82.11000000000061
Agent #8 | Ep #9 | Score: -77.09000000000022
Agent #9 | Ep #0 | Score: -330.70999999999697
Agent #9 | Ep #1 | Score: -323.5699999999971
Agent #9 | Ep #2 | Score: -326.63999999999703
Agent #9 | Ep #3 | Score: -325.62999999999704
Agent #9 | Ep #4 | Score: -326.659999999997
Agent #9 | Ep #5 | Score: -330.58999999999696
Agent #9 | Ep #6 | Score: -324.5799999999971
Agent #9 | Ep #7 | Score: -327.599999999997
Agent #9 | Ep #8 | Score: -326.549999999997
Agent #9 | Ep #9 | Score: -331.73999999999694
Adding 3d49d07d-7fe5-46cb-9347-0cf84e2df1c2 to trainer learners
Adding 5dee8916-19a3-465f-af12-7aba0ab67ecc to trainer learners
Adding 4aeefe70-8

Agent #7 | Ep #8 | Score: -977.16
Agent #7 | Ep #9 | Score: -963.1700000000001
Agent #8 | Ep #0 | Score: -976.02
Agent #8 | Ep #1 | Score: -993.01
Agent #8 | Ep #2 | Score: -993.01
Agent #8 | Ep #3 | Score: -988
Agent #8 | Ep #4 | Score: -984.01
Agent #8 | Ep #5 | Score: -976
Agent #8 | Ep #6 | Score: -985
Agent #8 | Ep #7 | Score: -971
Agent #8 | Ep #8 | Score: -989.03
Agent #8 | Ep #9 | Score: -1000
Agent #9 | Ep #0 | Score: -77.17000000000026
Agent #9 | Ep #1 | Score: -84.17000000000064
Agent #9 | Ep #2 | Score: -84.20000000000068
Agent #9 | Ep #3 | Score: -83.10000000000062
Agent #9 | Ep #4 | Score: -83.12000000000062
Agent #9 | Ep #5 | Score: -83.13000000000063
Agent #9 | Ep #6 | Score: -83.17000000000063
Agent #9 | Ep #7 | Score: -85.11000000000065
Agent #9 | Ep #8 | Score: -84.08000000000062
Agent #9 | Ep #9 | Score: -84.13000000000063
Adding 473fe39e-c396-4890-a448-b62ea7eee16b to trainer learners
Adding 895ae43b-57dc-4b13-b277-9b251abbd444 to trainer learners
Adding 7ab669c3-a

Agent #7 | Ep #5 | Score: -979.02
Agent #7 | Ep #6 | Score: -996.01
Agent #7 | Ep #7 | Score: -994
Agent #7 | Ep #8 | Score: -977.01
Agent #7 | Ep #9 | Score: -990.02
Agent #8 | Ep #0 | Score: -83.09000000000063
Agent #8 | Ep #1 | Score: -82.99000000000056
Agent #8 | Ep #2 | Score: -83.04000000000059
Agent #8 | Ep #3 | Score: -84.08000000000061
Agent #8 | Ep #4 | Score: -82.06000000000057
Agent #8 | Ep #5 | Score: -84.21000000000068
Agent #8 | Ep #6 | Score: -76.1900000000002
Agent #8 | Ep #7 | Score: -84.0600000000006
Agent #8 | Ep #8 | Score: -82.96000000000058
Agent #8 | Ep #9 | Score: -83.11000000000064
Agent #9 | Ep #0 | Score: -83.10000000000062
Agent #9 | Ep #1 | Score: -82.01000000000056
Agent #9 | Ep #2 | Score: -84.04000000000062
Agent #9 | Ep #3 | Score: -85.02000000000069
Agent #9 | Ep #4 | Score: -85.02000000000064
Agent #9 | Ep #5 | Score: -82.1300000000006
Agent #9 | Ep #6 | Score: -84.08000000000061
Agent #9 | Ep #7 | Score: -82.08000000000061
Agent #9 | Ep #8 | Score: 

Agent #6 | Ep #6 | Score: -82.00000000000058
Agent #6 | Ep #7 | Score: -83.15000000000065
Agent #6 | Ep #8 | Score: -82.0600000000006
Agent #6 | Ep #9 | Score: -85.15000000000066
Agent #7 | Ep #0 | Score: -84.15000000000065
Agent #7 | Ep #1 | Score: -83.0500000000006
Agent #7 | Ep #2 | Score: -84.21000000000068
Agent #7 | Ep #3 | Score: -85.00000000000065
Agent #7 | Ep #4 | Score: -83.13000000000062
Agent #7 | Ep #5 | Score: -83.18000000000065
Agent #7 | Ep #6 | Score: -84.10000000000062
Agent #7 | Ep #7 | Score: -84.0200000000006
Agent #7 | Ep #8 | Score: -84.0700000000006
Agent #7 | Ep #9 | Score: -83.12000000000063
Agent #8 | Ep #0 | Score: -995
Agent #8 | Ep #1 | Score: -968.02
Agent #8 | Ep #2 | Score: -978
Agent #8 | Ep #3 | Score: -971.02
Agent #8 | Ep #4 | Score: -979.01
Agent #8 | Ep #5 | Score: -972
Agent #8 | Ep #6 | Score: -977
Agent #8 | Ep #7 | Score: -980.02
Agent #8 | Ep #8 | Score: -994.02
Agent #8 | Ep #9 | Score: -986.01
Agent #9 | Ep #0 | Score: -83.95000000000061
A

Agent #5 | Ep #6 | Score: -979.09
Agent #5 | Ep #7 | Score: -965.0799999999999
Agent #5 | Ep #8 | Score: -982.05
Agent #5 | Ep #9 | Score: -990.09
Agent #6 | Ep #0 | Score: -82.0900000000006
Agent #6 | Ep #1 | Score: -84.02000000000059
Agent #6 | Ep #2 | Score: -84.25000000000068
Agent #6 | Ep #3 | Score: -84.22000000000068
Agent #6 | Ep #4 | Score: -84.2200000000007
Agent #6 | Ep #5 | Score: -83.0700000000006
Agent #6 | Ep #6 | Score: -84.04000000000059
Agent #6 | Ep #7 | Score: -84.03000000000061
Agent #6 | Ep #8 | Score: -82.06000000000058
Agent #6 | Ep #9 | Score: -85.05000000000065
Agent #7 | Ep #0 | Score: -80.99000000000055
Agent #7 | Ep #1 | Score: -84.0700000000006
Agent #7 | Ep #2 | Score: -85.22000000000068
Agent #7 | Ep #3 | Score: -83.0500000000006
Agent #7 | Ep #4 | Score: -83.19000000000067
Agent #7 | Ep #5 | Score: -84.14000000000064
Agent #7 | Ep #6 | Score: -83.17000000000063
Agent #7 | Ep #7 | Score: -80.99000000000055
Agent #7 | Ep #8 | Score: -85.08000000000068
Age

Agent #5 | Ep #0 | Score: -972.1800000000001
Agent #5 | Ep #1 | Score: -956.16
Agent #5 | Ep #2 | Score: -980.16
Agent #5 | Ep #3 | Score: -957.16
Agent #5 | Ep #4 | Score: -964.16
Agent #5 | Ep #5 | Score: -960.16
Agent #5 | Ep #6 | Score: -968.1700000000001
Agent #5 | Ep #7 | Score: -969.14
Agent #5 | Ep #8 | Score: -955.1700000000001
Agent #5 | Ep #9 | Score: -953.1700000000001
Agent #6 | Ep #0 | Score: -976.02
Agent #6 | Ep #1 | Score: -975.02
Agent #6 | Ep #2 | Score: -989.02
Agent #6 | Ep #3 | Score: -975.03
Agent #6 | Ep #4 | Score: -990.03
Agent #6 | Ep #5 | Score: -1000
Agent #6 | Ep #6 | Score: -976
Agent #6 | Ep #7 | Score: -993
Agent #6 | Ep #8 | Score: -984.01
Agent #6 | Ep #9 | Score: -979
Agent #7 | Ep #0 | Score: -86.1400000000007
Agent #7 | Ep #1 | Score: -84.16000000000065
Agent #7 | Ep #2 | Score: -83.03000000000058
Agent #7 | Ep #3 | Score: -85.07000000000065
Agent #7 | Ep #4 | Score: -83.08000000000061
Agent #7 | Ep #5 | Score: -84.08000000000061
Agent #7 | Ep #6 |

Agent #5 | Ep #0 | Score: -998
Agent #5 | Ep #1 | Score: -980
Agent #5 | Ep #2 | Score: -974.02
Agent #5 | Ep #3 | Score: -977
Agent #5 | Ep #4 | Score: -983.03
Agent #5 | Ep #5 | Score: -978.03
Agent #5 | Ep #6 | Score: -969.02
Agent #5 | Ep #7 | Score: -992
Agent #5 | Ep #8 | Score: -993
Agent #5 | Ep #9 | Score: -991.01
Agent #6 | Ep #0 | Score: -975.1800000000001
Agent #6 | Ep #1 | Score: -979.1700000000001
Agent #6 | Ep #2 | Score: -977.15
Agent #6 | Ep #3 | Score: -982.1700000000001
Agent #6 | Ep #4 | Score: -973.1700000000001
Agent #6 | Ep #5 | Score: -975.11
Agent #6 | Ep #6 | Score: -966.14
Agent #6 | Ep #7 | Score: -981.1700000000001
Agent #6 | Ep #8 | Score: -961.16
Agent #6 | Ep #9 | Score: -971.1700000000001
Agent #7 | Ep #0 | Score: -83.99000000000062
Agent #7 | Ep #1 | Score: -82.01000000000057
Agent #7 | Ep #2 | Score: -84.23000000000069
Agent #7 | Ep #3 | Score: -83.02000000000058
Agent #7 | Ep #4 | Score: -84.03000000000061
Agent #7 | Ep #5 | Score: -83.9900000000006


Agent #5 | Ep #0 | Score: -664.099999999998
Agent #5 | Ep #1 | Score: -676.1999999999979
Agent #5 | Ep #2 | Score: -678.1999999999979
Agent #5 | Ep #3 | Score: -667.079999999998
Agent #5 | Ep #4 | Score: -678.2199999999979
Agent #5 | Ep #5 | Score: -665.1499999999979
Agent #5 | Ep #6 | Score: -676.1899999999979
Agent #5 | Ep #7 | Score: -668.1499999999979
Agent #5 | Ep #8 | Score: -675.1699999999979
Agent #5 | Ep #9 | Score: -675.1799999999979
Agent #6 | Ep #0 | Score: -970.0799999999999
Agent #6 | Ep #1 | Score: -974.04
Agent #6 | Ep #2 | Score: -976.01
Agent #6 | Ep #3 | Score: -980.04
Agent #6 | Ep #4 | Score: -991.0699999999999
Agent #6 | Ep #5 | Score: -966.09
Agent #6 | Ep #6 | Score: -972.0799999999999
Agent #6 | Ep #7 | Score: -987.09
Agent #6 | Ep #8 | Score: -990.09
Agent #6 | Ep #9 | Score: -975.03
Agent #7 | Ep #0 | Score: -81.99000000000056
Agent #7 | Ep #1 | Score: -82.96000000000055
Agent #7 | Ep #2 | Score: -86.09000000000073
Agent #7 | Ep #3 | Score: -84.14000000000064

Agent #5 | Ep #0 | Score: -85.24000000000069
Agent #5 | Ep #1 | Score: -81.97000000000054
Agent #5 | Ep #2 | Score: -84.07000000000062
Agent #5 | Ep #3 | Score: -83.19000000000067
Agent #5 | Ep #4 | Score: -83.18000000000063
Agent #5 | Ep #5 | Score: -82.0900000000006
Agent #5 | Ep #6 | Score: -84.04000000000059
Agent #5 | Ep #7 | Score: -77.12000000000025
Agent #5 | Ep #8 | Score: -83.0700000000006
Agent #5 | Ep #9 | Score: -85.08000000000068
Agent #6 | Ep #0 | Score: -977.03
Agent #6 | Ep #1 | Score: -976
Agent #6 | Ep #2 | Score: -999
Agent #6 | Ep #3 | Score: -988
Agent #6 | Ep #4 | Score: -985.02
Agent #6 | Ep #5 | Score: -970
Agent #6 | Ep #6 | Score: -980.02
Agent #6 | Ep #7 | Score: -974.04
Agent #6 | Ep #8 | Score: -990.02
Agent #6 | Ep #9 | Score: -969.01
Agent #7 | Ep #0 | Score: -1000
Agent #7 | Ep #1 | Score: -969.01
Agent #7 | Ep #2 | Score: -990.01
Agent #7 | Ep #3 | Score: -994
Agent #7 | Ep #4 | Score: -968.0699999999999
Agent #7 | Ep #5 | Score: -993.02
Agent #7 | Ep 