In [1]:
import numpy as np
import sys
from six import StringIO

from gym import spaces, utils
from gym.envs.toy_text import discrete

In [2]:
MAP = [
    "+---------+",
    "|R: | : :G|",
    "| : : : : |",
    "| : : : : |",
    "| | : | : |",
    "|Y| : |B: |",
    "+---------+",
]

In [3]:
class TaxiEnv(discrete.DiscreteEnv):
    """
    The Taxi Problem
    from "Hierarchical Reinforcement Learning with the MAXQ Value Function Decomposition"
    by Tom Dietterich

    rendering:
    - blue: passenger
    - magenta: destination
    - yellow: empty taxi
    - green: full taxi
    - other letters: locations

    """
    metadata = {'render.modes': ['human', 'ansi']}

    def __init__(self):
        self.desc = np.asarray(MAP,dtype='c')

        self.locs = locs = [(0,0), (0,4), (4,0), (4,3)]

        nS = 500
        nR = 5 # rows number
        nC = 5 # columns number
        maxR = nR - 1
        maxC = nC - 1
        isd = np.zeros(nS)
        nA = 6
        P = {s : {a : [] for a in range(nA)} for s in range(nS)}
        for row in range(5):
            for col in range(5):
                for passidx in range(5):
                    for destidx in range(4):
                        state = self.encode(row, col, passidx, destidx)
                        if passidx < 4 and passidx != destidx:
                            isd[state] += 1
                        for a in range(nA):
                            # defaults
                            newrow, newcol, newpassidx = row, col, passidx
                            reward = 0
                            done = False
                            taxiloc = (row, col)

                            if a==0:
                                newrow = min(row+1, maxR)
                            elif a==1:
                                newrow = max(row-1, 0)
                            if a==2 and self.desc[1+row,2*col+2]==b":":
                                newcol = min(col+1, maxC)
                            elif a==3 and self.desc[1+row,2*col]==b":":
                                newcol = max(col-1, 0)
                            elif a==4: # pickup
                                if (passidx < 4 and taxiloc == locs[passidx]):
                                    newpassidx = 4
                                    reward = 10
#                                 else:
#                                     reward = -10
                            elif a==5: # dropoff
                                if (taxiloc == locs[destidx]) and passidx==4:
                                    done = True
                                    reward = 2000
                                elif (taxiloc in locs) and passidx==4:
                                    newpassidx = locs.index(taxiloc)
                                    reward = -10
#                                 else:
#                                     reward = -10
                            newstate = self.encode(newrow, newcol, newpassidx, destidx)
                            P[state][a].append((1.0, newstate, reward, done))
        isd /= isd.sum()
        discrete.DiscreteEnv.__init__(self, nS, nA, P, isd)

    def encode(self, taxirow, taxicol, passloc, destidx):
        # (5) 5, 5, 4
        i = taxirow
        i *= 5
        i += taxicol
        i *= 5
        i += passloc
        i *= 4
        i += destidx
        return i

    def decode(self, i):
        out = []
        out.append(i % 4)
        i = i // 4
        out.append(i % 5)
        i = i // 5
        out.append(i % 5)
        i = i // 5
        out.append(i)
        assert 0 <= i < 5
        return reversed(out)

    def _render(self, mode='human', close=False):
        if close:
            return

        outfile = StringIO() if mode == 'ansi' else sys.stdout

        out = self.desc.copy().tolist()
        out = [[c.decode('utf-8') for c in line] for line in out]
        taxirow, taxicol, passidx, destidx = self.decode(self.s)
        def ul(x): return "_" if x == " " else x
        if passidx < 4:
            out[1+taxirow][2*taxicol+1] = utils.colorize(out[1+taxirow][2*taxicol+1], 'yellow', highlight=True)
            pi, pj = self.locs[passidx]
            out[1+pi][2*pj+1] = utils.colorize(out[1+pi][2*pj+1], 'blue', bold=True)
        else: # passenger in taxi
            out[1+taxirow][2*taxicol+1] = utils.colorize(ul(out[1+taxirow][2*taxicol+1]), 'green', highlight=True)

        di, dj = self.locs[destidx]
        out[1+di][2*dj+1] = utils.colorize(out[1+di][2*dj+1], 'magenta')
        outfile.write("\n".join(["".join(row) for row in out])+"\n")
        if self.lastaction is not None:
            outfile.write("  ({})\n".format(["South", "North", "East", "West", "Pickup", "Dropoff"][self.lastaction]))
        else: outfile.write("\n")

        # No need to return anything for human
        if mode != 'human':
            return outfile

In [4]:
taxiEnv = TaxiEnv()

In [5]:
taxiEnv.reset()

88

In [6]:
taxiEnv.render()

+---------+
|[35mR[0m: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |B: |
+---------+



# Genetic algorithm

In [7]:
n_states = taxiEnv.observation_space.n
n_actions = taxiEnv.action_space.n

In [8]:
print(n_states, n_actions)

500 6


In [9]:
action_to_i = {
    'up':0,
    'down':1,
    'right':2,
    'up':3,
    'pick up':4,
    'drop off':5
}

In [10]:
def get_random_policy():
    """
    Build a numpy array representing agent policy.
    """
    return np.random.randint(0, n_actions, n_states)

In [11]:
def sample_reward(env, policy, t_max=100):
    """
    Interact with an environment, return sum of all rewards.
    If game doesn't end on t_max (e.g. agent walks into a wall), 
    force end the game and return whatever reward you got so far.
    Tip: see signature of env.step(...) method above.
    """
    state = env.reset()
    total_reward = 0
    
    for t in range(t_max):
        action = policy[state]
        new_state, reward, is_done, info = env.step(action)
        state = new_state
        total_reward += reward
        if is_done:
            return total_reward
    return total_reward

In [12]:
def evaluate(env, policy, n_times=20):
    """Run several evaluations and average the score the policy gets."""
    rewards = []
    for _ in range(n_times):
        rewards.append(sample_reward(env, policy))
    return float(np.mean(np.array(rewards)))  

In [13]:
import random

def crossover(policy1, policy2, p=0.5):
    """
    for each state, with probability p take action from policy1, else policy2
    """
    result_policy = []
    for i in range(len(policy1)):
        if random.random() < p:
            result_policy.append(policy1[i])
        else:
            result_policy.append(policy2[i])
    return result_policy

In [14]:
def mutation(policy, p=0.1):
    """
    for each state, with probability p replace action with random action
    Tip: mutation can be written as crossover with random policy
    """  
    return crossover(np.random.randint(0, n_actions, len(policy)), policy)

In [15]:
np.random.seed(1234)
policies = [crossover(get_random_policy(), get_random_policy()) 
            for i in range(10 ** 4)]

assert all([len(p) == n_states for p in policies]), 'policy length should always be n_states'
assert np.min(policies) == 0, 'minimal action id should be 0'
assert np.max(policies) == n_actions-1, 'maximal action id should be n_actions-1'

assert any([np.mean(crossover(np.zeros(n_states), np.ones(n_states))) not in (0, 1)
               for _ in range(100)]), "Make sure your crossover changes each action independently"
print("Seems fine!")

Seems fine!


In [16]:
n_epochs = 200 #how many cycles to make
pool_size = 100 #how many policies to maintain
n_crossovers = 100 #how many crossovers to make on each step
n_mutations = 100 #how many mutations to make on each tick

In [17]:
print("initializing...")
pool = [get_random_policy() for _ in range(pool_size)]
pool_scores = [evaluate(taxiEnv, policy) for policy in pool]

initializing...


In [18]:
assert type(pool) == type(pool_scores) == list
assert len(pool) == len(pool_scores) == pool_size
assert all([type(score) in (float, int) for score in pool_scores])

In [19]:
from tqdm import tqdm

In [20]:
#main loop
for epoch in tqdm(range(n_epochs)):
    print("Epoch %s:"%epoch)
    
    crossovered = []
    for _ in range(n_crossovers):
        policy1 = pool[random.randint(0, len(pool) - 1)]
        policy2 = pool[random.randint(0, len(pool) - 1)]
        crossovered.append(crossover(policy1, policy2))
    mutated = []
    for i in range(n_mutations):
        mutated.append(mutation(pool[random.randint(0, len(pool) - 1)]))
    
    assert type(crossovered) == type(mutated) == list
    
    #add new policies to the pool
    pool = pool + crossovered + mutated
    pool_scores = [evaluate(taxiEnv, policy) for policy in pool]
    
    #select pool_size best policies
    selected_indices = np.argsort(pool_scores)[-pool_size:]
    pool = [pool[i] for i in selected_indices]
    pool_scores = [pool_scores[i] for i in selected_indices]

    #print the best policy so far (last in ascending score order)
    print("best score:", pool_scores[-1])
#     print_policy(pool[-1])

  0%|          | 0/200 [00:00<?, ?it/s]

Epoch 0:


  0%|          | 1/200 [00:14<48:44, 14.70s/it]

best score: 1.0
Epoch 1:


  1%|          | 2/200 [00:28<46:22, 14.05s/it]

best score: 2.0
Epoch 2:


  2%|▏         | 3/200 [00:41<45:18, 13.80s/it]

best score: 1.5
Epoch 3:


  2%|▏         | 4/200 [00:54<44:47, 13.71s/it]

best score: 1.5
Epoch 4:


  2%|▎         | 5/200 [01:08<44:24, 13.67s/it]

best score: 1.5
Epoch 5:


  3%|▎         | 6/200 [01:21<44:10, 13.66s/it]

best score: 1.0
Epoch 6:


  4%|▎         | 7/200 [01:35<43:50, 13.63s/it]

best score: 2.0
Epoch 7:


  4%|▍         | 8/200 [01:48<43:33, 13.61s/it]

best score: 2.0
Epoch 8:


  4%|▍         | 9/200 [02:02<43:28, 13.65s/it]

best score: 201.0
Epoch 9:


  5%|▌         | 10/200 [02:16<43:10, 13.63s/it]

best score: 1.5
Epoch 10:


  6%|▌         | 11/200 [02:29<42:56, 13.63s/it]

best score: 2.0
Epoch 11:


  6%|▌         | 12/200 [02:43<42:40, 13.62s/it]

best score: 2.0
Epoch 12:


  6%|▋         | 13/200 [02:56<42:25, 13.61s/it]

best score: 2.5
Epoch 13:


  7%|▋         | 14/200 [03:10<42:09, 13.60s/it]

best score: 2.5
Epoch 14:


  8%|▊         | 15/200 [03:23<41:48, 13.56s/it]

best score: 2.5
Epoch 15:


  8%|▊         | 16/200 [03:36<41:30, 13.53s/it]

best score: 2.0
Epoch 16:


  8%|▊         | 17/200 [03:49<41:11, 13.51s/it]

best score: 2.0
Epoch 17:


  9%|▉         | 18/200 [04:02<40:55, 13.49s/it]

best score: 2.0
Epoch 18:


 10%|▉         | 19/200 [04:15<40:38, 13.47s/it]

best score: 2.0
Epoch 19:


 10%|█         | 20/200 [04:27<40:07, 13.38s/it]

best score: 1.5
Epoch 20:


 10%|█         | 21/200 [04:39<39:43, 13.31s/it]

best score: 2.5
Epoch 21:


 11%|█         | 22/200 [04:51<39:18, 13.25s/it]

best score: 2.5
Epoch 22:


 12%|█▏        | 23/200 [05:03<38:53, 13.19s/it]

best score: 2.5
Epoch 23:


 12%|█▏        | 24/200 [05:15<38:33, 13.15s/it]

best score: 3.0
Epoch 24:


 12%|█▎        | 25/200 [05:27<38:09, 13.08s/it]

best score: 2.5
Epoch 25:


 13%|█▎        | 26/200 [05:38<37:47, 13.03s/it]

best score: 2.5
Epoch 26:


 14%|█▎        | 27/200 [05:50<37:24, 12.97s/it]

best score: 2.0
Epoch 27:


 14%|█▍        | 28/200 [06:01<37:02, 12.92s/it]

best score: 3.0
Epoch 28:


 14%|█▍        | 29/200 [06:13<36:40, 12.87s/it]

best score: 2.0
Epoch 29:


 15%|█▌        | 30/200 [06:24<36:19, 12.82s/it]

best score: 2.0
Epoch 30:


 16%|█▌        | 31/200 [06:36<36:01, 12.79s/it]

best score: 3.0
Epoch 31:


 16%|█▌        | 32/200 [06:47<35:41, 12.75s/it]

best score: 2.0
Epoch 32:


 16%|█▋        | 33/200 [07:00<35:26, 12.73s/it]

best score: 2.0
Epoch 33:


 17%|█▋        | 34/200 [07:13<35:15, 12.74s/it]

best score: 2.5
Epoch 34:


 18%|█▊        | 35/200 [07:26<35:03, 12.75s/it]

best score: 2.0
Epoch 35:


 18%|█▊        | 36/200 [07:38<34:50, 12.75s/it]

best score: 2.0
Epoch 36:


 18%|█▊        | 37/200 [07:51<34:37, 12.75s/it]

best score: 2.5
Epoch 37:


 19%|█▉        | 38/200 [08:04<34:25, 12.75s/it]

best score: 2.5
Epoch 38:


 20%|█▉        | 39/200 [08:17<34:12, 12.75s/it]

best score: 2.5
Epoch 39:


 20%|██        | 40/200 [08:29<33:59, 12.74s/it]

best score: 3.0
Epoch 40:


 20%|██        | 41/200 [08:42<33:45, 12.74s/it]

best score: 2.5
Epoch 41:


 21%|██        | 42/200 [08:54<33:31, 12.73s/it]

best score: 3.5
Epoch 42:


 22%|██▏       | 43/200 [09:07<33:17, 12.73s/it]

best score: 3.0
Epoch 43:


 22%|██▏       | 44/200 [09:19<33:04, 12.72s/it]

best score: 3.0
Epoch 44:


 22%|██▎       | 45/200 [09:32<32:50, 12.71s/it]

best score: 3.0
Epoch 45:


 23%|██▎       | 46/200 [09:44<32:37, 12.71s/it]

best score: 3.0
Epoch 46:


 24%|██▎       | 47/200 [09:57<32:25, 12.72s/it]

best score: 3.0
Epoch 47:


 24%|██▍       | 48/200 [10:10<32:12, 12.72s/it]

best score: 4.0
Epoch 48:


 24%|██▍       | 49/200 [10:23<32:00, 12.72s/it]

best score: 3.0
Epoch 49:


 25%|██▌       | 50/200 [10:35<31:47, 12.72s/it]

best score: 3.5
Epoch 50:


 26%|██▌       | 51/200 [10:48<31:34, 12.72s/it]

best score: 3.0
Epoch 51:


 26%|██▌       | 52/200 [11:01<31:21, 12.72s/it]

best score: 4.0
Epoch 52:


 26%|██▋       | 53/200 [11:13<31:09, 12.72s/it]

best score: 2.5
Epoch 53:


 27%|██▋       | 54/200 [11:26<30:56, 12.72s/it]

best score: 2.5
Epoch 54:


 28%|██▊       | 55/200 [11:39<30:44, 12.72s/it]

best score: 3.5
Epoch 55:


 28%|██▊       | 56/200 [11:52<30:32, 12.72s/it]

best score: 2.5
Epoch 56:


 28%|██▊       | 57/200 [12:04<30:17, 12.71s/it]

best score: 4.0
Epoch 57:


 29%|██▉       | 58/200 [12:16<30:02, 12.69s/it]

best score: 3.0
Epoch 58:


 30%|██▉       | 59/200 [12:28<29:49, 12.69s/it]

best score: 4.5
Epoch 59:


 30%|███       | 60/200 [12:41<29:36, 12.69s/it]

best score: 4.5
Epoch 60:


 30%|███       | 61/200 [12:53<29:21, 12.67s/it]

best score: 3.0
Epoch 61:


 31%|███       | 62/200 [13:04<29:05, 12.65s/it]

best score: 3.5
Epoch 62:


 32%|███▏      | 63/200 [13:16<28:51, 12.64s/it]

best score: 3.5
Epoch 63:


 32%|███▏      | 64/200 [13:28<28:37, 12.63s/it]

best score: 4.5
Epoch 64:


 32%|███▎      | 65/200 [13:39<28:22, 12.61s/it]

best score: 3.0
Epoch 65:


 33%|███▎      | 66/200 [13:52<28:09, 12.61s/it]

best score: 4.5
Epoch 66:


 34%|███▎      | 67/200 [14:04<27:56, 12.61s/it]

best score: 4.0
Epoch 67:


 34%|███▍      | 68/200 [14:16<27:41, 12.59s/it]

best score: 3.5
Epoch 68:


 34%|███▍      | 69/200 [14:27<27:27, 12.57s/it]

best score: 4.0
Epoch 69:


 35%|███▌      | 70/200 [14:39<27:12, 12.56s/it]

best score: 3.5
Epoch 70:


 36%|███▌      | 71/200 [14:51<26:58, 12.55s/it]

best score: 4.5
Epoch 71:


 36%|███▌      | 72/200 [15:02<26:44, 12.54s/it]

best score: 3.5
Epoch 72:


 36%|███▋      | 73/200 [15:14<26:30, 12.52s/it]

best score: 4.0
Epoch 73:


 37%|███▋      | 74/200 [15:26<26:16, 12.51s/it]

best score: 3.5
Epoch 74:


 38%|███▊      | 75/200 [15:38<26:03, 12.51s/it]

best score: 5.0
Epoch 75:


 38%|███▊      | 76/200 [15:49<25:49, 12.50s/it]

best score: 3.5
Epoch 76:


 38%|███▊      | 77/200 [16:01<25:35, 12.48s/it]

best score: 3.5
Epoch 77:


 39%|███▉      | 78/200 [16:13<25:22, 12.48s/it]

best score: 3.5
Epoch 78:


 40%|███▉      | 79/200 [16:24<25:08, 12.46s/it]

best score: 4.5
Epoch 79:


 40%|████      | 80/200 [16:36<24:54, 12.45s/it]

best score: 4.0
Epoch 80:


 40%|████      | 81/200 [16:47<24:40, 12.44s/it]

best score: 5.0
Epoch 81:


 41%|████      | 82/200 [16:59<24:27, 12.43s/it]

best score: 4.5
Epoch 82:


 42%|████▏     | 83/200 [17:11<24:13, 12.42s/it]

best score: 5.0
Epoch 83:


 42%|████▏     | 84/200 [17:22<24:00, 12.42s/it]

best score: 4.0
Epoch 84:


 42%|████▎     | 85/200 [17:34<23:46, 12.41s/it]

best score: 4.0
Epoch 85:


 43%|████▎     | 86/200 [17:46<23:33, 12.40s/it]

best score: 4.0
Epoch 86:


 44%|████▎     | 87/200 [17:58<23:20, 12.39s/it]

best score: 4.5
Epoch 87:


 44%|████▍     | 88/200 [18:10<23:07, 12.39s/it]

best score: 5.0
Epoch 88:


 44%|████▍     | 89/200 [18:22<22:54, 12.38s/it]

best score: 4.5
Epoch 89:


 45%|████▌     | 90/200 [18:33<22:41, 12.38s/it]

best score: 4.5
Epoch 90:


 46%|████▌     | 91/200 [18:46<22:28, 12.37s/it]

best score: 5.0
Epoch 91:


 46%|████▌     | 92/200 [18:57<22:15, 12.37s/it]

best score: 5.0
Epoch 92:


 46%|████▋     | 93/200 [19:09<22:02, 12.36s/it]

best score: 4.5
Epoch 93:


 47%|████▋     | 94/200 [19:22<21:51, 12.37s/it]

best score: 4.5
Epoch 94:


 48%|████▊     | 95/200 [19:35<21:39, 12.37s/it]

best score: 4.5
Epoch 95:


 48%|████▊     | 96/200 [19:48<21:27, 12.38s/it]

best score: 4.5
Epoch 96:


 48%|████▊     | 97/200 [20:00<21:15, 12.38s/it]

best score: 4.5
Epoch 97:


 49%|████▉     | 98/200 [20:13<21:02, 12.38s/it]

best score: 4.5
Epoch 98:


 50%|████▉     | 99/200 [20:25<20:50, 12.38s/it]

best score: 5.0
Epoch 99:


 50%|█████     | 100/200 [20:38<20:38, 12.38s/it]

best score: 4.5
Epoch 100:


 50%|█████     | 101/200 [20:50<20:26, 12.38s/it]

best score: 5.0
Epoch 101:


 51%|█████     | 102/200 [21:03<20:13, 12.38s/it]

best score: 5.0
Epoch 102:


 52%|█████▏    | 103/200 [21:15<20:01, 12.38s/it]

best score: 5.0
Epoch 103:


 52%|█████▏    | 104/200 [21:27<19:48, 12.38s/it]

best score: 4.5
Epoch 104:


 52%|█████▎    | 105/200 [21:39<19:36, 12.38s/it]

best score: 6.0
Epoch 105:


 53%|█████▎    | 106/200 [21:52<19:23, 12.38s/it]

best score: 4.5
Epoch 106:


 54%|█████▎    | 107/200 [22:05<19:12, 12.39s/it]

best score: 5.0
Epoch 107:


 54%|█████▍    | 108/200 [22:17<18:59, 12.39s/it]

best score: 5.0
Epoch 108:


 55%|█████▍    | 109/200 [22:28<18:45, 12.37s/it]

best score: 5.0
Epoch 109:


 55%|█████▌    | 110/200 [22:40<18:32, 12.36s/it]

best score: 5.0
Epoch 110:


 56%|█████▌    | 111/200 [22:53<18:21, 12.38s/it]

best score: 5.0
Epoch 111:


 56%|█████▌    | 112/200 [23:06<18:09, 12.38s/it]

best score: 5.5
Epoch 112:


 56%|█████▋    | 113/200 [23:17<17:56, 12.37s/it]

best score: 5.5
Epoch 113:


 57%|█████▋    | 114/200 [23:29<17:43, 12.36s/it]

best score: 6.0
Epoch 114:


 57%|█████▊    | 115/200 [23:41<17:30, 12.36s/it]

best score: 5.0
Epoch 115:


 58%|█████▊    | 116/200 [23:52<17:17, 12.35s/it]

best score: 6.0
Epoch 116:


 58%|█████▊    | 117/200 [24:04<17:04, 12.34s/it]

best score: 5.5
Epoch 117:


 59%|█████▉    | 118/200 [24:15<16:51, 12.34s/it]

best score: 5.0
Epoch 118:


 60%|█████▉    | 119/200 [24:27<16:38, 12.33s/it]

best score: 5.0
Epoch 119:


 60%|██████    | 120/200 [24:38<16:25, 12.32s/it]

best score: 5.5
Epoch 120:


 60%|██████    | 121/200 [24:50<16:13, 12.32s/it]

best score: 4.5
Epoch 121:


 61%|██████    | 122/200 [25:02<16:00, 12.32s/it]

best score: 5.5
Epoch 122:


 62%|██████▏   | 123/200 [25:13<15:47, 12.31s/it]

best score: 5.5
Epoch 123:


 62%|██████▏   | 124/200 [25:25<15:35, 12.30s/it]

best score: 6.0
Epoch 124:


 62%|██████▎   | 125/200 [25:36<15:22, 12.30s/it]

best score: 5.5
Epoch 125:


 63%|██████▎   | 126/200 [25:48<15:09, 12.29s/it]

best score: 6.0
Epoch 126:


 64%|██████▎   | 127/200 [26:00<14:56, 12.29s/it]

best score: 5.5
Epoch 127:


 64%|██████▍   | 128/200 [26:11<14:44, 12.28s/it]

best score: 6.0
Epoch 128:


 64%|██████▍   | 129/200 [26:23<14:31, 12.27s/it]

best score: 5.5
Epoch 129:


 65%|██████▌   | 130/200 [26:34<14:18, 12.27s/it]

best score: 6.0
Epoch 130:


 66%|██████▌   | 131/200 [26:46<14:06, 12.26s/it]

best score: 6.0
Epoch 131:


 66%|██████▌   | 132/200 [26:57<13:53, 12.26s/it]

best score: 6.0
Epoch 132:


 66%|██████▋   | 133/200 [27:09<13:41, 12.26s/it]

best score: 5.5
Epoch 133:


 67%|██████▋   | 134/200 [27:21<13:28, 12.25s/it]

best score: 5.5
Epoch 134:


 68%|██████▊   | 135/200 [27:33<13:15, 12.25s/it]

best score: 6.0
Epoch 135:


 68%|██████▊   | 136/200 [27:44<13:03, 12.24s/it]

best score: 6.0
Epoch 136:


 68%|██████▊   | 137/200 [27:56<12:50, 12.24s/it]

best score: 5.5
Epoch 137:


 69%|██████▉   | 138/200 [28:07<12:38, 12.23s/it]

best score: 5.5
Epoch 138:


 70%|██████▉   | 139/200 [28:19<12:25, 12.23s/it]

best score: 5.5
Epoch 139:


 70%|███████   | 140/200 [28:32<12:13, 12.23s/it]

best score: 5.5
Epoch 140:


 70%|███████   | 141/200 [28:44<12:01, 12.23s/it]

best score: 5.5
Epoch 141:


 71%|███████   | 142/200 [28:57<11:49, 12.24s/it]

best score: 5.5
Epoch 142:


 72%|███████▏  | 143/200 [29:10<11:37, 12.24s/it]

best score: 100.5
Epoch 143:


 72%|███████▏  | 144/200 [29:22<11:25, 12.24s/it]

best score: 101.0
Epoch 144:


 72%|███████▎  | 145/200 [29:33<11:12, 12.23s/it]

best score: 102.0
Epoch 145:


 73%|███████▎  | 146/200 [29:45<11:00, 12.23s/it]

best score: 103.5
Epoch 146:


 74%|███████▎  | 147/200 [29:57<10:47, 12.22s/it]

best score: 102.0
Epoch 147:


 74%|███████▍  | 148/200 [30:09<10:35, 12.22s/it]

best score: 6.0
Epoch 148:


 74%|███████▍  | 149/200 [30:22<10:23, 12.23s/it]

best score: 5.5
Epoch 149:


 75%|███████▌  | 150/200 [30:34<10:11, 12.23s/it]

best score: 5.5
Epoch 150:


 76%|███████▌  | 151/200 [30:45<09:58, 12.22s/it]

best score: 6.0
Epoch 151:


 76%|███████▌  | 152/200 [30:57<09:46, 12.22s/it]

best score: 6.0
Epoch 152:


 76%|███████▋  | 153/200 [31:08<09:33, 12.21s/it]

best score: 6.0
Epoch 153:


 77%|███████▋  | 154/200 [31:19<09:21, 12.21s/it]

best score: 6.0
Epoch 154:


 78%|███████▊  | 155/200 [31:31<09:09, 12.20s/it]

best score: 6.0
Epoch 155:


 78%|███████▊  | 156/200 [31:43<08:56, 12.20s/it]

best score: 7.0
Epoch 156:


 78%|███████▊  | 157/200 [31:55<08:44, 12.20s/it]

best score: 5.5
Epoch 157:


 79%|███████▉  | 158/200 [32:06<08:32, 12.20s/it]

best score: 6.5
Epoch 158:


 80%|███████▉  | 159/200 [32:18<08:19, 12.19s/it]

best score: 6.0
Epoch 159:


 80%|████████  | 160/200 [32:29<08:07, 12.19s/it]

best score: 7.0
Epoch 160:


 80%|████████  | 161/200 [32:41<07:55, 12.18s/it]

best score: 6.0
Epoch 161:


 81%|████████  | 162/200 [32:53<07:42, 12.18s/it]

best score: 6.0
Epoch 162:


 82%|████████▏ | 163/200 [33:04<07:30, 12.17s/it]

best score: 7.5
Epoch 163:


 82%|████████▏ | 164/200 [33:15<07:18, 12.17s/it]

best score: 6.0
Epoch 164:


 82%|████████▎ | 165/200 [33:26<07:05, 12.16s/it]

best score: 7.5
Epoch 165:


 83%|████████▎ | 166/200 [33:38<06:53, 12.16s/it]

best score: 5.5
Epoch 166:


 84%|████████▎ | 167/200 [33:50<06:41, 12.16s/it]

best score: 6.5
Epoch 167:


 84%|████████▍ | 168/200 [34:02<06:29, 12.16s/it]

best score: 5.5
Epoch 168:


 84%|████████▍ | 169/200 [34:13<06:16, 12.15s/it]

best score: 7.0
Epoch 169:


 85%|████████▌ | 170/200 [34:25<06:04, 12.15s/it]

best score: 6.0
Epoch 170:


 86%|████████▌ | 171/200 [34:37<05:52, 12.15s/it]

best score: 6.5
Epoch 171:


 86%|████████▌ | 172/200 [34:48<05:40, 12.14s/it]

best score: 6.0
Epoch 172:


 86%|████████▋ | 173/200 [35:01<05:27, 12.15s/it]

best score: 6.0
Epoch 173:


 87%|████████▋ | 174/200 [35:12<05:15, 12.14s/it]

best score: 6.0
Epoch 174:


 88%|████████▊ | 175/200 [35:24<05:03, 12.14s/it]

best score: 6.5
Epoch 175:


 88%|████████▊ | 176/200 [35:36<04:51, 12.14s/it]

best score: 6.5
Epoch 176:


 88%|████████▊ | 177/200 [35:47<04:39, 12.13s/it]

best score: 7.5
Epoch 177:


 89%|████████▉ | 178/200 [35:59<04:26, 12.13s/it]

best score: 6.0
Epoch 178:


 90%|████████▉ | 179/200 [36:10<04:14, 12.13s/it]

best score: 6.5
Epoch 179:


 90%|█████████ | 180/200 [36:22<04:02, 12.12s/it]

best score: 7.0
Epoch 180:


 90%|█████████ | 181/200 [36:33<03:50, 12.12s/it]

best score: 6.0
Epoch 181:


 91%|█████████ | 182/200 [36:45<03:38, 12.12s/it]

best score: 6.5
Epoch 182:


 92%|█████████▏| 183/200 [36:57<03:25, 12.12s/it]

best score: 6.5
Epoch 183:


 92%|█████████▏| 184/200 [37:09<03:13, 12.12s/it]

best score: 6.0
Epoch 184:


 92%|█████████▎| 185/200 [37:21<03:01, 12.12s/it]

best score: 6.0
Epoch 185:


 93%|█████████▎| 186/200 [37:33<02:49, 12.11s/it]

best score: 6.0
Epoch 186:


 94%|█████████▎| 187/200 [37:45<02:37, 12.11s/it]

best score: 7.5
Epoch 187:


 94%|█████████▍| 188/200 [37:56<02:25, 12.11s/it]

best score: 7.0
Epoch 188:


 94%|█████████▍| 189/200 [38:08<02:13, 12.11s/it]

best score: 6.5
Epoch 189:


 95%|█████████▌| 190/200 [38:21<02:01, 12.11s/it]

best score: 6.0
Epoch 190:


 96%|█████████▌| 191/200 [38:33<01:49, 12.11s/it]

best score: 6.0
Epoch 191:


 96%|█████████▌| 192/200 [38:44<01:36, 12.11s/it]

best score: 6.5
Epoch 192:


 96%|█████████▋| 193/200 [38:56<01:24, 12.11s/it]

best score: 6.5
Epoch 193:


 97%|█████████▋| 194/200 [39:09<01:12, 12.11s/it]

best score: 7.0
Epoch 194:


 98%|█████████▊| 195/200 [39:21<01:00, 12.11s/it]

best score: 6.5
Epoch 195:


 98%|█████████▊| 196/200 [39:33<00:48, 12.11s/it]

best score: 6.5
Epoch 196:


 98%|█████████▊| 197/200 [39:44<00:36, 12.11s/it]

best score: 6.5
Epoch 197:


 99%|█████████▉| 198/200 [39:56<00:24, 12.10s/it]

best score: 6.5
Epoch 198:


100%|█████████▉| 199/200 [40:08<00:12, 12.10s/it]

best score: 6.5
Epoch 199:


100%|██████████| 200/200 [40:20<00:00, 12.10s/it]

best score: 6.0



