In [1]:
import gym
import numpy as np
import torch
import torch.nn as nn
import random


torch.cuda.is_available()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
env = gym.make('CartPole-v0').unwrapped


[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m


In [2]:
input_size = 4
num_classes = 2
hidden_size = 20
learning_rate = 0.001
gamma = 0.9

hidden_size = 100


class NN(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NN, self).__init__()        
        self.fc1 = nn.Linear(input_size, hidden_size) 
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.relu2 = nn.ReLU()
        self.out = nn.Linear(hidden_size, num_classes)
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu1(out)
        out = self.fc2(out)
        out = self.relu2(out)
        out = self.out(out)
        return out
    
class DQNAgent:
    def __init__(self, capacity = 20000, input_size = 4, \
                 hidden_size = 10, num_classes = 2, learning_rate = 0.001, \
                epsilon = 0.9, epsilon_decay_rate = 0.9999, batch_size = 32, \
                gamma = 0.95):
        self.replay = deque(maxlen=capacity)
        self.epsilon = epsilon
        self.batch_size = batch_size
        self.gamma = gamma
        self.epsilon_decay_rate = epsilon_decay_rate
        
        # Model initialization
        self.q_network = NN(input_size, hidden_size, num_classes)
        self.criterion = nn.MSELoss()
        self.optimizer = torch.optim.Adam(self.q_network.parameters(), lr = learning_rate)
    
    def act(self, state):
        if random.random()<self.epsilon:
            return random.choice([0,1])
        state = torch.tensor(state, dtype = torch.float32)
        return np.argmax(self.q_network(state).detach().numpy())
    
    def get_q(self, state):
        state = torch.tensor(state, dtype = torch.float32)
        return self.q_network(state).detach().numpy()
    
    def push(self, state, action, reward, next_state, done):
        self.replay.append((state, action, reward, next_state, done))
    
    def train(self, freeze = True):
        if self.batch_size >= len(self.replay):
            return -1
        minibatch = random.sample(self.replay, self.batch_size)  
        minibatch = np.array(minibatch).T
        
        
        
        targets = minibatch[2]
        done = np.logical_not(minibatch[4])

        next_states = np.stack(minibatch[3])
        q_updates = np.max(self.get_q(next_states), axis = 1)

        targets [np.nonzero(done)]+= q_updates[np.nonzero(done)]
        
        states = np.stack(minibatch[0])
        states = torch.tensor(states, dtype=torch.float32, requires_grad = True)
        outputs = self.q_network(states)
        
        final_targets = copy.deepcopy(outputs.detach().numpy())
        actions = list(minibatch[1])

        
        final_targets[range(self.batch_size), actions] = targets
        
        


        loss = self.criterion(outputs, torch.tensor(final_targets, dtype = torch.float32))
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        self.epsilon*=self.epsilon_decay_rate

In [3]:
# Architecture 1
import copy
from collections import deque

num_episodes = 10000
batch_size = 64

agent = DQNAgent()


for i in range(num_episodes):
    state = env.reset()
    done = False
    t = 0
    while not done:
        t+=1
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        reward = reward if not done else -10 # reward shaping
        agent.push(state, action, reward, next_state, done)
        state = next_state
        debug = agent.train()
        if done:
            print("episode: {}/{}, score: {}, eps: {}" .format(i+1, num_episodes, t, agent.epsilon))
        

episode: 1/10000, score: 14, eps: 0.9
episode: 2/10000, score: 18, eps: 0.9
episode: 3/10000, score: 43, eps: 0.8961381159041989
episode: 4/10000, score: 23, eps: 0.8940792638807852
episode: 5/10000, score: 11, eps: 0.8930962682866179
episode: 6/10000, score: 15, eps: 0.8917575612290328
episode: 7/10000, score: 15, eps: 0.8904208608270006
episode: 8/10000, score: 28, eps: 0.8879310452923425
episode: 9/10000, score: 18, eps: 0.8863341272210353
episode: 10/10000, score: 13, eps: 0.8851825839428392
episode: 11/10000, score: 27, eps: 0.882795695369457
episode: 12/10000, score: 20, eps: 0.8810317802845797
episode: 13/10000, score: 27, eps: 0.878656084323888
episode: 14/10000, score: 22, eps: 0.8767250692814424
episode: 15/10000, score: 23, eps: 0.874710818184617
episode: 16/10000, score: 14, eps: 0.873487018707696
episode: 17/10000, score: 12, eps: 0.8724394105945552
episode: 18/10000, score: 13, eps: 0.8713059196140674
episode: 19/10000, score: 21, eps: 0.8694780057669936
episode: 20/10000

episode: 155/10000, score: 78, eps: 0.5281963642454592
episode: 156/10000, score: 73, eps: 0.5243543789926307
episode: 157/10000, score: 43, eps: 0.5221043836184135
episode: 158/10000, score: 72, eps: 0.5183585459597416
episode: 159/10000, score: 97, eps: 0.5133545265903174
episode: 160/10000, score: 30, eps: 0.511816694019924
episode: 161/10000, score: 154, eps: 0.5039947097011402
episode: 162/10000, score: 102, eps: 0.4988798381075517
episode: 163/10000, score: 161, eps: 0.4909117892228175
episode: 164/10000, score: 203, eps: 0.48104626053817845
episode: 165/10000, score: 122, eps: 0.4752128605815319
episode: 166/10000, score: 54, eps: 0.47265349965826053
episode: 167/10000, score: 60, eps: 0.46982592847607496
episode: 168/10000, score: 40, eps: 0.4679502847668233
episode: 169/10000, score: 139, eps: 0.46149045265827654
episode: 170/10000, score: 135, eps: 0.4553018889125556
episode: 171/10000, score: 181, eps: 0.44713465282351755
episode: 172/10000, score: 93, eps: 0.442995371080151

episode: 303/10000, score: 10, eps: 0.012000866901052199
episode: 304/10000, score: 8, eps: 0.011991269567102126
episode: 305/10000, score: 9, eps: 0.011980481740341664
episode: 306/10000, score: 10, eps: 0.011968506648380702
episode: 307/10000, score: 9, eps: 0.011957739300054351
episode: 308/10000, score: 10, eps: 0.011945786940302305
episode: 309/10000, score: 9, eps: 0.011935040031536037
episode: 310/10000, score: 10, eps: 0.011923110360840563
episode: 311/10000, score: 10, eps: 0.011911192614448866
episode: 312/10000, score: 10, eps: 0.011899286780442
episode: 313/10000, score: 10, eps: 0.011887392846912944
episode: 314/10000, score: 10, eps: 0.011875510801966575
episode: 315/10000, score: 11, eps: 0.011862454269656285
episode: 316/10000, score: 9, eps: 0.011851782330300837
episode: 317/10000, score: 10, eps: 0.011839935879850619
episode: 318/10000, score: 11, eps: 0.011826918460394317
episode: 319/10000, score: 10, eps: 0.011815096862628249
episode: 320/10000, score: 10, eps: 0.0

episode: 447/10000, score: 146, eps: 0.00031917923679502115
episode: 448/10000, score: 48, eps: 0.0003176507712858773
episode: 449/10000, score: 41, eps: 0.0003163510044769871
episode: 450/10000, score: 93, eps: 0.0003134224326719203
episode: 451/10000, score: 113, eps: 0.000309900519372158
episode: 452/10000, score: 106, eps: 0.0003066327601983436
episode: 453/10000, score: 35, eps: 0.00030556136799726594
episode: 454/10000, score: 16, eps: 0.0003050728363110532
episode: 455/10000, score: 12, eps: 0.00030470695018845105
episode: 456/10000, score: 11, eps: 0.0003043719400817998
episode: 457/10000, score: 11, eps: 0.00030403729830206556
episode: 458/10000, score: 13, eps: 0.0003036422868764327
episode: 459/10000, score: 12, eps: 0.00030327811646930407
episode: 460/10000, score: 13, eps: 0.00030288409138810895
episode: 461/10000, score: 14, eps: 0.0003024603291744693
episode: 462/10000, score: 10, eps: 0.0003021580049161542
episode: 463/10000, score: 13, eps: 0.0003017654351066116
episod

episode: 588/10000, score: 37, eps: 2.9012678173023594e-05
episode: 589/10000, score: 44, eps: 2.8885296465147447e-05
episode: 590/10000, score: 78, eps: 2.8660856384808543e-05
episode: 591/10000, score: 72, eps: 2.8455229083938266e-05
episode: 592/10000, score: 4416, eps: 1.8296501400970204e-05
episode: 593/10000, score: 132, eps: 1.8056562665072122e-05
episode: 594/10000, score: 17, eps: 1.802589105319256e-05
episode: 595/10000, score: 15, eps: 1.7998871135599057e-05
episode: 596/10000, score: 13, eps: 1.7975486637095875e-05
episode: 597/10000, score: 15, eps: 1.794854227322481e-05
episode: 598/10000, score: 15, eps: 1.792163829762023e-05
episode: 599/10000, score: 13, eps: 1.78983541415869e-05
episode: 600/10000, score: 12, eps: 1.7876887925593982e-05
episode: 601/10000, score: 10, eps: 1.7859019080123115e-05
episode: 602/10000, score: 11, eps: 1.7839383978649325e-05
episode: 603/10000, score: 12, eps: 1.7817988487944586e-05
episode: 604/10000, score: 11, eps: 1.7798398497562136e-05

episode: 728/10000, score: 167, eps: 5.983788989315565e-06
episode: 729/10000, score: 224, eps: 5.851235628545676e-06
episode: 730/10000, score: 175, eps: 5.749724740446775e-06
episode: 731/10000, score: 172, eps: 5.651670258131969e-06
episode: 732/10000, score: 154, eps: 5.565296998622141e-06
episode: 733/10000, score: 191, eps: 5.460003316992465e-06
episode: 734/10000, score: 170, eps: 5.36796321611193e-06
episode: 735/10000, score: 166, eps: 5.279586166831196e-06
episode: 736/10000, score: 152, eps: 5.199939324228623e-06
episode: 737/10000, score: 169, eps: 5.112794440811929e-06
episode: 738/10000, score: 163, eps: 5.030127325394929e-06
episode: 739/10000, score: 184, eps: 4.938414740317211e-06
episode: 740/10000, score: 167, eps: 4.856623978381508e-06
episode: 741/10000, score: 193, eps: 4.763785269048099e-06
episode: 742/10000, score: 168, eps: 4.684418257802522e-06
episode: 743/10000, score: 210, eps: 4.587066379272323e-06
episode: 744/10000, score: 165, eps: 4.511997055618458e-0

episode: 867/10000, score: 3929, eps: 1.948905336686946e-07
episode: 868/10000, score: 105, eps: 1.928547876471066e-07
episode: 869/10000, score: 99, eps: 1.9095485045852873e-07
episode: 870/10000, score: 70, eps: 1.8962276762957471e-07
episode: 871/10000, score: 64, eps: 1.8841299682333156e-07
episode: 872/10000, score: 57, eps: 1.8734204430733832e-07
episode: 873/10000, score: 54, eps: 1.8633307349181963e-07
episode: 874/10000, score: 47, eps: 1.8545931928886252e-07
episode: 875/10000, score: 40, eps: 1.8471892676375325e-07
episode: 876/10000, score: 34, eps: 1.8409191758143372e-07
episode: 877/10000, score: 36, eps: 1.8343034514388885e-07
episode: 878/10000, score: 30, eps: 1.8288085128623384e-07
episode: 879/10000, score: 25, eps: 1.8242419738017744e-07
episode: 880/10000, score: 24, eps: 1.8198688242821703e-07
episode: 881/10000, score: 24, eps: 1.8155061582603668e-07
episode: 882/10000, score: 22, eps: 1.8115162357368683e-07
episode: 883/10000, score: 21, eps: 1.8077158534176832e

episode: 1144/10000, score: 287, eps: 1.6173166863209314e-10
episode: 1145/10000, score: 312, eps: 1.5676330171965773e-10
episode: 1146/10000, score: 259, eps: 1.5275506255955186e-10
episode: 1147/10000, score: 244, eps: 1.4907276168420885e-10
episode: 1148/10000, score: 200, eps: 1.4612077709844353e-10
episode: 1149/10000, score: 209, eps: 1.4309839564360161e-10
episode: 1150/10000, score: 216, eps: 1.400404619795217e-10
episode: 1151/10000, score: 239, eps: 1.3673301104536568e-10
episode: 1152/10000, score: 247, eps: 1.3339690933771646e-10
episode: 1153/10000, score: 264, eps: 1.2992113918627515e-10
episode: 1154/10000, score: 349, eps: 1.2546488261646484e-10
episode: 1155/10000, score: 289, eps: 1.2189066501680533e-10
episode: 1156/10000, score: 304, eps: 1.182407658699529e-10
episode: 1157/10000, score: 336, eps: 1.1433368728911182e-10
episode: 1158/10000, score: 346, eps: 1.1044520594831388e-10
episode: 1159/10000, score: 391, eps: 1.0620992594372764e-10
episode: 1160/10000, score

episode: 1297/10000, score: 10, eps: 6.581406459355253e-13
episode: 1298/10000, score: 11, eps: 6.574170530937801e-13
episode: 1299/10000, score: 9, eps: 6.568256143609201e-13
episode: 1300/10000, score: 10, eps: 6.561690842392805e-13
episode: 1301/10000, score: 10, eps: 6.555132103524027e-13
episode: 1302/10000, score: 13, eps: 6.546615542918188e-13
episode: 1303/10000, score: 20, eps: 6.533534742941912e-13
episode: 1304/10000, score: 17, eps: 6.522436615044915e-13
episode: 1305/10000, score: 163, eps: 6.416977452409318e-13
episode: 1306/10000, score: 476, eps: 6.118670441954609e-13
episode: 1307/10000, score: 160, eps: 6.021545926782445e-13
episode: 1308/10000, score: 10, eps: 6.015527089828873e-13
episode: 1309/10000, score: 10, eps: 6.009514269004498e-13
episode: 1310/10000, score: 10, eps: 6.003507458295898e-13
episode: 1311/10000, score: 9, eps: 5.998106462341898e-13
episode: 1312/10000, score: 10, eps: 5.992111054307818e-13
episode: 1313/10000, score: 11, eps: 5.98552302682066e-

episode: 1438/10000, score: 109, eps: 5.255826256261696e-13
episode: 1439/10000, score: 140, eps: 5.18275373620117e-13
episode: 1440/10000, score: 150, eps: 5.10558875610586e-13
episode: 1441/10000, score: 202, eps: 5.003485473143176e-13
episode: 1442/10000, score: 455, eps: 4.780917573447335e-13
episode: 1443/10000, score: 469, eps: 4.561858671588516e-13
episode: 1444/10000, score: 369, eps: 4.396585859810758e-13
episode: 1445/10000, score: 22, eps: 4.3869235202649836e-13
episode: 1446/10000, score: 14, eps: 4.3807858178406154e-13
episode: 1447/10000, score: 13, eps: 4.3750942120377695e-13
episode: 1448/10000, score: 15, eps: 4.3685361625785655e-13
episode: 1449/10000, score: 13, eps: 4.362860471776332e-13
episode: 1450/10000, score: 14, eps: 4.3567564357312307e-13
episode: 1451/10000, score: 11, eps: 4.3519663991492455e-13
episode: 1452/10000, score: 12, eps: 4.346746910810874e-13
episode: 1453/10000, score: 13, eps: 4.341099529046553e-13
episode: 1454/10000, score: 11, eps: 4.336326

episode: 1582/10000, score: 9, eps: 3.835199553662446e-13
episode: 1583/10000, score: 9, eps: 3.8317492544138814e-13
episode: 1584/10000, score: 10, eps: 3.827919228986903e-13
episode: 1585/10000, score: 12, eps: 3.8233282514968583e-13
episode: 1586/10000, score: 10, eps: 3.819506643284356e-13
episode: 1587/10000, score: 11, eps: 3.8153072860753055e-13
episode: 1588/10000, score: 12, eps: 3.8107314345956455e-13
episode: 1589/10000, score: 13, eps: 3.805780455011593e-13
episode: 1590/10000, score: 10, eps: 3.8019763867011734e-13
episode: 1591/10000, score: 15, eps: 3.796277412466948e-13
episode: 1592/10000, score: 14, eps: 3.7909660773204753e-13
episode: 1593/10000, score: 16, eps: 3.7849050786338047e-13
episode: 1594/10000, score: 16, eps: 3.778853770275227e-13
episode: 1595/10000, score: 14, eps: 3.7735668123786484e-13
episode: 1596/10000, score: 14, eps: 3.768287251413917e-13
episode: 1597/10000, score: 14, eps: 3.7630150770320577e-13
episode: 1598/10000, score: 14, eps: 3.7577502788

episode: 1722/10000, score: 59, eps: 1.4505936045084217e-13
episode: 1723/10000, score: 58, eps: 1.442204095216518e-13
episode: 1724/10000, score: 49, eps: 1.4351542289294794e-13
episode: 1725/10000, score: 48, eps: 1.4282816523757929e-13
episode: 1726/10000, score: 44, eps: 1.422010705752982e-13
episode: 1727/10000, score: 51, eps: 1.4147765522122709e-13
episode: 1728/10000, score: 47, eps: 1.4081423732360142e-13
episode: 1729/10000, score: 51, eps: 1.4009787716583616e-13
episode: 1730/10000, score: 45, eps: 1.394688217016707e-13
episode: 1731/10000, score: 57, eps: 1.3867607126501122e-13
episode: 1732/10000, score: 45, eps: 1.3805339987167525e-13
episode: 1733/10000, score: 48, eps: 1.3739229840955408e-13
episode: 1734/10000, score: 49, eps: 1.3672068935236934e-13
episode: 1735/10000, score: 47, eps: 1.3607957784857564e-13
episode: 1736/10000, score: 46, eps: 1.354550181506337e-13
episode: 1737/10000, score: 50, eps: 1.3477939973205128e-13
episode: 1738/10000, score: 48, eps: 1.34133

episode: 1860/10000, score: 33, eps: 7.786775460446689e-14
episode: 1861/10000, score: 41, eps: 7.754913449689409e-14
episode: 1862/10000, score: 38, eps: 7.725499230258898e-14
episode: 1863/10000, score: 36, eps: 7.697736048560533e-14
episode: 1864/10000, score: 46, eps: 7.662406017579136e-14
episode: 1865/10000, score: 39, eps: 7.632579342575423e-14
episode: 1866/10000, score: 41, eps: 7.601348273135416e-14
episode: 1867/10000, score: 36, eps: 7.574031253617372e-14
episode: 1868/10000, score: 33, eps: 7.549076900072517e-14
episode: 1869/10000, score: 46, eps: 7.514429164806202e-14
episode: 1870/10000, score: 36, eps: 7.487424507107853e-14
episode: 1871/10000, score: 36, eps: 7.460516896240528e-14
episode: 1872/10000, score: 34, eps: 7.435192947683948e-14
episode: 1873/10000, score: 44, eps: 7.402548337268584e-14
episode: 1874/10000, score: 36, eps: 7.375945746498327e-14
episode: 1875/10000, score: 36, eps: 7.349438757648307e-14
episode: 1876/10000, score: 39, eps: 7.32083033872856e-1

episode: 2004/10000, score: 44, eps: 4.4758628740343925e-14
episode: 2005/10000, score: 30, eps: 4.462454737256048e-14
episode: 2006/10000, score: 39, eps: 4.445084189824659e-14
episode: 2007/10000, score: 29, eps: 4.432211476484195e-14
episode: 2008/10000, score: 29, eps: 4.419376041796207e-14
episode: 2009/10000, score: 30, eps: 4.406137120026041e-14
episode: 2010/10000, score: 36, eps: 4.3903027536239237e-14
episode: 2011/10000, score: 35, eps: 4.374962787576069e-14
episode: 2012/10000, score: 39, eps: 4.3579328112319546e-14
episode: 2013/10000, score: 42, eps: 4.3396669652459575e-14
episode: 2014/10000, score: 41, eps: 4.321909869740632e-14
episode: 2015/10000, score: 29, eps: 4.3093938622904545e-14
episode: 2016/10000, score: 29, eps: 4.2969141004926e-14
episode: 2017/10000, score: 41, eps: 4.279331941614586e-14
episode: 2018/10000, score: 26, eps: 4.2682195752753306e-14
episode: 2019/10000, score: 33, eps: 4.254156963606329e-14
episode: 2020/10000, score: 31, eps: 4.2409888397399

episode: 2143/10000, score: 22, eps: 2.987326791405943e-14
episode: 2144/10000, score: 29, eps: 2.9786756613490413e-14
episode: 2145/10000, score: 23, eps: 2.971832238104767e-14
episode: 2146/10000, score: 19, eps: 2.966190835806942e-14
episode: 2147/10000, score: 20, eps: 2.960264086517896e-14
episode: 2148/10000, score: 22, eps: 2.953758339180954e-14
episode: 2149/10000, score: 21, eps: 2.947561645634455e-14
episode: 2150/10000, score: 21, eps: 2.941377952139586e-14
episode: 2151/10000, score: 27, eps: 2.93344654730705e-14
episode: 2152/10000, score: 31, eps: 2.9243664903602264e-14
episode: 2153/10000, score: 29, eps: 2.915897689787443e-14
episode: 2154/10000, score: 23, eps: 2.909198497160614e-14
episode: 2155/10000, score: 27, eps: 2.901353864000703e-14
episode: 2156/10000, score: 28, eps: 2.89324103080021e-14
episode: 2157/10000, score: 23, eps: 2.8865938912078095e-14
episode: 2158/10000, score: 27, eps: 2.878810211207883e-14
episode: 2159/10000, score: 21, eps: 2.872770751438695e

episode: 2286/10000, score: 24, eps: 2.147195187934463e-14
episode: 2287/10000, score: 18, eps: 2.1433335200533644e-14
episode: 2288/10000, score: 24, eps: 2.138195430869922e-14
episode: 2289/10000, score: 19, eps: 2.134136513794374e-14
episode: 2290/10000, score: 19, eps: 2.1300853017244527e-14
episode: 2291/10000, score: 22, eps: 2.1254040312789328e-14
episode: 2292/10000, score: 22, eps: 2.1207330488218643e-14
episode: 2293/10000, score: 21, eps: 2.1162839601394348e-14
episode: 2294/10000, score: 15, eps: 2.1131117553347635e-14
episode: 2295/10000, score: 17, eps: 2.1095223377462693e-14
episode: 2296/10000, score: 14, eps: 2.1065709253710975e-14
episode: 2297/10000, score: 16, eps: 2.1032029385963183e-14
episode: 2298/10000, score: 16, eps: 2.0998403365606803e-14
episode: 2299/10000, score: 18, eps: 2.096063834997759e-14
episode: 2300/10000, score: 18, eps: 2.0922941253626838e-14
episode: 2301/10000, score: 18, eps: 2.0885311954403715e-14
episode: 2302/10000, score: 22, eps: 2.08394

episode: 2424/10000, score: 15, eps: 1.7191866669970393e-14
episode: 2425/10000, score: 20, eps: 1.7157515581586723e-14
episode: 2426/10000, score: 9, eps: 1.7142079992827892e-14
episode: 2427/10000, score: 13, eps: 1.71198086547582e-14
episode: 2428/10000, score: 13, eps: 1.709756625206272e-14
episode: 2429/10000, score: 11, eps: 1.7078768330026355e-14
episode: 2430/10000, score: 9, eps: 1.706340358545153e-14
episode: 2431/10000, score: 12, eps: 1.7042938759242252e-14
episode: 2432/10000, score: 17, eps: 1.7013988930163116e-14
episode: 2433/10000, score: 15, eps: 1.6988485803717204e-14
episode: 2434/10000, score: 11, eps: 1.6969807810197765e-14
episode: 2435/10000, score: 16, eps: 1.6942676471970825e-14
episode: 2436/10000, score: 11, eps: 1.6924048843528735e-14
episode: 2437/10000, score: 15, eps: 1.6898680532816598e-14
episode: 2438/10000, score: 19, eps: 1.6866601920179688e-14
episode: 2439/10000, score: 17, eps: 1.6837951624028716e-14
episode: 2440/10000, score: 17, eps: 1.6809349

episode: 2570/10000, score: 15, eps: 1.4187010583319794e-14
episode: 2571/10000, score: 10, eps: 1.4172829955189095e-14
episode: 2572/10000, score: 14, eps: 1.4153000885369602e-14
episode: 2573/10000, score: 15, eps: 1.4131786238254797e-14
episode: 2574/10000, score: 13, eps: 1.4113425934897652e-14
episode: 2575/10000, score: 12, eps: 1.4096499135532643e-14
episode: 2576/10000, score: 13, eps: 1.4078184677895187e-14
episode: 2577/10000, score: 10, eps: 1.406411282671131e-14
episode: 2578/10000, score: 17, eps: 1.40402229525391e-14
episode: 2579/10000, score: 13, eps: 1.4021981610060204e-14
episode: 2580/10000, score: 10, eps: 1.4007965936659526e-14
episode: 2581/10000, score: 14, eps: 1.3988367526499708e-14
episode: 2582/10000, score: 14, eps: 1.3968796536286693e-14
episode: 2583/10000, score: 19, eps: 1.3942279695979477e-14
episode: 2584/10000, score: 15, eps: 1.3921380909487357e-14
episode: 2585/10000, score: 10, eps: 1.3907465791529006e-14
episode: 2586/10000, score: 15, eps: 1.3886

episode: 2713/10000, score: 9, eps: 1.1881920186404428e-14
episode: 2714/10000, score: 14, eps: 1.1865296306367002e-14
episode: 2715/10000, score: 12, eps: 1.1851065779285144e-14
episode: 2716/10000, score: 13, eps: 1.1835668634214823e-14
episode: 2717/10000, score: 17, eps: 1.1815564086000561e-14
episode: 2718/10000, score: 16, eps: 1.1796673355525296e-14
episode: 2719/10000, score: 17, eps: 1.1776635046277736e-14
episode: 2720/10000, score: 9, eps: 1.1766040313335613e-14
episode: 2721/10000, score: 11, eps: 1.175310413837211e-14
episode: 2722/10000, score: 10, eps: 1.1741356321720475e-14
episode: 2723/10000, score: 14, eps: 1.1724929103231642e-14
episode: 2724/10000, score: 19, eps: 1.1702671776207359e-14
episode: 2725/10000, score: 13, eps: 1.1687467427636147e-14
episode: 2726/10000, score: 11, eps: 1.1674617639644788e-14
episode: 2727/10000, score: 11, eps: 1.1661781979354955e-14
episode: 2728/10000, score: 17, eps: 1.164197280208631e-14
episode: 2729/10000, score: 11, eps: 1.16291

episode: 2863/10000, score: 13, eps: 9.824786498552668e-15
episode: 2864/10000, score: 11, eps: 9.81398463541607e-15
episode: 2865/10000, score: 17, eps: 9.797314201883797e-15
episode: 2866/10000, score: 17, eps: 9.780672085428068e-15
episode: 2867/10000, score: 14, eps: 9.766988041360881e-15
episode: 2868/10000, score: 10, eps: 9.757225447292305e-15
episode: 2869/10000, score: 16, eps: 9.741625589784907e-15
episode: 2870/10000, score: 15, eps: 9.727023375675992e-15
episode: 2871/10000, score: 15, eps: 9.712443049562558e-15
episode: 2872/10000, score: 15, eps: 9.697884578635583e-15
episode: 2873/10000, score: 13, eps: 9.685284890260429e-15
episode: 2874/10000, score: 17, eps: 9.66883307135075e-15
episode: 2875/10000, score: 10, eps: 9.659168588094227e-15
episode: 2876/10000, score: 14, eps: 9.645654538399341e-15
episode: 2877/10000, score: 9, eps: 9.636976920940304e-15
episode: 2878/10000, score: 9, eps: 9.628307110213763e-15
episode: 2879/10000, score: 19, eps: 9.61002978178342e-15
ep

episode: 3003/10000, score: 12, eps: 8.300369206549065e-15
episode: 3004/10000, score: 14, eps: 8.288756239975369e-15
episode: 3005/10000, score: 8, eps: 8.282127555371025e-15
episode: 3006/10000, score: 9, eps: 8.274676621441518e-15
episode: 3007/10000, score: 12, eps: 8.26475246896234e-15
episode: 3008/10000, score: 15, eps: 8.252364014489657e-15
episode: 3009/10000, score: 21, eps: 8.235051369052954e-15
episode: 3010/10000, score: 12, eps: 8.225174740732692e-15
episode: 3011/10000, score: 15, eps: 8.21284561131374e-15
episode: 3012/10000, score: 19, eps: 8.197255240663177e-15
episode: 3013/10000, score: 10, eps: 8.189061673203874e-15
episode: 3014/10000, score: 14, eps: 8.177604435927514e-15
episode: 3015/10000, score: 11, eps: 8.168613567381395e-15
episode: 3016/10000, score: 17, eps: 8.154738028078585e-15
episode: 3017/10000, score: 9, eps: 8.147401698874107e-15
episode: 3018/10000, score: 16, eps: 8.134375628476883e-15
episode: 3019/10000, score: 10, eps: 8.126244912341484e-15
ep

episode: 3154/10000, score: 11, eps: 6.84754624634646e-15
episode: 3155/10000, score: 12, eps: 6.8393337087252465e-15
episode: 3156/10000, score: 10, eps: 6.832497451896114e-15
episode: 3157/10000, score: 8, eps: 6.8270333666513125e-15
episode: 3158/10000, score: 12, eps: 6.818845430951744e-15
episode: 3159/10000, score: 13, eps: 6.80998624864124e-15
episode: 3160/10000, score: 11, eps: 6.802499008136748e-15
episode: 3161/10000, score: 11, eps: 6.795019999480065e-15
episode: 3162/10000, score: 13, eps: 6.786191771653453e-15
episode: 3163/10000, score: 12, eps: 6.778052818921412e-15
episode: 3164/10000, score: 9, eps: 6.771955010914127e-15
episode: 3165/10000, score: 9, eps: 6.76586268873935e-15
episode: 3166/10000, score: 11, eps: 6.758423959890073e-15
episode: 3167/10000, score: 15, eps: 6.748293417221236e-15
episode: 3168/10000, score: 9, eps: 6.742222381964595e-15
episode: 3169/10000, score: 10, eps: 6.735483192773777e-15
episode: 3170/10000, score: 13, eps: 6.726732316374195e-15
ep

episode: 3295/10000, score: 15, eps: 5.797240626730805e-15
episode: 3296/10000, score: 17, eps: 5.787393197971871e-15
episode: 3297/10000, score: 12, eps: 5.780452144540875e-15
episode: 3298/10000, score: 9, eps: 5.7752518180880755e-15
episode: 3299/10000, score: 9, eps: 5.770056170057403e-15
episode: 3300/10000, score: 9, eps: 5.76486519623996e-15
episode: 3301/10000, score: 11, eps: 5.758527014248943e-15
episode: 3302/10000, score: 15, eps: 5.7498952675615906e-15
episode: 3303/10000, score: 12, eps: 5.742999186906702e-15
episode: 3304/10000, score: 13, eps: 5.7355377658610035e-15
episode: 3305/10000, score: 15, eps: 5.72694047891798e-15
episode: 3306/10000, score: 11, eps: 5.7206439932636776e-15
episode: 3307/10000, score: 14, eps: 5.712640295377402e-15
episode: 3308/10000, score: 15, eps: 5.704077330608176e-15
episode: 3309/10000, score: 9, eps: 5.6989457139993986e-15
episode: 3310/10000, score: 19, eps: 5.688127456819903e-15
episode: 3311/10000, score: 15, eps: 5.6796012355811814e-

episode: 3440/10000, score: 10, eps: 4.83544411912837e-15
episode: 3441/10000, score: 11, eps: 4.830127789293906e-15
episode: 3442/10000, score: 8, eps: 4.8262650392277986e-15
episode: 3443/10000, score: 13, eps: 4.819994657783568e-15
episode: 3444/10000, score: 11, eps: 4.814695313861928e-15
episode: 3445/10000, score: 11, eps: 4.809401796304837e-15
episode: 3446/10000, score: 14, eps: 4.802673008595505e-15
episode: 3447/10000, score: 11, eps: 4.797392708963923e-15
episode: 3448/10000, score: 9, eps: 4.79307678218431e-15
episode: 3449/10000, score: 11, eps: 4.787807033125439e-15
episode: 3450/10000, score: 11, eps: 4.782543077893039e-15
episode: 3451/10000, score: 15, eps: 4.775374282771029e-15
episode: 3452/10000, score: 13, eps: 4.769170019629953e-15
episode: 3453/10000, score: 9, eps: 4.764879483112944e-15
episode: 3454/10000, score: 15, eps: 4.757737164844362e-15
episode: 3455/10000, score: 11, eps: 4.752506269933605e-15
episode: 3456/10000, score: 10, eps: 4.747755901721291e-15
e

episode: 3586/10000, score: 11, eps: 4.015105743285069e-15
episode: 3587/10000, score: 11, eps: 4.010691334613255e-15
episode: 3588/10000, score: 9, eps: 4.007083155924136e-15
episode: 3589/10000, score: 11, eps: 4.0026775676873196e-15
episode: 3590/10000, score: 9, eps: 3.99907659850415e-15
episode: 3591/10000, score: 9, eps: 3.9954788688971995e-15
episode: 3592/10000, score: 12, eps: 3.9906869303917696e-15
episode: 3593/10000, score: 11, eps: 3.986299368987819e-15
episode: 3594/10000, score: 11, eps: 3.981916631488976e-15
episode: 3595/10000, score: 12, eps: 3.977140958720341e-15
episode: 3596/10000, score: 15, eps: 3.971179421471212e-15
episode: 3597/10000, score: 12, eps: 3.966416626270403e-15
episode: 3598/10000, score: 10, eps: 3.962451994055729e-15
episode: 3599/10000, score: 12, eps: 3.957699666009635e-15
episode: 3600/10000, score: 10, eps: 3.953743746833634e-15
episode: 3601/10000, score: 9, eps: 3.950186800477168e-15
episode: 3602/10000, score: 9, eps: 3.94663305409222e-15
e

episode: 3734/10000, score: 16, eps: 3.3419501321319196e-15
episode: 3735/10000, score: 9, eps: 3.338943579834367e-15
episode: 3736/10000, score: 12, eps: 3.334939050506927e-15
episode: 3737/10000, score: 10, eps: 3.3316056117788712e-15
episode: 3738/10000, score: 15, eps: 3.3266117000316703e-15
episode: 3739/10000, score: 19, eps: 3.3202968230854197e-15
episode: 3740/10000, score: 12, eps: 3.31631465756332e-15
episode: 3741/10000, score: 10, eps: 3.312999834849465e-15
episode: 3742/10000, score: 15, eps: 3.308033812240055e-15
episode: 3743/10000, score: 9, eps: 3.3050577724233784e-15
episode: 3744/10000, score: 14, eps: 3.3004336979418493e-15
episode: 3745/10000, score: 10, eps: 3.297134749043089e-15
episode: 3746/10000, score: 11, eps: 3.2935097136993354e-15
episode: 3747/10000, score: 10, eps: 3.2902176856698558e-15
episode: 3748/10000, score: 9, eps: 3.287257673954783e-15
episode: 3749/10000, score: 10, eps: 3.28397189515238e-15
episode: 3750/10000, score: 9, eps: 3.281017502400813

episode: 3877/10000, score: 11, eps: 2.8053989524939202e-15
episode: 3878/10000, score: 19, eps: 2.8000734889990463e-15
episode: 3879/10000, score: 12, eps: 2.7967152482448727e-15
episode: 3880/10000, score: 9, eps: 2.7941992111040527e-15
episode: 3881/10000, score: 12, eps: 2.7908480156076216e-15
episode: 3882/10000, score: 10, eps: 2.7880584231387782e-15
episode: 3883/10000, score: 9, eps: 2.7855501740248236e-15
episode: 3884/10000, score: 10, eps: 2.782765877014169e-15
episode: 3885/10000, score: 9, eps: 2.780262389286855e-15
episode: 3886/10000, score: 12, eps: 2.7769279087813684e-15
episode: 3887/10000, score: 11, eps: 2.773874814933957e-15
episode: 3888/10000, score: 19, eps: 2.7686091934247075e-15
episode: 3889/10000, score: 15, eps: 2.764459185414885e-15
episode: 3890/10000, score: 10, eps: 2.761695969904427e-15
episode: 3891/10000, score: 17, eps: 2.7570048407848132e-15
episode: 3892/10000, score: 16, eps: 2.7525969399019457e-15
episode: 3893/10000, score: 19, eps: 2.747371709

episode: 4023/10000, score: 17, eps: 2.3451205253537174e-15
episode: 4024/10000, score: 9, eps: 2.343010760927328e-15
episode: 4025/10000, score: 19, eps: 2.3385630447604984e-15
episode: 4026/10000, score: 13, eps: 2.3355247362128236e-15
episode: 4027/10000, score: 11, eps: 2.33295694315631e-15
episode: 4028/10000, score: 8, eps: 2.3310912306991e-15
episode: 4029/10000, score: 11, eps: 2.3285283120609554e-15
episode: 4030/10000, score: 9, eps: 2.3264334746547266e-15
episode: 4031/10000, score: 9, eps: 2.324340521848197e-15
episode: 4032/10000, score: 11, eps: 2.321785025278012e-15
episode: 4033/10000, score: 11, eps: 2.319232338348952e-15
episode: 4034/10000, score: 11, eps: 2.3166824579719577e-15
episode: 4035/10000, score: 11, eps: 2.3141353810613644e-15
episode: 4036/10000, score: 8, eps: 2.312284720584847e-15
episode: 4037/10000, score: 13, eps: 2.309280553369021e-15
episode: 4038/10000, score: 13, eps: 2.306280289228185e-15
episode: 4039/10000, score: 14, eps: 2.3030535946990736e-

episode: 4172/10000, score: 11, eps: 1.950971779110981e-15
episode: 4173/10000, score: 19, eps: 1.9472682670026773e-15
episode: 4174/10000, score: 19, eps: 1.9435717852379605e-15
episode: 4175/10000, score: 13, eps: 1.9410466573474214e-15
episode: 4176/10000, score: 17, eps: 1.9377495165339354e-15
episode: 4177/10000, score: 10, eps: 1.9358126387721953e-15
episode: 4178/10000, score: 17, eps: 1.9325243886755795e-15
episode: 4179/10000, score: 9, eps: 1.9307858122722443e-15
episode: 4180/10000, score: 11, eps: 1.9286630094924255e-15
episode: 4181/10000, score: 11, eps: 1.9265425406284727e-15
episode: 4182/10000, score: 11, eps: 1.924424403114363e-15
episode: 4183/10000, score: 12, eps: 1.9221163635274543e-15
episode: 4184/10000, score: 13, eps: 1.919619110956044e-15
episode: 4185/10000, score: 10, eps: 1.917700355443374e-15
episode: 4186/10000, score: 19, eps: 1.9140600021781315e-15
episode: 4187/10000, score: 13, eps: 1.911573216594817e-15
episode: 4188/10000, score: 19, eps: 1.9079444

episode: 4313/10000, score: 13, eps: 1.6457876613963314e-15
episode: 4314/10000, score: 17, eps: 1.6429920595244336e-15
episode: 4315/10000, score: 12, eps: 1.6410215530663865e-15
episode: 4316/10000, score: 12, eps: 1.6390534099159885e-15
episode: 4317/10000, score: 10, eps: 1.6374150938834553e-15
episode: 4318/10000, score: 11, eps: 1.6356148375883657e-15
episode: 4319/10000, score: 19, eps: 1.6325099647140438e-15
episode: 4320/10000, score: 9, eps: 1.6310412933122784e-15
episode: 4321/10000, score: 19, eps: 1.6279451023557497e-15
episode: 4322/10000, score: 15, eps: 1.6255048933040806e-15
episode: 4323/10000, score: 9, eps: 1.624042523945347e-15
episode: 4324/10000, score: 15, eps: 1.6216081646653615e-15
episode: 4325/10000, score: 13, eps: 1.619501338442001e-15
episode: 4326/10000, score: 13, eps: 1.6173972494500092e-15
episode: 4327/10000, score: 9, eps: 1.615942174052673e-15
episode: 4328/10000, score: 13, eps: 1.6138427091992566e-15
episode: 4329/10000, score: 11, eps: 1.6120683

episode: 4458/10000, score: 17, eps: 1.3805891185174556e-15
episode: 4459/10000, score: 13, eps: 1.3787954291281459e-15
episode: 4460/10000, score: 9, eps: 1.3775550094924841e-15
episode: 4461/10000, score: 8, eps: 1.3764533511231595e-15
episode: 4462/10000, score: 12, eps: 1.374802515258272e-15
episode: 4463/10000, score: 13, eps: 1.3730163439413033e-15
episode: 4464/10000, score: 13, eps: 1.3712324932543436e-15
episode: 4465/10000, score: 10, eps: 1.3698618776511919e-15
episode: 4466/10000, score: 10, eps: 1.3684926320470311e-15
episode: 4467/10000, score: 10, eps: 1.3671247550724783e-15
episode: 4468/10000, score: 15, eps: 1.3650755027990072e-15
episode: 4469/10000, score: 13, eps: 1.3633019690139467e-15
episode: 4470/10000, score: 9, eps: 1.362075487916043e-15
episode: 4471/10000, score: 17, eps: 1.3597618110833621e-15
episode: 4472/10000, score: 15, eps: 1.357723595498133e-15
episode: 4473/10000, score: 12, eps: 1.3560952229824762e-15
episode: 4474/10000, score: 17, eps: 1.3537917

episode: 4600/10000, score: 14, eps: 1.1602106807589533e-15
episode: 4601/10000, score: 10, eps: 1.1590509920338e-15
episode: 4602/10000, score: 12, eps: 1.1576608955620802e-15
episode: 4603/10000, score: 9, eps: 1.1566194174167676e-15
episode: 4604/10000, score: 9, eps: 1.1555788762269417e-15
episode: 4605/10000, score: 17, eps: 1.153615962939109e-15
episode: 4606/10000, score: 14, eps: 1.1520019499617198e-15
episode: 4607/10000, score: 9, eps: 1.1509655628307028e-15
episode: 4608/10000, score: 9, eps: 1.1499301080750911e-15
episode: 4609/10000, score: 19, eps: 1.1477472061363968e-15
episode: 4610/10000, score: 21, eps: 1.1453393457468264e-15
episode: 4611/10000, score: 12, eps: 1.1439656942039809e-15
episode: 4612/10000, score: 10, eps: 1.1428222431570873e-15
episode: 4613/10000, score: 12, eps: 1.141451610476615e-15
episode: 4614/10000, score: 19, eps: 1.139284803193339e-15
episode: 4615/10000, score: 15, eps: 1.1375770717193736e-15
episode: 4616/10000, score: 9, eps: 1.136553661787

episode: 4749/10000, score: 9, eps: 9.702443513258616e-16
episode: 4750/10000, score: 9, eps: 9.693714806161469e-16
episode: 4751/10000, score: 9, eps: 9.684993951759104e-16
episode: 4752/10000, score: 8, eps: 9.677248667853713e-16
episode: 4753/10000, score: 9, eps: 9.668542627049399e-16
episode: 4754/10000, score: 19, eps: 9.650188919900827e-16
episode: 4755/10000, score: 14, eps: 9.636687433573182e-16
episode: 4756/10000, score: 12, eps: 9.625129766747006e-16
episode: 4757/10000, score: 19, eps: 9.606858469839073e-16
episode: 4758/10000, score: 14, eps: 9.593417606726572e-16
episode: 4759/10000, score: 15, eps: 9.579037549041275e-16
episode: 4760/10000, score: 11, eps: 9.568505874627755e-16
episode: 4761/10000, score: 13, eps: 9.556074277689414e-16
episode: 4762/10000, score: 9, eps: 9.547477250223646e-16
episode: 4763/10000, score: 8, eps: 9.539841941182507e-16
episode: 4764/10000, score: 11, eps: 9.529353360386515e-16
episode: 4765/10000, score: 9, eps: 9.520780372129032e-16
episo

episode: 4895/10000, score: 10, eps: 1.039007629202401e-16
episode: 4896/10000, score: 10, eps: 1.0379690890019726e-16
episode: 4897/10000, score: 10, eps: 1.0369315868745261e-16
episode: 4898/10000, score: 9, eps: 1.0359987216546213e-16
episode: 4899/10000, score: 10, eps: 1.0349631890080935e-16
episode: 4900/10000, score: 10, eps: 1.0339286914283468e-16
episode: 4901/10000, score: 9, eps: 1.0329985277335532e-16
episode: 4902/10000, score: 9, eps: 1.0320692008513042e-16
episode: 4903/10000, score: 10, eps: 1.0310375959577669e-16
episode: 4904/10000, score: 10, eps: 1.0300070222050245e-16
episode: 4905/10000, score: 10, eps: 1.0289774785624004e-16
episode: 4906/10000, score: 10, eps: 1.0279489640002477e-16
episode: 4907/10000, score: 10, eps: 1.0269214774899493e-16
episode: 4908/10000, score: 9, eps: 1.0259976177656919e-16
episode: 4909/10000, score: 9, eps: 1.0250745891826744e-16
episode: 4910/10000, score: 9, eps: 1.0241523909931687e-16
episode: 4911/10000, score: 9, eps: 1.023231022

episode: 5035/10000, score: 10, eps: 5.715154099491269e-17
episode: 5036/10000, score: 11, eps: 5.708870572373773e-17
episode: 5037/10000, score: 11, eps: 5.702593953681201e-17
episode: 5038/10000, score: 11, eps: 5.696324235818088e-17
episode: 5039/10000, score: 12, eps: 5.689492405056192e-17
episode: 5040/10000, score: 11, eps: 5.683237091692873e-17
episode: 5041/10000, score: 10, eps: 5.677556411376004e-17
episode: 5042/10000, score: 10, eps: 5.671881409183826e-17
episode: 5043/10000, score: 11, eps: 5.665645458232825e-17
episode: 5044/10000, score: 9, eps: 5.660548416476937e-17
episode: 5045/10000, score: 10, eps: 5.654890414628101e-17
episode: 5046/10000, score: 9, eps: 5.649803048540547e-17
episode: 5047/10000, score: 10, eps: 5.644155787225518e-17
episode: 5048/10000, score: 11, eps: 5.637950319214156e-17
episode: 5049/10000, score: 9, eps: 5.632878193115464e-17
episode: 5050/10000, score: 9, eps: 5.627810630104718e-17
episode: 5051/10000, score: 10, eps: 5.622185351314179e-17
e

episode: 5175/10000, score: 77, eps: 3.674434247117736e-17
episode: 5176/10000, score: 93, eps: 3.6404187251709316e-17
episode: 5177/10000, score: 69, eps: 3.6153850497687516e-17
episode: 5178/10000, score: 117, eps: 3.5733294469201186e-17
episode: 5179/10000, score: 83, eps: 3.543792085246287e-17
episode: 5180/10000, score: 236, eps: 3.4611336650123794e-17
episode: 5181/10000, score: 78, eps: 3.434240497453016e-17
episode: 5182/10000, score: 72, eps: 3.409601540593202e-17
episode: 5183/10000, score: 78, eps: 3.383108780007631e-17
episode: 5184/10000, score: 139, eps: 3.336406565188615e-17
episode: 5185/10000, score: 116, eps: 3.297925944088329e-17
episode: 5186/10000, score: 174, eps: 3.241035569760399e-17
episode: 5187/10000, score: 66, eps: 3.2197141071364643e-17
episode: 5188/10000, score: 104, eps: 3.186400943464519e-17
episode: 5189/10000, score: 163, eps: 3.134881059845571e-17
episode: 5190/10000, score: 59, eps: 3.116438797638089e-17
episode: 5191/10000, score: 67, eps: 3.09562

episode: 5458/10000, score: 13, eps: 7.301453459507289e-19
episode: 5459/10000, score: 17, eps: 7.289050913639581e-19
episode: 5460/10000, score: 13, eps: 7.279580830827416e-19
episode: 5461/10000, score: 20, eps: 7.265035492074144e-19
episode: 5462/10000, score: 20, eps: 7.250519216378808e-19
episode: 5463/10000, score: 21, eps: 7.235308342475915e-19
episode: 5464/10000, score: 23, eps: 7.218685425811002e-19
episode: 5465/10000, score: 25, eps: 7.200660351708904e-19
episode: 5466/10000, score: 20, eps: 7.186272704054892e-19
episode: 5467/10000, score: 25, eps: 7.16832856459353e-19
episode: 5468/10000, score: 29, eps: 7.147569488994126e-19
episode: 5469/10000, score: 27, eps: 7.128296118448648e-19
episode: 5470/10000, score: 31, eps: 7.106231515039138e-19
episode: 5471/10000, score: 30, eps: 7.084943703769279e-19
episode: 5472/10000, score: 29, eps: 7.0644261060282235e-19
episode: 5473/10000, score: 29, eps: 7.04396792609409e-19
episode: 5474/10000, score: 32, eps: 7.021462131898743e-1

episode: 5596/10000, score: 88, eps: 2.1823295226264143e-23
episode: 5597/10000, score: 122, eps: 2.1558655377937815e-23
episode: 5598/10000, score: 239, eps: 2.1049486857204637e-23
episode: 5599/10000, score: 355, eps: 2.031530224766622e-23
episode: 5600/10000, score: 110, eps: 2.0093047452566716e-23
episode: 5601/10000, score: 263, eps: 1.9571463124538771e-23
episode: 5602/10000, score: 107, eps: 1.9363154492217464e-23
episode: 5603/10000, score: 1188, eps: 1.7194095961266132e-23
episode: 5604/10000, score: 106, eps: 1.7012792086956088e-23
episode: 5605/10000, score: 108, eps: 1.6830033467378577e-23
episode: 5606/10000, score: 100, eps: 1.666256350453182e-23
episode: 5607/10000, score: 92, eps: 1.650996332736132e-23
episode: 5608/10000, score: 89, eps: 1.636366931299851e-23
episode: 5609/10000, score: 109, eps: 1.6186265056856496e-23
episode: 5610/10000, score: 81, eps: 1.6055679366560503e-23
episode: 5611/10000, score: 82, eps: 1.5924554585779376e-23
episode: 5612/10000, score: 86, 

episode: 5732/10000, score: 123, eps: 1.2141005302495965e-25
episode: 5733/10000, score: 95, eps: 1.2026206171369895e-25
episode: 5734/10000, score: 64, eps: 1.1949480399892888e-25
episode: 5735/10000, score: 24, eps: 1.1920834603325997e-25
episode: 5736/10000, score: 19, eps: 1.1898205390660185e-25
episode: 5737/10000, score: 15, eps: 1.18803705702778e-25
episode: 5738/10000, score: 14, eps: 1.1863748858293365e-25
episode: 5739/10000, score: 13, eps: 1.184833523510951e-25
episode: 5740/10000, score: 12, eps: 1.1834125050122587e-25
episode: 5741/10000, score: 10, eps: 1.1822296249008892e-25
episode: 5742/10000, score: 10, eps: 1.1810479271374768e-25
episode: 5743/10000, score: 10, eps: 1.1798674105402058e-25
episode: 5744/10000, score: 10, eps: 1.1786880739284415e-25
episode: 5745/10000, score: 8, eps: 1.1777454534359615e-25
episode: 5746/10000, score: 11, eps: 1.1764505810028923e-25
episode: 5747/10000, score: 10, eps: 1.1752746596835015e-25
episode: 5748/10000, score: 9, eps: 1.17421

episode: 5872/10000, score: 93, eps: 4.0093662747661847e-26
episode: 5873/10000, score: 73, eps: 3.9802030181753395e-26
episode: 5874/10000, score: 84, eps: 3.94690768421697e-26
episode: 5875/10000, score: 90, eps: 3.9115431260358337e-26
episode: 5876/10000, score: 90, eps: 3.8764954366733826e-26
episode: 5877/10000, score: 92, eps: 3.840993463025922e-26
episode: 5878/10000, score: 85, eps: 3.8084817634583726e-26
episode: 5879/10000, score: 105, eps: 3.768699935925227e-26
episode: 5880/10000, score: 97, eps: 3.73231846286202e-26
episode: 5881/10000, score: 103, eps: 3.694070982966045e-26
episode: 5882/10000, score: 99, eps: 3.6576783015946794e-26
episode: 5883/10000, score: 93, eps: 3.62381789529136e-26
episode: 5884/10000, score: 99, eps: 3.588117322503421e-26
episode: 5885/10000, score: 103, eps: 3.551347564905726e-26
episode: 5886/10000, score: 95, eps: 3.5177678402766604e-26
episode: 5887/10000, score: 99, eps: 3.483112035139198e-26
episode: 5888/10000, score: 100, eps: 3.448452766

episode: 6010/10000, score: 33, eps: 1.2600801357635775e-26
episode: 6011/10000, score: 37, eps: 1.2554262216124511e-26
episode: 6012/10000, score: 27, eps: 1.2520409736902164e-26
episode: 6013/10000, score: 37, eps: 1.2474167509603533e-26
episode: 6014/10000, score: 33, eps: 1.2433068552418255e-26
episode: 6015/10000, score: 37, eps: 1.238714890648799e-26
episode: 6016/10000, score: 30, eps: 1.2350041293608377e-26
episode: 6017/10000, score: 24, eps: 1.2320435255634327e-26
episode: 6018/10000, score: 34, eps: 1.2278614819738579e-26
episode: 6019/10000, score: 33, eps: 1.2238160154977787e-26
episode: 6020/10000, score: 29, eps: 1.2202719132769398e-26
episode: 6021/10000, score: 31, eps: 1.2164947391288934e-26
episode: 6022/10000, score: 34, eps: 1.212365474277476e-26
episode: 6023/10000, score: 40, eps: 1.207525456863967e-26
episode: 6024/10000, score: 22, eps: 1.2048716883839657e-26
episode: 6025/10000, score: 26, eps: 1.201742934696289e-26
episode: 6026/10000, score: 18, eps: 1.19958

episode: 6151/10000, score: 25, eps: 9.199459025027841e-27
episode: 6152/10000, score: 11, eps: 9.189344678285166e-27
episode: 6153/10000, score: 19, eps: 9.17190062827491e-27
episode: 6154/10000, score: 15, eps: 9.158152403656197e-27
episode: 6155/10000, score: 19, eps: 9.140767565659161e-27
episode: 6156/10000, score: 19, eps: 9.123415729143083e-27
episode: 6157/10000, score: 11, eps: 9.113384988214616e-27
episode: 6158/10000, score: 19, eps: 9.096085131797998e-27
episode: 6159/10000, score: 8, eps: 9.088810810087078e-27
episode: 6160/10000, score: 19, eps: 9.071557602610866e-27
episode: 6161/10000, score: 11, eps: 9.06158387710817e-27
episode: 6162/10000, score: 9, eps: 9.053431713027908e-27
episode: 6163/10000, score: 9, eps: 9.045286882961227e-27
episode: 6164/10000, score: 9, eps: 9.037149380310152e-27
episode: 6165/10000, score: 19, eps: 9.01999424125951e-27
episode: 6166/10000, score: 11, eps: 9.010077207102959e-27
episode: 6167/10000, score: 8, eps: 9.002871667654395e-27
episo

episode: 6293/10000, score: 13, eps: 7.661712594656922e-27
episode: 6294/10000, score: 9, eps: 7.654819810894779e-27
episode: 6295/10000, score: 11, eps: 7.646403717990898e-27
episode: 6296/10000, score: 9, eps: 7.639524706707846e-27
episode: 6297/10000, score: 8, eps: 7.633415225581636e-27
episode: 6298/10000, score: 9, eps: 7.626547899266983e-27
episode: 6299/10000, score: 9, eps: 7.619686751074352e-27
episode: 6300/10000, score: 9, eps: 7.612831775445659e-27
episode: 6301/10000, score: 9, eps: 7.605982966827817e-27
episode: 6302/10000, score: 9, eps: 7.599140319672733e-27
episode: 6303/10000, score: 9, eps: 7.59230382843731e-27
episode: 6304/10000, score: 13, eps: 7.582439753286476e-27
episode: 6305/10000, score: 8, eps: 7.576375924142414e-27
episode: 6306/10000, score: 10, eps: 7.568802956678433e-27
episode: 6307/10000, score: 9, eps: 7.561993758150803e-27
episode: 6308/10000, score: 13, eps: 7.55216906245815e-27
episode: 6309/10000, score: 8, eps: 7.546129441392654e-27
episode: 63

episode: 6442/10000, score: 8, eps: 6.572063514761002e-27
episode: 6443/10000, score: 11, eps: 6.564837858445524e-27
episode: 6444/10000, score: 10, eps: 6.558275973976474e-27
episode: 6445/10000, score: 9, eps: 6.552375886028433e-27
episode: 6446/10000, score: 14, eps: 6.543208520065642e-27
episode: 6447/10000, score: 9, eps: 6.537321987403103e-27
episode: 6448/10000, score: 10, eps: 6.530787606426252e-27
episode: 6449/10000, score: 13, eps: 6.522302674684894e-27
episode: 6450/10000, score: 9, eps: 6.51643494975885e-27
episode: 6451/10000, score: 9, eps: 6.510572503673351e-27
episode: 6452/10000, score: 10, eps: 6.504064860146171e-27
episode: 6453/10000, score: 14, eps: 6.494965085674162e-27
episode: 6454/10000, score: 15, eps: 6.485229454804671e-27
episode: 6455/10000, score: 9, eps: 6.479395082433274e-27
episode: 6456/10000, score: 8, eps: 6.474213380235151e-27
episode: 6457/10000, score: 10, eps: 6.467742079474169e-27
episode: 6458/10000, score: 11, eps: 6.460631119377928e-27
episo

episode: 6590/10000, score: 9, eps: 5.63118634858471e-27
episode: 6591/10000, score: 10, eps: 5.625557695594359e-27
episode: 6592/10000, score: 10, eps: 5.61993466872478e-27
episode: 6593/10000, score: 10, eps: 5.614317262352381e-27
episode: 6594/10000, score: 8, eps: 5.60982738023697e-27
episode: 6595/10000, score: 10, eps: 5.6042200766059936e-27
episode: 6596/10000, score: 8, eps: 5.599738269412533e-27
episode: 6597/10000, score: 8, eps: 5.595260046410173e-27
episode: 6598/10000, score: 9, eps: 5.5902263261920896e-27
episode: 6599/10000, score: 15, eps: 5.581846853897655e-27
episode: 6600/10000, score: 12, eps: 5.575152320464171e-27
episode: 6601/10000, score: 15, eps: 5.566795443357479e-27
episode: 6602/10000, score: 9, eps: 5.5617873310372775e-27
episode: 6603/10000, score: 11, eps: 5.555672423038658e-27
episode: 6604/10000, score: 8, eps: 5.5512294403774274e-27
episode: 6605/10000, score: 9, eps: 5.546235331857453e-27
episode: 6606/10000, score: 12, eps: 5.539583508754644e-27
epis

episode: 6737/10000, score: 10, eps: 4.8172938743562734e-27
episode: 6738/10000, score: 10, eps: 4.812478747686186e-27
episode: 6739/10000, score: 10, eps: 4.807668433976541e-27
episode: 6740/10000, score: 8, eps: 4.803823645107326e-27
episode: 6741/10000, score: 9, eps: 4.799501932799781e-27
episode: 6742/10000, score: 8, eps: 4.795663674845345e-27
episode: 6743/10000, score: 10, eps: 4.7908701686437754e-27
episode: 6744/10000, score: 18, eps: 4.782253928463693e-27
episode: 6745/10000, score: 9, eps: 4.777951621137841e-27
episode: 6746/10000, score: 9, eps: 4.7736531843401125e-27
episode: 6747/10000, score: 10, eps: 4.768881678726968e-27
episode: 6748/10000, score: 16, eps: 4.761257188029313e-27
episode: 6749/10000, score: 8, eps: 4.7574495151643054e-27
episode: 6750/10000, score: 9, eps: 4.753169522882917e-27
episode: 6751/10000, score: 10, eps: 4.74841849171604e-27
episode: 6752/10000, score: 9, eps: 4.744146624105346e-27
episode: 6753/10000, score: 9, eps: 4.739878599637988e-27
epi

episode: 6878/10000, score: 41, eps: 3.4597480982278254e-27
episode: 6879/10000, score: 38, eps: 3.446625348322776e-27
episode: 6880/10000, score: 12, eps: 3.4424916719194326e-27
episode: 6881/10000, score: 8, eps: 3.43973864228681e-27
episode: 6882/10000, score: 12, eps: 3.435613225386998e-27
episode: 6883/10000, score: 9, eps: 3.432522410016363e-27
episode: 6884/10000, score: 48, eps: 3.416084961998897e-27
episode: 6885/10000, score: 13, eps: 3.411646715117813e-27
episode: 6886/10000, score: 42, eps: 3.397347134064989e-27
episode: 6887/10000, score: 16, eps: 3.391915453565151e-27
episode: 6888/10000, score: 26, eps: 3.383107488297196e-27
episode: 6889/10000, score: 9, eps: 3.380063909192286e-27
episode: 6890/10000, score: 40, eps: 3.366569984689845e-27
episode: 6891/10000, score: 17, eps: 3.3608513919625856e-27
episode: 6892/10000, score: 31, eps: 3.350448365510019e-27
episode: 6893/10000, score: 49, eps: 3.3340705081340646e-27
episode: 6894/10000, score: 9, eps: 3.331071044662107e-2

episode: 7018/10000, score: 28, eps: 2.663078266013686e-27
episode: 7019/10000, score: 32, eps: 2.6545696112313468e-27
episode: 7020/10000, score: 10, eps: 2.6519162358579483e-27
episode: 7021/10000, score: 10, eps: 2.6492655126662225e-27
episode: 7022/10000, score: 12, eps: 2.646088141983555e-27
episode: 7023/10000, score: 9, eps: 2.6437076150252634e-27
episode: 7024/10000, score: 13, eps: 2.640272856461759e-27
episode: 7025/10000, score: 17, eps: 2.6357879815821017e-27
episode: 7026/10000, score: 10, eps: 2.633153379388872e-27
episode: 7027/10000, score: 8, eps: 2.631047593820869e-27
episode: 7028/10000, score: 9, eps: 2.6286805979425898e-27
episode: 7029/10000, score: 8, eps: 2.6265783893476156e-27
episode: 7030/10000, score: 8, eps: 2.624477861931017e-27
episode: 7031/10000, score: 9, eps: 2.6221167764468867e-27
episode: 7032/10000, score: 10, eps: 2.6194958393083906e-27
episode: 7033/10000, score: 8, eps: 2.6174009759491056e-27
episode: 7034/10000, score: 10, eps: 2.61478475248956

episode: 7167/10000, score: 11, eps: 2.2811391743244277e-27
episode: 7168/10000, score: 10, eps: 2.278859061389043e-27
episode: 7169/10000, score: 10, eps: 2.276581227540817e-27
episode: 7170/10000, score: 12, eps: 2.2738508321106435e-27
episode: 7171/10000, score: 8, eps: 2.2720323879958688e-27
episode: 7172/10000, score: 9, eps: 2.26998837658751e-27
episode: 7173/10000, score: 8, eps: 2.2681730213558823e-27
episode: 7174/10000, score: 11, eps: 2.265679278153379e-27
episode: 7175/10000, score: 9, eps: 2.2636409822572927e-27
episode: 7176/10000, score: 10, eps: 2.2613783596418882e-27
episode: 7177/10000, score: 9, eps: 2.2593439330244934e-27
episode: 7178/10000, score: 9, eps: 2.257311336658831e-27
episode: 7179/10000, score: 9, eps: 2.255280568898334e-27
episode: 7180/10000, score: 9, eps: 2.2532516280979154e-27
episode: 7181/10000, score: 10, eps: 2.2509993901627075e-27
episode: 7182/10000, score: 9, eps: 2.2489743008822865e-27
episode: 7183/10000, score: 9, eps: 2.2469510334533554e-

episode: 7306/10000, score: 10, eps: 1.996245738128048e-27
episode: 7307/10000, score: 10, eps: 1.9942503904609943e-27
episode: 7308/10000, score: 9, eps: 1.9924562828722286e-27
episode: 7309/10000, score: 10, eps: 1.9904647229556307e-27
episode: 7310/10000, score: 10, eps: 1.9884751537029862e-27
episode: 7311/10000, score: 10, eps: 1.9864875731245276e-27
episode: 7312/10000, score: 16, eps: 1.983311575680545e-27
episode: 7313/10000, score: 10, eps: 1.9813291563571183e-27
episode: 7314/10000, score: 9, eps: 1.979546673228487e-27
episode: 7315/10000, score: 10, eps: 1.977568017113758e-27
episode: 7316/10000, score: 9, eps: 1.9757889176567507e-27
episode: 7317/10000, score: 10, eps: 1.9738140176070538e-27
episode: 7318/10000, score: 11, eps: 1.9716439074597816e-27
episode: 7319/10000, score: 11, eps: 1.9694761832404685e-27
episode: 7320/10000, score: 10, eps: 1.9675075930852152e-27
episode: 7321/10000, score: 8, eps: 1.9659341378027067e-27
episode: 7322/10000, score: 10, eps: 1.963969088

episode: 7446/10000, score: 9, eps: 1.7486807826392753e-27
episode: 7447/10000, score: 9, eps: 1.747107599313115e-27
episode: 7448/10000, score: 10, eps: 1.7453612777026053e-27
episode: 7449/10000, score: 11, eps: 1.743442339957909e-27
episode: 7450/10000, score: 10, eps: 1.7416996819578275e-27
episode: 7451/10000, score: 9, eps: 1.7401327791096706e-27
episode: 7452/10000, score: 10, eps: 1.7383934291815322e-27
episode: 7453/10000, score: 10, eps: 1.736655817820823e-27
episode: 7454/10000, score: 10, eps: 1.734919943289758e-27
episode: 7455/10000, score: 9, eps: 1.7333591397662653e-27
episode: 7456/10000, score: 8, eps: 1.7319729376979557e-27
episode: 7457/10000, score: 11, eps: 1.7300687197658863e-27
episode: 7458/10000, score: 11, eps: 1.7281665954265353e-27
episode: 7459/10000, score: 10, eps: 1.7264392062987336e-27
episode: 7460/10000, score: 9, eps: 1.7248860323861803e-27
episode: 7461/10000, score: 10, eps: 1.723161922345559e-27
episode: 7462/10000, score: 10, eps: 1.721439535639

episode: 7597/10000, score: 8, eps: 1.5171818450787553e-27
episode: 7598/10000, score: 10, eps: 1.515665345783477e-27
episode: 7599/10000, score: 10, eps: 1.5141503623052513e-27
episode: 7600/10000, score: 9, eps: 1.5127881719461377e-27
episode: 7601/10000, score: 9, eps: 1.5114272070680729e-27
episode: 7602/10000, score: 8, eps: 1.5102184884174072e-27
episode: 7603/10000, score: 9, eps: 1.5088598353296483e-27
episode: 7604/10000, score: 10, eps: 1.507351654300213e-27
episode: 7605/10000, score: 10, eps: 1.5058449807733072e-27
episode: 7606/10000, score: 9, eps: 1.5044902622683322e-27
episode: 7607/10000, score: 8, eps: 1.5032870912315502e-27
episode: 7608/10000, score: 8, eps: 1.502084882394777e-27
episode: 7609/10000, score: 8, eps: 1.5008836349885223e-27
episode: 7610/10000, score: 9, eps: 1.4995333799090859e-27
episode: 7611/10000, score: 11, eps: 1.4978847176871713e-27
episode: 7612/10000, score: 10, eps: 1.4963875068378928e-27
episode: 7613/10000, score: 10, eps: 1.49489179252589

episode: 7738/10000, score: 9, eps: 1.3302248515401145e-27
episode: 7739/10000, score: 11, eps: 1.3287623356076456e-27
episode: 7740/10000, score: 10, eps: 1.3274341710556656e-27
episode: 7741/10000, score: 10, eps: 1.3261073340707226e-27
episode: 7742/10000, score: 9, eps: 1.3249143147573231e-27
episode: 7743/10000, score: 10, eps: 1.3235899964950458e-27
episode: 7744/10000, score: 9, eps: 1.322399241879434e-27
episode: 7745/10000, score: 10, eps: 1.3210774375585535e-27
episode: 7746/10000, score: 9, eps: 1.3198889433416746e-27
episode: 7747/10000, score: 9, eps: 1.3187015183418329e-27
episode: 7748/10000, score: 8, eps: 1.3176469262897468e-27
episode: 7749/10000, score: 9, eps: 1.316461518298314e-27
episode: 7750/10000, score: 10, eps: 1.3151456490297512e-27
episode: 7751/10000, score: 8, eps: 1.3140939006776704e-27
episode: 7752/10000, score: 9, eps: 1.3129116891304975e-27
episode: 7753/10000, score: 9, eps: 1.3117305411482202e-27
episode: 7754/10000, score: 8, eps: 1.31068152392640

episode: 8017/10000, score: 10, eps: 1.0250434053575468e-27
episode: 8018/10000, score: 10, eps: 1.0240188230987383e-27
episode: 8019/10000, score: 10, eps: 1.0229952649612493e-27
episode: 8020/10000, score: 9, eps: 1.022074937415161e-27
episode: 8021/10000, score: 9, eps: 1.0211554378326234e-27
episode: 8022/10000, score: 10, eps: 1.0201347417922208e-27
episode: 8023/10000, score: 9, eps: 1.0192169876874361e-27
episode: 8024/10000, score: 9, eps: 1.0183000592310316e-27
episode: 8025/10000, score: 9, eps: 1.0173839556802208e-27
episode: 8026/10000, score: 9, eps: 1.0164686762928857e-27
episode: 8027/10000, score: 10, eps: 1.015452664905542e-27
episode: 8028/10000, score: 9, eps: 1.014539122984801e-27
episode: 8029/10000, score: 10, eps: 1.0135250402826985e-27
episode: 8030/10000, score: 9, eps: 1.0126132325303355e-27
episode: 8031/10000, score: 10, eps: 1.0116010748522676e-27
episode: 8032/10000, score: 9, eps: 1.010690997976326e-27
episode: 8033/10000, score: 10, eps: 1.00968076166803

episode: 8172/10000, score: 12, eps: 8.866789681660203e-28
episode: 8173/10000, score: 8, eps: 8.859698732119508e-28
episode: 8174/10000, score: 9, eps: 8.85172819200804e-28
episode: 8175/10000, score: 9, eps: 8.843764822513948e-28
episode: 8176/10000, score: 9, eps: 8.835808617186259e-28
episode: 8177/10000, score: 10, eps: 8.82697678362284e-28
episode: 8178/10000, score: 9, eps: 8.819035681487871e-28
episode: 8179/10000, score: 9, eps: 8.81110172348669e-28
episode: 8180/10000, score: 9, eps: 8.803174903192154e-28
episode: 8181/10000, score: 9, eps: 8.795255214182894e-28
episode: 8182/10000, score: 10, eps: 8.786463915778312e-28
episode: 8183/10000, score: 8, eps: 8.779437204363604e-28
episode: 8184/10000, score: 10, eps: 8.770661716852637e-28
episode: 8185/10000, score: 10, eps: 8.76189500088126e-28
episode: 8186/10000, score: 9, eps: 8.75401244892678e-28
episode: 8187/10000, score: 9, eps: 8.746136988432e-28
episode: 8188/10000, score: 8, eps: 8.739142527269888e-28
episode: 8189/100

episode: 8315/10000, score: 10, eps: 7.781166560503949e-28
episode: 8316/10000, score: 8, eps: 7.774943805546493e-28
episode: 8317/10000, score: 9, eps: 7.767949154448275e-28
episode: 8318/10000, score: 10, eps: 7.760184699938956e-28
episode: 8319/10000, score: 10, eps: 7.752428006391075e-28
episode: 8320/10000, score: 9, eps: 7.745453611408301e-28
episode: 8321/10000, score: 9, eps: 7.738485490870813e-28
episode: 8322/10000, score: 10, eps: 7.730750486769955e-28
episode: 8323/10000, score: 9, eps: 7.723795593752753e-28
episode: 8324/10000, score: 10, eps: 7.716075272940325e-28
episode: 8325/10000, score: 9, eps: 7.709133582333724e-28
episode: 8326/10000, score: 9, eps: 7.702198136750246e-28
episode: 8327/10000, score: 10, eps: 7.694499403678557e-28
episode: 8328/10000, score: 10, eps: 7.686808365876433e-28
episode: 8329/10000, score: 10, eps: 7.679125015652066e-28
episode: 8330/10000, score: 10, eps: 7.671449345321337e-28
episode: 8331/10000, score: 9, eps: 7.664547801988008e-28
episo

episode: 8457/10000, score: 10, eps: 6.818910238553807e-28
episode: 8458/10000, score: 10, eps: 6.812094396006735e-28
episode: 8459/10000, score: 9, eps: 6.8059659628321815e-28
episode: 8460/10000, score: 10, eps: 6.79916305873746e-28
episode: 8461/10000, score: 11, eps: 6.791687717790893e-28
episode: 8462/10000, score: 10, eps: 6.784899085517717e-28
episode: 8463/10000, score: 9, eps: 6.778795118334577e-28
episode: 8464/10000, score: 10, eps: 6.772019372860733e-28
episode: 8465/10000, score: 10, eps: 6.7652504000840915e-28
episode: 8466/10000, score: 10, eps: 6.758488193234999e-28
episode: 8467/10000, score: 9, eps: 6.752407986349211e-28
episode: 8468/10000, score: 10, eps: 6.745658616136309e-28
episode: 8469/10000, score: 10, eps: 6.738915992257212e-28
episode: 8470/10000, score: 8, eps: 6.733526745982553e-28
episode: 8471/10000, score: 10, eps: 6.726796248515726e-28
episode: 8472/10000, score: 8, eps: 6.72141669464321e-28
episode: 8473/10000, score: 10, eps: 6.714698301779653e-28
ep

episode: 8614/10000, score: 9, eps: 5.8908031967131345e-28
episode: 8615/10000, score: 10, eps: 5.8849150436710885e-28
episode: 8616/10000, score: 9, eps: 5.879620738206944e-28
episode: 8617/10000, score: 9, eps: 5.874331195712209e-28
episode: 8618/10000, score: 10, eps: 5.8684595072607385e-28
episode: 8619/10000, score: 10, eps: 5.862593687856164e-28
episode: 8620/10000, score: 9, eps: 5.857319463578438e-28
episode: 8621/10000, score: 9, eps: 5.852049984204283e-28
episode: 8622/10000, score: 10, eps: 5.846200566940449e-28
episode: 8623/10000, score: 9, eps: 5.8409410905714e-28
episode: 8624/10000, score: 9, eps: 5.835686345838114e-28
episode: 8625/10000, score: 10, eps: 5.829853284850973e-28
episode: 8626/10000, score: 10, eps: 5.824026054300642e-28
episode: 8627/10000, score: 8, eps: 5.819368463858392e-28
episode: 8628/10000, score: 10, eps: 5.81355171341214e-28
episode: 8629/10000, score: 9, eps: 5.808321609260423e-28
episode: 8630/10000, score: 9, eps: 5.803096210320042e-28
episode

episode: 8760/10000, score: 10, eps: 5.138624949250958e-28
episode: 8761/10000, score: 8, eps: 5.134515487818818e-28
episode: 8762/10000, score: 9, eps: 5.129896271874122e-28
episode: 8763/10000, score: 11, eps: 5.124256206571747e-28
episode: 8764/10000, score: 10, eps: 5.119134255665667e-28
episode: 8765/10000, score: 9, eps: 5.114528877293958e-28
episode: 8766/10000, score: 8, eps: 5.110438685973832e-28
episode: 8767/10000, score: 9, eps: 5.10584113048517e-28
episode: 8768/10000, score: 10, eps: 5.100737586370598e-28
episode: 8769/10000, score: 9, eps: 5.096148758379999e-28
episode: 8770/10000, score: 11, eps: 5.090545796786902e-28
episode: 8771/10000, score: 10, eps: 5.085457541124965e-28
episode: 8772/10000, score: 10, eps: 5.080374371429587e-28
episode: 8773/10000, score: 9, eps: 5.075803863003387e-28
episode: 8774/10000, score: 10, eps: 5.0707303426431335e-28
episode: 8775/10000, score: 10, eps: 5.065661893520765e-28
episode: 8776/10000, score: 11, eps: 5.060092450716267e-28
epis

episode: 8904/10000, score: 10, eps: 4.492362818902219e-28
episode: 8905/10000, score: 10, eps: 4.487872477107597e-28
episode: 8906/10000, score: 9, eps: 4.483835007135368e-28
episode: 8907/10000, score: 9, eps: 4.479801169432964e-28
episode: 8908/10000, score: 9, eps: 4.475770960732649e-28
episode: 8909/10000, score: 10, eps: 4.471297203331852e-28
episode: 8910/10000, score: 9, eps: 4.4672746451403135e-28
episode: 8911/10000, score: 9, eps: 4.463255705803366e-28
episode: 8912/10000, score: 9, eps: 4.45924038206534e-28
episode: 8913/10000, score: 9, eps: 4.4552286706735e-28
episode: 8914/10000, score: 9, eps: 4.4512205683780325e-28
episode: 8915/10000, score: 10, eps: 4.446771350324858e-28
episode: 8916/10000, score: 10, eps: 4.442326579488122e-28
episode: 8917/10000, score: 9, eps: 4.438330084431052e-28
episode: 8918/10000, score: 9, eps: 4.434337184781131e-28
episode: 8919/10000, score: 9, eps: 4.430347877303786e-28
episode: 8920/10000, score: 10, eps: 4.425919522551478e-28
episode: 

episode: 9049/10000, score: 11, eps: 3.9203149429889706e-28
episode: 9050/10000, score: 9, eps: 3.9167880705244037e-28
episode: 9051/10000, score: 10, eps: 3.9128730445385796e-28
episode: 9052/10000, score: 9, eps: 3.9093528671041595e-28
episode: 9053/10000, score: 9, eps: 3.905835856562462e-28
episode: 9054/10000, score: 9, eps: 3.902322010064423e-28
episode: 9055/10000, score: 11, eps: 3.8980316014867036e-28
episode: 9056/10000, score: 8, eps: 3.8949142674360997e-28
episode: 9057/10000, score: 9, eps: 3.89141024643742e-28
episode: 9058/10000, score: 9, eps: 3.887909377796486e-28
episode: 9059/10000, score: 10, eps: 3.884023217511442e-28
episode: 9060/10000, score: 9, eps: 3.880528994537831e-28
episode: 9061/10000, score: 9, eps: 3.8770379151072696e-28
episode: 9062/10000, score: 9, eps: 3.873549976391701e-28
episode: 9063/10000, score: 10, eps: 3.8696781690480534e-28
episode: 9064/10000, score: 8, eps: 3.8665835098060283e-28
episode: 9065/10000, score: 10, eps: 3.8627186657948923e-28

episode: 9190/10000, score: 10, eps: 3.4324192996858983e-28
episode: 9191/10000, score: 9, eps: 3.4293313576988495e-28
episode: 9192/10000, score: 10, eps: 3.4259035691288143e-28
episode: 9193/10000, score: 9, eps: 3.4228214889541514e-28
episode: 9194/10000, score: 10, eps: 3.4194002073242013e-28
episode: 9195/10000, score: 10, eps: 3.415982345436715e-28
episode: 9196/10000, score: 9, eps: 3.4129091907925673e-28
episode: 9197/10000, score: 9, eps: 3.409838800881522e-28
episode: 9198/10000, score: 9, eps: 3.406771173216314e-28
episode: 9199/10000, score: 8, eps: 3.4040467099829143e-28
episode: 9200/10000, score: 8, eps: 3.401324425557404e-28
episode: 9201/10000, score: 10, eps: 3.397924631319751e-28
episode: 9202/10000, score: 10, eps: 3.3945282353468364e-28
episode: 9203/10000, score: 10, eps: 3.3911352342419233e-28
episode: 9204/10000, score: 10, eps: 3.3877456246116715e-28
episode: 9205/10000, score: 8, eps: 3.385036376491067e-28
episode: 9206/10000, score: 9, eps: 3.381991062081021e

episode: 9330/10000, score: 10, eps: 3.0103573338936494e-28
episode: 9331/10000, score: 8, eps: 3.007949890758029e-28
episode: 9332/10000, score: 10, eps: 3.0049432940838315e-28
episode: 9333/10000, score: 10, eps: 3.0019397026537e-28
episode: 9334/10000, score: 10, eps: 2.9989391134637434e-28
episode: 9335/10000, score: 10, eps: 2.995941523513071e-28
episode: 9336/10000, score: 10, eps: 2.9929469298037927e-28
episode: 9337/10000, score: 10, eps: 2.989955329341016e-28
episode: 9338/10000, score: 10, eps: 2.9869667191328422e-28
episode: 9339/10000, score: 9, eps: 2.9842795241427744e-28
episode: 9340/10000, score: 10, eps: 2.9812965871863667e-28
episode: 9341/10000, score: 10, eps: 2.9783166318249513e-28
episode: 9342/10000, score: 9, eps: 2.9756372188001557e-28
episode: 9343/10000, score: 8, eps: 2.9732575420369224e-28
episode: 9344/10000, score: 10, eps: 2.970285622104051e-28
episode: 9345/10000, score: 17, eps: 2.965240174115833e-28
episode: 9346/10000, score: 10, eps: 2.9622762679440

episode: 9475/10000, score: 9, eps: 2.602965876967786e-28
episode: 9476/10000, score: 8, eps: 2.60088423295091e-28
episode: 9477/10000, score: 10, eps: 2.598284518803813e-28
episode: 9478/10000, score: 8, eps: 2.5962066185629495e-28
episode: 9479/10000, score: 9, eps: 2.593870967022577e-28
episode: 9480/10000, score: 8, eps: 2.591796596387591e-28
episode: 9481/10000, score: 8, eps: 2.5897238846684056e-28
episode: 9482/10000, score: 8, eps: 2.587652830538352e-28
episode: 9483/10000, score: 8, eps: 2.5855834326718237e-28
episode: 9484/10000, score: 10, eps: 2.5829990124414806e-28
episode: 9485/10000, score: 10, eps: 2.5804171754686895e-28
episode: 9486/10000, score: 10, eps: 2.5778379191713547e-28
episode: 9487/10000, score: 10, eps: 2.5752612409699614e-28
episode: 9488/10000, score: 8, eps: 2.5732017529061367e-28
episode: 9489/10000, score: 8, eps: 2.5711439118562217e-28
episode: 9490/10000, score: 9, eps: 2.568830807731416e-28
episode: 9491/10000, score: 9, eps: 2.5665197845677998e-28


episode: 9761/10000, score: 9, eps: 1.996984446410491e-28
episode: 9762/10000, score: 10, eps: 1.9949883603674854e-28
episode: 9763/10000, score: 9, eps: 1.9931935888714108e-28
episode: 9764/10000, score: 10, eps: 1.9912012919805133e-28
episode: 9765/10000, score: 9, eps: 1.9894099274829604e-28
episode: 9766/10000, score: 10, eps: 1.987421412551258e-28
episode: 9767/10000, score: 8, eps: 1.9858320317879308e-28
episode: 9768/10000, score: 10, eps: 1.9838470931422993e-28
episode: 9769/10000, score: 9, eps: 1.982062344776807e-28
episode: 9770/10000, score: 10, eps: 1.9800811741222798e-28
episode: 9771/10000, score: 10, eps: 1.9781019837471181e-28
episode: 9772/10000, score: 10, eps: 1.9761247716719333e-28
episode: 9773/10000, score: 10, eps: 1.9741495359193156e-28
episode: 9774/10000, score: 9, eps: 1.9723735118650174e-28
episode: 9775/10000, score: 9, eps: 1.9705990855931486e-28
episode: 9776/10000, score: 10, eps: 1.9686293730407138e-28
episode: 9777/10000, score: 9, eps: 1.966858315146

episode: 9901/10000, score: 8, eps: 1.752479663343764e-28
episode: 9902/10000, score: 9, eps: 1.7509030623922472e-28
episode: 9903/10000, score: 9, eps: 1.7493278798141426e-28
episode: 9904/10000, score: 9, eps: 1.7477541143334255e-28
episode: 9905/10000, score: 8, eps: 1.746356400315249e-28
episode: 9906/10000, score: 10, eps: 1.7446108295657878e-28
episode: 9907/10000, score: 10, eps: 1.742867003601779e-28
episode: 9908/10000, score: 10, eps: 1.7411249206792212e-28
episode: 9909/10000, score: 9, eps: 1.739558534909349e-28
episode: 9910/10000, score: 9, eps: 1.7379935583229025e-28
episode: 9911/10000, score: 9, eps: 1.7364299896521234e-28
episode: 9912/10000, score: 10, eps: 1.7346943408476318e-28
episode: 9913/10000, score: 9, eps: 1.7331337402851397e-28
episode: 9914/10000, score: 8, eps: 1.7317477184733156e-28
episode: 9915/10000, score: 9, eps: 1.7301897688104237e-28
episode: 9916/10000, score: 10, eps: 1.7284603574194231e-28
episode: 9917/10000, score: 10, eps: 1.7267326746617862