In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.distributions import Categorical

import gym
from tqdm.notebook import tqdm
import numpy as np
from typing import NamedTuple
from itertools import chain

In [2]:
from policy_generator.policy_instances.envs.simple_arena import ActionSpace

In [3]:
#device to run model on 
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# DEVICE = 'cpu'

In [4]:
class ObsSpace(NamedTuple):
    agent: np.ndarray
    agent_direction: int
    target: np.ndarray
    velocity: int

In [5]:
class PolicyNetwork(nn.Module):
    #Takes in observations and outputs actions
    def __init__(self, observation_space, action_space, shape):
        super(PolicyNetwork, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(observation_space, shape),
            nn.ReLU(),
            nn.Linear(shape, shape),
            nn.ReLU(),
            nn.Linear(shape, shape),
            nn.ReLU(),
            nn.Linear(shape, action_space)
        )
    
    #forward pass
    def forward(self, x):
        actions = self.model(x)
        action_probs = F.softmax(actions, dim=1)
        return action_probs

class PGBot:
    def __init__(self):
        self.model = None
        self.train_config = None
        
    def training_config(self, **kwargs):
        if kwargs:
            self.train_config = kwargs
        else:
            self.train_config = {
                'discount_factor': 0.96,
                'max_steps': 1000,
                'learning_rate': 1e-5,
                'num_episodes': 10000,
                'network_shape': 128,
                }
#             b_params = {'discount_factor': 0.9589431748343439,
#                         'learning_rate': 0.09167455769715335,
#                         'network_shape': 39}
            
    def _init_environment(self):
        #Make environment
        env = gym.make("policy_instances/SimpleArena-v0")

        #Init network
        network = PolicyNetwork(env.shape, env.action_space.n, self.train_config['network_shape']).to(DEVICE)

        #Init optimizer
        optimizer = optim.Adam(network.parameters(), lr=self.train_config['learning_rate'])
        return env, network, optimizer
    
    def train(self, verbose=0):
        env, network, optimizer = self._init_environment()
        
        #track scores
        scores = []

        #iterate through episodes
        for episode in tqdm(range(self.train_config['num_episodes'])):

            #reset environment, initiable variables
            state = env.reset()
            rewards = []
            log_probs = []
            score = 0
            actions_dist = []

            #generate episode
            for step in range(self.train_config['max_steps']):
                env.render()

                #select action
                action, lp = select_action(network, state)
                actions_dist.append(action)

                #execute action
                new_state, reward, done, _, _ = env.step(action)

                #track episode score
                score += reward

                #store reward and log probability
                rewards.append(reward)
                log_probs.append(lp)

                #end episode
                if done:
                    break

                #move into new state
                state = new_state

            #append score
            scores.append(score)
            if verbose > 0:
                print('Score:', score)

            #Calculate Gt (cumulative discounted rewards)
            discounted_rewards = []

            #track cumulative reward
            total_r = .0

            #iterate rewards from Gt to G0
            for r in reversed(rewards):
                # print(type(r), type(total_r), type(DISCOUNT_FACTOR), type(total_r ** DISCOUNT_FACTOR))
                # print(r, total_r, DISCOUNT_FACTOR, total_r ** DISCOUNT_FACTOR)

                #Base case: G(T) = r(T)
                #Recursive: G(t) = r(t) + G(t+1)^DISCOUNT
                if total_r < 0:
                    total_r = r - np.abs(total_r) ** self.train_config['discount_factor']
                else:
                    total_r = r + total_r ** self.train_config['discount_factor']

                #append to discounted rewards
                discounted_rewards.append(total_r)

            #reverse discounted rewards
            rewards = torch.tensor(discounted_rewards).to(DEVICE)
            rewards = torch.flip(rewards, [0])

            #adjusting policy parameters with gradient ascent
            loss = []
            for r, lp in zip(rewards, log_probs):
                #we add a negative sign since network will perform gradient descent and we are doing gradient ascent with REINFORCE
                loss.append(-r * lp)


            #Backpropagation
            optimizer.zero_grad()
            sum(loss).backward()
            # print('Loss:', sum(loss))
            if verbose > 0:
                print(list(zip(*np.unique(actions_dist, return_counts=True))))
            optimizer.step()

        env.close()
        return scores

            
def select_action(network, state):
    ''' Selects an action given current state
    Args:
    - network (Torch NN): network to process state
    - state (Array): Array of action space in an environment
    
    Return:
    - (int): action that is selected
    - (float): log probability of selecting that action given state and network
    '''
    #convert state to float tensor, add 1 dimension, allocate tensor on device
    state = ObsSpace(**state[0] if isinstance(state, tuple) else state)
    unpack_state = list(chain(state.agent, state.target, [state.velocity, state.agent_direction]))
    state = torch.Tensor(unpack_state).float().unsqueeze(0).to(DEVICE)
    
    #use network to predict action probabilities
    action_probs = network(state)
    
    #sample an action using the probability distribution
    m = Categorical(action_probs)
    action = m.sample()
    # print(m, action)
    
    #return action
    return action.item(), m.log_prob(action)

In [None]:
bot = PGBot()
bot.training_config()
bot.train(1)

  0%|          | 0/10000 [00:00<?, ?it/s]

  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(f"{pre} is not within the observation space.")


Score: -76
[(0, 3), (1, 11), (2, 11), (3, 47), (4, 28)]
Score: -46
[(0, 2), (1, 13), (2, 10), (3, 47), (4, 28)]
Score: -66
[(0, 3), (1, 15), (2, 7), (3, 33), (4, 42)]
Score: 158
[(0, 1), (1, 1), (2, 1), (3, 4), (4, 4)]
Score: -60
[(0, 4), (1, 11), (2, 14), (3, 42), (4, 29)]
Score: -62
[(0, 3), (1, 18), (2, 7), (3, 46), (4, 26)]
Score: -66
[(0, 4), (1, 15), (2, 9), (3, 37), (4, 35)]
Score: -67
[(0, 3), (1, 13), (2, 10), (3, 33), (4, 41)]
Score: -74
[(1, 14), (2, 5), (3, 42), (4, 39)]
Score: -58
[(0, 4), (1, 19), (2, 5), (3, 29), (4, 43)]
Score: -68
[(0, 1), (1, 11), (2, 3), (3, 48), (4, 37)]
Score: -46
[(0, 3), (1, 13), (2, 15), (3, 41), (4, 28)]
Score: -86
[(0, 5), (1, 10), (2, 5), (3, 51), (4, 29)]
Score: -56
[(0, 2), (1, 18), (2, 6), (3, 47), (4, 27)]
Score: -64
[(0, 2), (1, 15), (2, 10), (3, 40), (4, 33)]
Score: -50
[(0, 1), (1, 20), (2, 10), (3, 39), (4, 30)]
Score: -8
[(0, 7), (1, 20), (2, 3), (3, 45), (4, 25)]
Score: -68
[(0, 2), (1, 19), (2, 6), (3, 42), (4, 31)]
Score: -72
[(0,

Score: -58
[(0, 7), (1, 25), (2, 11), (3, 42), (4, 15)]
Score: -54
[(0, 6), (1, 32), (2, 7), (3, 39), (4, 16)]
Score: -42
[(0, 3), (1, 32), (2, 8), (3, 33), (4, 24)]
Score: -54
[(0, 8), (1, 26), (2, 9), (3, 34), (4, 23)]
Score: -52
[(0, 5), (1, 35), (2, 8), (3, 35), (4, 17)]
Score: -43
[(0, 4), (1, 27), (2, 11), (3, 30), (4, 28)]
Score: -50
[(0, 5), (1, 23), (2, 5), (3, 41), (4, 26)]
Score: -32
[(0, 8), (1, 34), (2, 7), (3, 27), (4, 24)]
Score: -44
[(0, 2), (1, 31), (2, 4), (3, 32), (4, 31)]
Score: -42
[(0, 7), (1, 33), (2, 11), (3, 31), (4, 18)]
Score: -66
[(0, 4), (1, 19), (2, 12), (3, 42), (4, 23)]
Score: -48
[(0, 8), (1, 27), (2, 5), (3, 36), (4, 24)]
Score: -38
[(0, 1), (1, 26), (2, 7), (3, 43), (4, 23)]
Score: -50
[(0, 6), (1, 26), (2, 11), (3, 41), (4, 16)]
Score: -52
[(0, 2), (1, 25), (2, 9), (3, 38), (4, 26)]
Score: -62
[(0, 5), (1, 29), (2, 13), (3, 23), (4, 30)]
Score: -56
[(0, 3), (1, 22), (2, 4), (3, 51), (4, 20)]
Score: -29
[(0, 3), (1, 36), (2, 10), (3, 30), (4, 21)]
Sco

In [7]:
import optuna

def objective(trial):
    bot = PGBot()
    bot.training_config(**{
        'discount_factor': trial.suggest_float('discount_factor', 0.90, 0.99),
        'learning_rate': trial.suggest_float('learning_rate', 1e-5, 1e-1),
        'num_episodes': 100,
        'max_steps': 10000,
        'network_shape': trial.suggest_int('network_shape', 16, 128),
                    })
    score = bot.train()
    return -1 * np.mean(score)

study = optuna.create_study()
study.optimize(objective, n_trials=1000)

study.best_params

[32m[I 2022-09-19 12:21:36,972][0m A new study created in memory with name: no-name-66e47096-aae4-4c0a-9082-e926e2d906e2[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:22:00,781][0m Trial 0 finished with value: 446.48 and parameters: {'discount_factor': 0.9873970997044688, 'learning_rate': 0.08556202411794087, 'network_shape': 125}. Best is trial 0 with value: 446.48.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:22:24,002][0m Trial 1 finished with value: 446.11 and parameters: {'discount_factor': 0.9683505979911465, 'learning_rate': 0.08508350136598532, 'network_shape': 114}. Best is trial 1 with value: 446.11.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:22:46,899][0m Trial 2 finished with value: 633.87 and parameters: {'discount_factor': 0.9715472135461904, 'learning_rate': 0.07453155766986909, 'network_shape': 83}. Best is trial 1 with value: 446.11.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:23:10,485][0m Trial 3 finished with value: 259.48 and parameters: {'discount_factor': 0.9419679548377589, 'learning_rate': 0.05278112073870399, 'network_shape': 33}. Best is trial 3 with value: 259.48.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:23:33,305][0m Trial 4 finished with value: 251.68 and parameters: {'discount_factor': 0.9491430056860483, 'learning_rate': 0.05115451977203845, 'network_shape': 33}. Best is trial 4 with value: 251.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:23:56,515][0m Trial 5 finished with value: 433.73 and parameters: {'discount_factor': 0.941183808418448, 'learning_rate': 0.06482851105698001, 'network_shape': 82}. Best is trial 4 with value: 251.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:24:19,322][0m Trial 6 finished with value: 251.04 and parameters: {'discount_factor': 0.9095903315725034, 'learning_rate': 0.037178235067303535, 'network_shape': 59}. Best is trial 6 with value: 251.04.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:24:42,710][0m Trial 7 finished with value: 443.63 and parameters: {'discount_factor': 0.9460186677204487, 'learning_rate': 0.05250708636727375, 'network_shape': 20}. Best is trial 6 with value: 251.04.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:25:04,766][0m Trial 8 finished with value: 449.53 and parameters: {'discount_factor': 0.9016091427064585, 'learning_rate': 0.08106647308826846, 'network_shape': 118}. Best is trial 6 with value: 251.04.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:25:25,689][0m Trial 9 finished with value: 293.92 and parameters: {'discount_factor': 0.9066745677178728, 'learning_rate': 0.02014866957773309, 'network_shape': 104}. Best is trial 6 with value: 251.04.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(f"{pre} is not within the observation space.")
[32m[I 2022-09-19 12:25:46,223][0m Trial 10 finished with value: 251.62 and parameters: {'discount_factor': 0.9206253043319347, 'learning_rate': 0.0199617402923913, 'network_shape': 60}. Best is trial 6 with value: 251.04.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:26:06,589][0m Trial 11 finished with value: 297.48 and parameters: {'discount_factor': 0.9221166219427774, 'learning_rate': 0.019596597525475948, 'network_shape': 57}. Best is trial 6 with value: 251.04.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:26:26,944][0m Trial 12 finished with value: 464.48 and parameters: {'discount_factor': 0.9226212043965255, 'learning_rate': 0.0011932578864452328, 'network_shape': 59}. Best is trial 6 with value: 251.04.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:26:46,431][0m Trial 13 finished with value: 446.9 and parameters: {'discount_factor': 0.9196457461822, 'learning_rate': 0.03327022536000832, 'network_shape': 58}. Best is trial 6 with value: 251.04.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:27:06,726][0m Trial 14 finished with value: 453.89 and parameters: {'discount_factor': 0.9131990535653417, 'learning_rate': 0.031987424764183744, 'network_shape': 69}. Best is trial 6 with value: 251.04.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:27:26,971][0m Trial 15 finished with value: 271.62 and parameters: {'discount_factor': 0.9321253122506212, 'learning_rate': 0.00252163780416155, 'network_shape': 42}. Best is trial 6 with value: 251.04.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:27:47,329][0m Trial 16 finished with value: 256.55 and parameters: {'discount_factor': 0.9287377736293256, 'learning_rate': 0.036543807465853506, 'network_shape': 88}. Best is trial 6 with value: 251.04.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:28:07,558][0m Trial 17 finished with value: 265.16 and parameters: {'discount_factor': 0.9103078791214686, 'learning_rate': 0.016818298652076243, 'network_shape': 46}. Best is trial 6 with value: 251.04.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:28:30,109][0m Trial 18 finished with value: 253.62 and parameters: {'discount_factor': 0.9317880665655666, 'learning_rate': 0.09940096987295056, 'network_shape': 98}. Best is trial 6 with value: 251.04.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:28:52,275][0m Trial 19 finished with value: 626.16 and parameters: {'discount_factor': 0.9003709021403234, 'learning_rate': 0.04039183241769001, 'network_shape': 72}. Best is trial 6 with value: 251.04.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:29:13,659][0m Trial 20 finished with value: 611.43 and parameters: {'discount_factor': 0.9137318916114743, 'learning_rate': 0.014175591934762675, 'network_shape': 66}. Best is trial 6 with value: 251.04.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:29:34,254][0m Trial 21 finished with value: 250.91 and parameters: {'discount_factor': 0.9523557160787889, 'learning_rate': 0.04536864066756141, 'network_shape': 16}. Best is trial 21 with value: 250.91.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:29:54,522][0m Trial 22 finished with value: 255.87 and parameters: {'discount_factor': 0.956016360736141, 'learning_rate': 0.027176043560765473, 'network_shape': 26}. Best is trial 21 with value: 250.91.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:30:15,426][0m Trial 23 finished with value: 609.24 and parameters: {'discount_factor': 0.9619847871389426, 'learning_rate': 0.04314869541182793, 'network_shape': 47}. Best is trial 21 with value: 250.91.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:30:35,362][0m Trial 24 finished with value: 287.51 and parameters: {'discount_factor': 0.9348078555874145, 'learning_rate': 0.06160317865335659, 'network_shape': 17}. Best is trial 21 with value: 250.91.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:30:55,449][0m Trial 25 finished with value: 625.73 and parameters: {'discount_factor': 0.9542277560867606, 'learning_rate': 0.025573884608561602, 'network_shape': 38}. Best is trial 21 with value: 250.91.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:31:15,424][0m Trial 26 finished with value: 393.93 and parameters: {'discount_factor': 0.9815550392355353, 'learning_rate': 0.010010574812904755, 'network_shape': 77}. Best is trial 21 with value: 250.91.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:31:35,879][0m Trial 27 finished with value: 253.09 and parameters: {'discount_factor': 0.9189335634497927, 'learning_rate': 0.04503984410649697, 'network_shape': 52}. Best is trial 21 with value: 250.91.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:31:57,109][0m Trial 28 finished with value: 446.85 and parameters: {'discount_factor': 0.925770744920289, 'learning_rate': 0.02782442983796031, 'network_shape': 97}. Best is trial 21 with value: 250.91.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:32:18,421][0m Trial 29 finished with value: 250.65 and parameters: {'discount_factor': 0.9896979858590563, 'learning_rate': 0.06039978512248087, 'network_shape': 64}. Best is trial 29 with value: 250.65.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:32:39,326][0m Trial 30 finished with value: 439.62 and parameters: {'discount_factor': 0.9868107545844677, 'learning_rate': 0.06297725992719815, 'network_shape': 90}. Best is trial 29 with value: 250.65.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:33:00,314][0m Trial 31 finished with value: 259.5 and parameters: {'discount_factor': 0.9770503511487627, 'learning_rate': 0.07035655353857151, 'network_shape': 64}. Best is trial 29 with value: 250.65.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:33:21,453][0m Trial 32 finished with value: 261.4 and parameters: {'discount_factor': 0.9642959082452016, 'learning_rate': 0.05846767583119203, 'network_shape': 51}. Best is trial 29 with value: 250.65.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:33:42,753][0m Trial 33 finished with value: 250.44 and parameters: {'discount_factor': 0.9729764864945177, 'learning_rate': 0.04117221168020109, 'network_shape': 127}. Best is trial 33 with value: 250.44.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:34:03,750][0m Trial 34 finished with value: 251.93 and parameters: {'discount_factor': 0.974795953656779, 'learning_rate': 0.04604687972471228, 'network_shape': 126}. Best is trial 33 with value: 250.44.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:34:25,219][0m Trial 35 finished with value: 250.48 and parameters: {'discount_factor': 0.9863059339994901, 'learning_rate': 0.05718153091625511, 'network_shape': 114}. Best is trial 33 with value: 250.44.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:34:46,752][0m Trial 36 finished with value: 634.24 and parameters: {'discount_factor': 0.9857774233554408, 'learning_rate': 0.057619174603862455, 'network_shape': 119}. Best is trial 33 with value: 250.44.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:35:07,034][0m Trial 37 finished with value: 440.71 and parameters: {'discount_factor': 0.9808647108549718, 'learning_rate': 0.07406675394866075, 'network_shape': 128}. Best is trial 33 with value: 250.44.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:35:28,065][0m Trial 38 finished with value: 448.2 and parameters: {'discount_factor': 0.9692731975724599, 'learning_rate': 0.049241788625750904, 'network_shape': 113}. Best is trial 33 with value: 250.44.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:35:49,327][0m Trial 39 finished with value: 438.83 and parameters: {'discount_factor': 0.988834492032988, 'learning_rate': 0.06692528115754291, 'network_shape': 109}. Best is trial 33 with value: 250.44.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:36:09,730][0m Trial 40 finished with value: 410.1 and parameters: {'discount_factor': 0.9732235467659385, 'learning_rate': 0.05737140130305035, 'network_shape': 105}. Best is trial 33 with value: 250.44.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:36:30,843][0m Trial 41 finished with value: 439.33 and parameters: {'discount_factor': 0.9811533156320017, 'learning_rate': 0.03910159428187657, 'network_shape': 120}. Best is trial 33 with value: 250.44.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:36:51,984][0m Trial 42 finished with value: 251.07 and parameters: {'discount_factor': 0.9898690085253273, 'learning_rate': 0.05252506925615607, 'network_shape': 79}. Best is trial 33 with value: 250.44.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:37:13,222][0m Trial 43 finished with value: 419.63 and parameters: {'discount_factor': 0.9382077266190219, 'learning_rate': 0.0489780847730535, 'network_shape': 122}. Best is trial 33 with value: 250.44.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:37:33,879][0m Trial 44 finished with value: 447.15 and parameters: {'discount_factor': 0.965816140981042, 'learning_rate': 0.034697482240982465, 'network_shape': 28}. Best is trial 33 with value: 250.44.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:37:54,250][0m Trial 45 finished with value: 447.05 and parameters: {'discount_factor': 0.9516588664391419, 'learning_rate': 0.054145528882692376, 'network_shape': 114}. Best is trial 33 with value: 250.44.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:38:15,197][0m Trial 46 finished with value: 618.56 and parameters: {'discount_factor': 0.9601973219671591, 'learning_rate': 0.08467219325460416, 'network_shape': 85}. Best is trial 33 with value: 250.44.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:38:36,469][0m Trial 47 finished with value: 631.54 and parameters: {'discount_factor': 0.9450343423713481, 'learning_rate': 0.041687485108676176, 'network_shape': 75}. Best is trial 33 with value: 250.44.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:38:56,758][0m Trial 48 finished with value: 250.25 and parameters: {'discount_factor': 0.9846153521300025, 'learning_rate': 0.07883238407344675, 'network_shape': 36}. Best is trial 48 with value: 250.25.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:39:18,132][0m Trial 49 finished with value: 635.28 and parameters: {'discount_factor': 0.9840402610365391, 'learning_rate': 0.09427429969180999, 'network_shape': 26}. Best is trial 48 with value: 250.25.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:39:38,513][0m Trial 50 finished with value: 254.14 and parameters: {'discount_factor': 0.9766662989197502, 'learning_rate': 0.07269311335664694, 'network_shape': 34}. Best is trial 48 with value: 250.25.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:39:59,014][0m Trial 51 finished with value: 254.06 and parameters: {'discount_factor': 0.9696101277462477, 'learning_rate': 0.07719100083184863, 'network_shape': 16}. Best is trial 48 with value: 250.25.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:40:19,408][0m Trial 52 finished with value: 631.6 and parameters: {'discount_factor': 0.9789062502308988, 'learning_rate': 0.06766478612505478, 'network_shape': 22}. Best is trial 48 with value: 250.25.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:40:40,091][0m Trial 53 finished with value: 630.89 and parameters: {'discount_factor': 0.9842292148379519, 'learning_rate': 0.09003242041922993, 'network_shape': 41}. Best is trial 48 with value: 250.25.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:41:00,310][0m Trial 54 finished with value: 250.37 and parameters: {'discount_factor': 0.9483242011209138, 'learning_rate': 0.07869114548132751, 'network_shape': 54}. Best is trial 48 with value: 250.25.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:41:20,872][0m Trial 55 finished with value: 261.81 and parameters: {'discount_factor': 0.9400844177829022, 'learning_rate': 0.0785820449252718, 'network_shape': 52}. Best is trial 48 with value: 250.25.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:41:41,021][0m Trial 56 finished with value: 250.62 and parameters: {'discount_factor': 0.9473986099352328, 'learning_rate': 0.08462258182315043, 'network_shape': 33}. Best is trial 48 with value: 250.25.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:42:02,072][0m Trial 57 finished with value: 251.43 and parameters: {'discount_factor': 0.9475199142686161, 'learning_rate': 0.08186680064357825, 'network_shape': 33}. Best is trial 48 with value: 250.25.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:42:23,420][0m Trial 58 finished with value: 627.22 and parameters: {'discount_factor': 0.9589326297285774, 'learning_rate': 0.08790840074562442, 'network_shape': 46}. Best is trial 48 with value: 250.25.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:42:45,358][0m Trial 59 finished with value: 267.43 and parameters: {'discount_factor': 0.9714092008149251, 'learning_rate': 0.09515783111274555, 'network_shape': 63}. Best is trial 48 with value: 250.25.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:43:05,791][0m Trial 60 finished with value: 264.61 and parameters: {'discount_factor': 0.9834874881368572, 'learning_rate': 0.08300744930913903, 'network_shape': 69}. Best is trial 48 with value: 250.25.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:43:26,839][0m Trial 61 finished with value: 252.42 and parameters: {'discount_factor': 0.9506603002249027, 'learning_rate': 0.07835655070873652, 'network_shape': 29}. Best is trial 48 with value: 250.25.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:43:47,134][0m Trial 62 finished with value: 275.53 and parameters: {'discount_factor': 0.941961119761315, 'learning_rate': 0.06976557236647818, 'network_shape': 22}. Best is trial 48 with value: 250.25.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:44:07,726][0m Trial 63 finished with value: 247.07 and parameters: {'discount_factor': 0.9543424451646368, 'learning_rate': 0.08808568968482207, 'network_shape': 39}. Best is trial 63 with value: 247.07.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:44:35,096][0m Trial 64 finished with value: 636.35 and parameters: {'discount_factor': 0.9565243196470933, 'learning_rate': 0.08857784601178295, 'network_shape': 40}. Best is trial 63 with value: 247.07.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:45:01,466][0m Trial 65 finished with value: 636.39 and parameters: {'discount_factor': 0.9441490877089662, 'learning_rate': 0.09885889383117902, 'network_shape': 37}. Best is trial 63 with value: 247.07.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:45:26,352][0m Trial 66 finished with value: 445.46 and parameters: {'discount_factor': 0.9367304939718274, 'learning_rate': 0.08043247212337576, 'network_shape': 55}. Best is trial 63 with value: 247.07.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:45:48,877][0m Trial 67 finished with value: 259.33 and parameters: {'discount_factor': 0.9862240453198848, 'learning_rate': 0.0913317431249915, 'network_shape': 43}. Best is trial 63 with value: 247.07.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:46:12,868][0m Trial 68 finished with value: 250.51 and parameters: {'discount_factor': 0.9662846352149659, 'learning_rate': 0.06032670359415025, 'network_shape': 49}. Best is trial 63 with value: 247.07.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:46:40,300][0m Trial 69 finished with value: 250.65 and parameters: {'discount_factor': 0.9538522102185588, 'learning_rate': 0.08582604190636056, 'network_shape': 36}. Best is trial 63 with value: 247.07.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:47:03,887][0m Trial 70 finished with value: 255.36 and parameters: {'discount_factor': 0.9473905466491724, 'learning_rate': 0.07595846083697513, 'network_shape': 48}. Best is trial 63 with value: 247.07.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:47:27,118][0m Trial 71 finished with value: 446.37 and parameters: {'discount_factor': 0.9653348205894724, 'learning_rate': 0.06344760766757507, 'network_shape': 55}. Best is trial 63 with value: 247.07.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:47:51,008][0m Trial 72 finished with value: 632.48 and parameters: {'discount_factor': 0.956144444718819, 'learning_rate': 0.08523655762390556, 'network_shape': 36}. Best is trial 63 with value: 247.07.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:48:14,893][0m Trial 73 finished with value: 447.76 and parameters: {'discount_factor': 0.9536164183115559, 'learning_rate': 0.09373299897033333, 'network_shape': 35}. Best is trial 63 with value: 247.07.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:48:36,638][0m Trial 74 finished with value: 252.9 and parameters: {'discount_factor': 0.9786884617765641, 'learning_rate': 0.06082255581174964, 'network_shape': 45}. Best is trial 63 with value: 247.07.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:48:58,874][0m Trial 75 finished with value: 254.8 and parameters: {'discount_factor': 0.9495104091512413, 'learning_rate': 0.07303515697186638, 'network_shape': 32}. Best is trial 63 with value: 247.07.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:49:23,006][0m Trial 76 finished with value: 629.01 and parameters: {'discount_factor': 0.9625715783386468, 'learning_rate': 0.08703830329955371, 'network_shape': 31}. Best is trial 63 with value: 247.07.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:49:46,148][0m Trial 77 finished with value: 446.34 and parameters: {'discount_factor': 0.9673639137065293, 'learning_rate': 0.08043401765626924, 'network_shape': 49}. Best is trial 63 with value: 247.07.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:50:11,160][0m Trial 78 finished with value: 246.68 and parameters: {'discount_factor': 0.9589431748343439, 'learning_rate': 0.09167455769715335, 'network_shape': 39}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:50:35,564][0m Trial 79 finished with value: 251.93 and parameters: {'discount_factor': 0.9588076399625809, 'learning_rate': 0.05553423752723674, 'network_shape': 40}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:51:00,329][0m Trial 80 finished with value: 631.48 and parameters: {'discount_factor': 0.9616004791976597, 'learning_rate': 0.09655198192143027, 'network_shape': 44}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:51:23,857][0m Trial 81 finished with value: 447.17 and parameters: {'discount_factor': 0.9895771229657363, 'learning_rate': 0.06572674507166511, 'network_shape': 61}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:51:47,941][0m Trial 82 finished with value: 441.35 and parameters: {'discount_factor': 0.9537385294666055, 'learning_rate': 0.09260532592736309, 'network_shape': 39}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:52:11,931][0m Trial 83 finished with value: 447.32 and parameters: {'discount_factor': 0.9880391626466102, 'learning_rate': 0.060595149861471125, 'network_shape': 124}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:52:37,184][0m Trial 84 finished with value: 633.02 and parameters: {'discount_factor': 0.9748174220369974, 'learning_rate': 0.04860428352643601, 'network_shape': 57}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:53:05,257][0m Trial 85 finished with value: 404.23 and parameters: {'discount_factor': 0.9716442957796327, 'learning_rate': 0.05157047937736617, 'network_shape': 55}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:53:28,815][0m Trial 86 finished with value: 447.04 and parameters: {'discount_factor': 0.9820058565184555, 'learning_rate': 0.07070720792363046, 'network_shape': 68}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:53:55,718][0m Trial 87 finished with value: 254.11 and parameters: {'discount_factor': 0.9670182714296812, 'learning_rate': 0.08989707558329038, 'network_shape': 96}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:54:30,527][0m Trial 88 finished with value: 438.4 and parameters: {'discount_factor': 0.9637234539736096, 'learning_rate': 0.05528721498733179, 'network_shape': 50}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:55:00,581][0m Trial 89 finished with value: 251.66 and parameters: {'discount_factor': 0.9568858926612038, 'learning_rate': 0.08537436079862878, 'network_shape': 25}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:55:27,606][0m Trial 90 finished with value: 251.43 and parameters: {'discount_factor': 0.9436714947179752, 'learning_rate': 0.03876814667170819, 'network_shape': 117}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:55:52,694][0m Trial 91 finished with value: 252.64 and parameters: {'discount_factor': 0.9470543034213563, 'learning_rate': 0.08206082338989693, 'network_shape': 37}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:56:17,955][0m Trial 92 finished with value: 252.68 and parameters: {'discount_factor': 0.9495100356494734, 'learning_rate': 0.08343909762801927, 'network_shape': 43}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:56:42,713][0m Trial 93 finished with value: 462.2 and parameters: {'discount_factor': 0.9518406897578269, 'learning_rate': 0.0964336953261553, 'network_shape': 30}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:57:10,646][0m Trial 94 finished with value: 443.18 and parameters: {'discount_factor': 0.9603749819339472, 'learning_rate': 0.08687931395583087, 'network_shape': 128}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:57:40,282][0m Trial 95 finished with value: 633.74 and parameters: {'discount_factor': 0.985565421146082, 'learning_rate': 0.07613188121152137, 'network_shape': 107}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:58:03,957][0m Trial 96 finished with value: 439.67 and parameters: {'discount_factor': 0.9788512841915872, 'learning_rate': 0.09054919056767503, 'network_shape': 72}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:58:27,193][0m Trial 97 finished with value: 259.63 and parameters: {'discount_factor': 0.9583592964682358, 'learning_rate': 0.043606325266512966, 'network_shape': 65}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:58:51,264][0m Trial 98 finished with value: 438.59 and parameters: {'discount_factor': 0.9549454566021586, 'learning_rate': 0.058819245954816635, 'network_shape': 53}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:59:13,949][0m Trial 99 finished with value: 255.55 and parameters: {'discount_factor': 0.9763540177654092, 'learning_rate': 0.0687226285351696, 'network_shape': 60}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 12:59:36,965][0m Trial 100 finished with value: 250.59 and parameters: {'discount_factor': 0.9828883573634564, 'learning_rate': 0.07470865601289992, 'network_shape': 28}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:00:00,709][0m Trial 101 finished with value: 251.94 and parameters: {'discount_factor': 0.9829392444615479, 'learning_rate': 0.07941024662865608, 'network_shape': 28}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:00:24,766][0m Trial 102 finished with value: 257.75 and parameters: {'discount_factor': 0.9874771497431385, 'learning_rate': 0.07418064022597778, 'network_shape': 33}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:00:49,542][0m Trial 103 finished with value: 580.38 and parameters: {'discount_factor': 0.9859980708592603, 'learning_rate': 0.07167814537070123, 'network_shape': 23}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:01:14,630][0m Trial 104 finished with value: 631.21 and parameters: {'discount_factor': 0.9815360455071326, 'learning_rate': 0.06443130054441432, 'network_shape': 35}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:01:39,364][0m Trial 105 finished with value: 255.3 and parameters: {'discount_factor': 0.9800200581560322, 'learning_rate': 0.08414121265455012, 'network_shape': 42}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:02:03,294][0m Trial 106 finished with value: 273.61 and parameters: {'discount_factor': 0.9704643491245217, 'learning_rate': 0.04723096007507971, 'network_shape': 101}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:02:28,330][0m Trial 107 finished with value: 444.22 and parameters: {'discount_factor': 0.9457566257258494, 'learning_rate': 0.08858345162980436, 'network_shape': 47}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:02:52,501][0m Trial 108 finished with value: 446.06 and parameters: {'discount_factor': 0.9734806538534942, 'learning_rate': 0.07637986676654951, 'network_shape': 39}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:03:17,267][0m Trial 109 finished with value: 442.49 and parameters: {'discount_factor': 0.9848563676554558, 'learning_rate': 0.09254178698079359, 'network_shape': 20}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:03:41,838][0m Trial 110 finished with value: 423.33 and parameters: {'discount_factor': 0.9526280615972337, 'learning_rate': 0.08659598508207937, 'network_shape': 26}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:04:06,577][0m Trial 111 finished with value: 263.39 and parameters: {'discount_factor': 0.94856049305598, 'learning_rate': 0.0368973255220917, 'network_shape': 19}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:04:32,567][0m Trial 112 finished with value: 456.37 and parameters: {'discount_factor': 0.9555138226214214, 'learning_rate': 0.03200259686949371, 'network_shape': 30}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:04:56,674][0m Trial 113 finished with value: 252.45 and parameters: {'discount_factor': 0.9509447712832174, 'learning_rate': 0.04329542889658473, 'network_shape': 111}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:05:21,021][0m Trial 114 finished with value: 436.64 and parameters: {'discount_factor': 0.9397348632701227, 'learning_rate': 0.05368623534709678, 'network_shape': 37}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:05:44,695][0m Trial 115 finished with value: 264.74 and parameters: {'discount_factor': 0.9578936776360764, 'learning_rate': 0.07810837734149743, 'network_shape': 18}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:06:10,631][0m Trial 116 finished with value: 252.69 and parameters: {'discount_factor': 0.9884037258248514, 'learning_rate': 0.08184324860577323, 'network_shape': 33}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:06:35,558][0m Trial 117 finished with value: 259.3 and parameters: {'discount_factor': 0.9603328337348492, 'learning_rate': 0.04610001767413475, 'network_shape': 24}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:07:01,585][0m Trial 118 finished with value: 251.44 and parameters: {'discount_factor': 0.9633868594889403, 'learning_rate': 0.050477903549836514, 'network_shape': 45}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:07:24,728][0m Trial 119 finished with value: 265.78 and parameters: {'discount_factor': 0.9897653676092831, 'learning_rate': 0.03440515933366964, 'network_shape': 16}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:07:44,647][0m Trial 120 finished with value: 440.9 and parameters: {'discount_factor': 0.9525417182179647, 'learning_rate': 0.05966592498373443, 'network_shape': 28}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:08:05,274][0m Trial 121 finished with value: 440.13 and parameters: {'discount_factor': 0.9094812406694295, 'learning_rate': 0.029852746945645013, 'network_shape': 58}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:08:30,427][0m Trial 122 finished with value: 439.89 and parameters: {'discount_factor': 0.9051839845326762, 'learning_rate': 0.0410264129823168, 'network_shape': 51}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:08:53,030][0m Trial 123 finished with value: 442.13 and parameters: {'discount_factor': 0.9264443021930263, 'learning_rate': 0.03701828530662697, 'network_shape': 62}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:09:14,032][0m Trial 124 finished with value: 250.09 and parameters: {'discount_factor': 0.949945293117253, 'learning_rate': 0.05669092548547341, 'network_shape': 68}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:09:35,373][0m Trial 125 finished with value: 439.92 and parameters: {'discount_factor': 0.9503826172379245, 'learning_rate': 0.05527117466117902, 'network_shape': 82}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:09:59,569][0m Trial 126 finished with value: 632.2 and parameters: {'discount_factor': 0.9434095505107997, 'learning_rate': 0.061518714181614295, 'network_shape': 73}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:10:22,461][0m Trial 127 finished with value: 443.76 and parameters: {'discount_factor': 0.9484234505916804, 'learning_rate': 0.056840817252551364, 'network_shape': 68}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:10:45,410][0m Trial 128 finished with value: 445.0 and parameters: {'discount_factor': 0.9540122106126261, 'learning_rate': 0.06654096003626545, 'network_shape': 78}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:11:05,503][0m Trial 129 finished with value: 255.68 and parameters: {'discount_factor': 0.9836081056217229, 'learning_rate': 0.05772185301643748, 'network_shape': 41}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:11:25,162][0m Trial 130 finished with value: 442.08 and parameters: {'discount_factor': 0.9450771159862961, 'learning_rate': 0.08425690844249475, 'network_shape': 37}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:11:44,880][0m Trial 131 finished with value: 442.21 and parameters: {'discount_factor': 0.9322242035121145, 'learning_rate': 0.05212161288532208, 'network_shape': 65}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:12:05,874][0m Trial 132 finished with value: 448.62 and parameters: {'discount_factor': 0.9465399075643443, 'learning_rate': 0.023986749352981843, 'network_shape': 55}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:12:26,193][0m Trial 133 finished with value: 446.48 and parameters: {'discount_factor': 0.9523395045078993, 'learning_rate': 0.040296365254221735, 'network_shape': 61}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:12:45,671][0m Trial 134 finished with value: 257.63 and parameters: {'discount_factor': 0.9570293672261297, 'learning_rate': 0.007036103355510755, 'network_shape': 58}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:13:05,117][0m Trial 135 finished with value: 255.21 and parameters: {'discount_factor': 0.9420582089901985, 'learning_rate': 0.06338113344614447, 'network_shape': 125}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:13:25,282][0m Trial 136 finished with value: 447.96 and parameters: {'discount_factor': 0.987087780056574, 'learning_rate': 0.047674240402516804, 'network_shape': 69}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:13:45,923][0m Trial 137 finished with value: 630.61 and parameters: {'discount_factor': 0.9492906604879415, 'learning_rate': 0.08919115168810482, 'network_shape': 31}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:14:06,468][0m Trial 138 finished with value: 568.42 and parameters: {'discount_factor': 0.968322151105022, 'learning_rate': 0.08110132887441987, 'network_shape': 53}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:14:26,720][0m Trial 139 finished with value: 625.01 and parameters: {'discount_factor': 0.9150063140100915, 'learning_rate': 0.044863839681739584, 'network_shape': 63}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:14:47,204][0m Trial 140 finished with value: 440.38 and parameters: {'discount_factor': 0.9647191523295238, 'learning_rate': 0.09833019533486043, 'network_shape': 91}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:15:07,976][0m Trial 141 finished with value: 257.47 and parameters: {'discount_factor': 0.9893386954236743, 'learning_rate': 0.049991777094606456, 'network_shape': 122}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:15:28,544][0m Trial 142 finished with value: 334.28 and parameters: {'discount_factor': 0.9845250115807033, 'learning_rate': 0.051758972616693814, 'network_shape': 35}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:15:49,227][0m Trial 143 finished with value: 427.4 and parameters: {'discount_factor': 0.9870786444901981, 'learning_rate': 0.0538965339931837, 'network_shape': 116}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:16:10,377][0m Trial 144 finished with value: 634.35 and parameters: {'discount_factor': 0.9803084206594264, 'learning_rate': 0.09165905310338748, 'network_shape': 47}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:16:30,939][0m Trial 145 finished with value: 260.67 and parameters: {'discount_factor': 0.9547496800643663, 'learning_rate': 0.03910639735690814, 'network_shape': 67}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:16:52,821][0m Trial 146 finished with value: 633.51 and parameters: {'discount_factor': 0.9770833426251824, 'learning_rate': 0.0864918570609168, 'network_shape': 39}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:17:13,552][0m Trial 147 finished with value: 630.56 and parameters: {'discount_factor': 0.9855044531262114, 'learning_rate': 0.08302219618131557, 'network_shape': 43}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:17:33,820][0m Trial 148 finished with value: 446.68 and parameters: {'discount_factor': 0.9822325158331111, 'learning_rate': 0.042802803903755224, 'network_shape': 49}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:17:55,290][0m Trial 149 finished with value: 250.59 and parameters: {'discount_factor': 0.9507563651304071, 'learning_rate': 0.05721709295981547, 'network_shape': 32}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:18:16,203][0m Trial 150 finished with value: 251.22 and parameters: {'discount_factor': 0.9511705132883966, 'learning_rate': 0.05633019797167444, 'network_shape': 27}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:18:37,330][0m Trial 151 finished with value: 265.88 and parameters: {'discount_factor': 0.948917717480546, 'learning_rate': 0.058909806905600944, 'network_shape': 34}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:18:58,132][0m Trial 152 finished with value: 254.75 and parameters: {'discount_factor': 0.9535092433223004, 'learning_rate': 0.05320517890771784, 'network_shape': 33}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:19:18,963][0m Trial 153 finished with value: 440.61 and parameters: {'discount_factor': 0.9899395865395636, 'learning_rate': 0.07930499550530616, 'network_shape': 79}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:19:39,304][0m Trial 154 finished with value: 397.06 and parameters: {'discount_factor': 0.9468596061889831, 'learning_rate': 0.05987616227886377, 'network_shape': 21}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:20:01,842][0m Trial 155 finished with value: 252.15 and parameters: {'discount_factor': 0.9615935823820203, 'learning_rate': 0.07490398518597369, 'network_shape': 36}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:20:24,305][0m Trial 156 finished with value: 446.18 and parameters: {'discount_factor': 0.955843970002032, 'learning_rate': 0.06239790146018808, 'network_shape': 75}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:20:47,423][0m Trial 157 finished with value: 446.08 and parameters: {'discount_factor': 0.9876415885183403, 'learning_rate': 0.056116828251195246, 'network_shape': 29}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:21:11,221][0m Trial 158 finished with value: 437.42 and parameters: {'discount_factor': 0.9590863193216229, 'learning_rate': 0.0946583838226401, 'network_shape': 31}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:21:33,894][0m Trial 159 finished with value: 447.95 and parameters: {'discount_factor': 0.9524648301767917, 'learning_rate': 0.06537782505651217, 'network_shape': 71}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:21:56,601][0m Trial 160 finished with value: 250.19 and parameters: {'discount_factor': 0.9501335826441033, 'learning_rate': 0.04465487786524209, 'network_shape': 41}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:22:18,375][0m Trial 161 finished with value: 433.14 and parameters: {'discount_factor': 0.9507789277908202, 'learning_rate': 0.04461968582231689, 'network_shape': 38}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:22:39,719][0m Trial 162 finished with value: 257.45 and parameters: {'discount_factor': 0.948000485597756, 'learning_rate': 0.046896530708734244, 'network_shape': 41}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:23:01,988][0m Trial 163 finished with value: 252.26 and parameters: {'discount_factor': 0.9505212187264523, 'learning_rate': 0.04900125501232777, 'network_shape': 45}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:23:24,844][0m Trial 164 finished with value: 447.26 and parameters: {'discount_factor': 0.945033116196854, 'learning_rate': 0.08744107083045947, 'network_shape': 39}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:23:45,896][0m Trial 165 finished with value: 436.96 and parameters: {'discount_factor': 0.9546456856721082, 'learning_rate': 0.041163370220334006, 'network_shape': 60}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:24:07,297][0m Trial 166 finished with value: 253.14 and parameters: {'discount_factor': 0.9722020215429751, 'learning_rate': 0.035727110883203225, 'network_shape': 35}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:24:28,408][0m Trial 167 finished with value: 441.89 and parameters: {'discount_factor': 0.9492971874056431, 'learning_rate': 0.057533466354709545, 'network_shape': 56}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:24:50,691][0m Trial 168 finished with value: 257.89 and parameters: {'discount_factor': 0.9833676034678552, 'learning_rate': 0.051072943993994105, 'network_shape': 42}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:25:13,956][0m Trial 169 finished with value: 630.64 and parameters: {'discount_factor': 0.9431575801282096, 'learning_rate': 0.05403246950985529, 'network_shape': 86}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:25:36,929][0m Trial 170 finished with value: 445.69 and parameters: {'discount_factor': 0.9464266757762612, 'learning_rate': 0.03825641998657128, 'network_shape': 53}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:26:00,385][0m Trial 171 finished with value: 610.59 and parameters: {'discount_factor': 0.9508980176162303, 'learning_rate': 0.06103929144534154, 'network_shape': 27}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:26:22,943][0m Trial 172 finished with value: 444.63 and parameters: {'discount_factor': 0.9510736568281681, 'learning_rate': 0.057272376132290706, 'network_shape': 26}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:26:45,160][0m Trial 173 finished with value: 252.59 and parameters: {'discount_factor': 0.9550101452600387, 'learning_rate': 0.05607169434768522, 'network_shape': 31}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:27:06,082][0m Trial 174 finished with value: 252.13 and parameters: {'discount_factor': 0.9528030492802919, 'learning_rate': 0.08521779593465388, 'network_shape': 24}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:27:26,961][0m Trial 175 finished with value: 250.26 and parameters: {'discount_factor': 0.9667199084338886, 'learning_rate': 0.09019656774718747, 'network_shape': 93}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:27:48,211][0m Trial 176 finished with value: 611.17 and parameters: {'discount_factor': 0.9688882937672963, 'learning_rate': 0.09017098090308022, 'network_shape': 122}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:28:08,844][0m Trial 177 finished with value: 250.53 and parameters: {'discount_factor': 0.9860305571321651, 'learning_rate': 0.09269677175966869, 'network_shape': 92}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:28:32,777][0m Trial 178 finished with value: 250.54 and parameters: {'discount_factor': 0.9664447270130037, 'learning_rate': 0.09151666161220073, 'network_shape': 95}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:28:57,413][0m Trial 179 finished with value: 439.65 and parameters: {'discount_factor': 0.966852995320928, 'learning_rate': 0.09309574677737263, 'network_shape': 88}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:29:20,797][0m Trial 180 finished with value: 629.97 and parameters: {'discount_factor': 0.9650442043700674, 'learning_rate': 0.09683909195551486, 'network_shape': 94}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:29:44,125][0m Trial 181 finished with value: 252.43 and parameters: {'discount_factor': 0.974582415113691, 'learning_rate': 0.09095538084377351, 'network_shape': 100}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:30:07,334][0m Trial 182 finished with value: 446.47 and parameters: {'discount_factor': 0.9625949431855041, 'learning_rate': 0.08893245796905247, 'network_shape': 97}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:30:29,152][0m Trial 183 finished with value: 250.59 and parameters: {'discount_factor': 0.9572341195409949, 'learning_rate': 0.08842994726862721, 'network_shape': 103}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:30:49,084][0m Trial 184 finished with value: 250.63 and parameters: {'discount_factor': 0.9575250686219637, 'learning_rate': 0.09353050583786028, 'network_shape': 101}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:31:10,402][0m Trial 185 finished with value: 438.39 and parameters: {'discount_factor': 0.9579572836367817, 'learning_rate': 0.09425882918809467, 'network_shape': 94}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:31:31,922][0m Trial 186 finished with value: 633.59 and parameters: {'discount_factor': 0.9567910544583587, 'learning_rate': 0.0930880345134443, 'network_shape': 101}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:31:52,997][0m Trial 187 finished with value: 250.23 and parameters: {'discount_factor': 0.9662885971202669, 'learning_rate': 0.0955927174822197, 'network_shape': 105}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:32:14,561][0m Trial 188 finished with value: 632.42 and parameters: {'discount_factor': 0.9608818643179594, 'learning_rate': 0.09577336865853678, 'network_shape': 106}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:32:35,390][0m Trial 189 finished with value: 447.04 and parameters: {'discount_factor': 0.9707458005989033, 'learning_rate': 0.09170135956946848, 'network_shape': 110}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:32:55,980][0m Trial 190 finished with value: 632.95 and parameters: {'discount_factor': 0.9668758074593686, 'learning_rate': 0.08798568461092147, 'network_shape': 103}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:33:18,462][0m Trial 191 finished with value: 634.9 and parameters: {'discount_factor': 0.9634455199157627, 'learning_rate': 0.09795651213393528, 'network_shape': 99}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:33:40,181][0m Trial 192 finished with value: 436.94 and parameters: {'discount_factor': 0.968535136597749, 'learning_rate': 0.09136524272245566, 'network_shape': 91}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:34:01,276][0m Trial 193 finished with value: 259.67 and parameters: {'discount_factor': 0.9655387362265154, 'learning_rate': 0.08874900176991289, 'network_shape': 108}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:34:22,110][0m Trial 194 finished with value: 254.88 and parameters: {'discount_factor': 0.9858808402390014, 'learning_rate': 0.09543543034758045, 'network_shape': 113}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:34:43,038][0m Trial 195 finished with value: 250.24 and parameters: {'discount_factor': 0.9605018742437834, 'learning_rate': 0.08576741024936228, 'network_shape': 103}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:35:04,186][0m Trial 196 finished with value: 621.4 and parameters: {'discount_factor': 0.9593906494717535, 'learning_rate': 0.08576404473809635, 'network_shape': 95}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:35:24,951][0m Trial 197 finished with value: 447.58 and parameters: {'discount_factor': 0.9622994754351534, 'learning_rate': 0.08244355114391066, 'network_shape': 105}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:35:46,205][0m Trial 198 finished with value: 439.9 and parameters: {'discount_factor': 0.9599044824395192, 'learning_rate': 0.08995423021763412, 'network_shape': 103}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:36:06,782][0m Trial 199 finished with value: 250.35 and parameters: {'discount_factor': 0.9642461924338349, 'learning_rate': 0.09955105238308443, 'network_shape': 102}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:36:28,270][0m Trial 200 finished with value: 442.32 and parameters: {'discount_factor': 0.9644857041528772, 'learning_rate': 0.09891177988733411, 'network_shape': 97}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:36:50,485][0m Trial 201 finished with value: 253.37 and parameters: {'discount_factor': 0.9620833352260872, 'learning_rate': 0.09328370767432127, 'network_shape': 102}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:37:11,380][0m Trial 202 finished with value: 250.18 and parameters: {'discount_factor': 0.9664942877816693, 'learning_rate': 0.09756520083478172, 'network_shape': 104}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:37:34,438][0m Trial 203 finished with value: 253.85 and parameters: {'discount_factor': 0.9669994156055942, 'learning_rate': 0.09584480890520464, 'network_shape': 99}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:37:58,485][0m Trial 204 finished with value: 251.31 and parameters: {'discount_factor': 0.9654867198506454, 'learning_rate': 0.09786600516617867, 'network_shape': 105}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:38:21,252][0m Trial 205 finished with value: 634.12 and parameters: {'discount_factor': 0.9687777451659458, 'learning_rate': 0.0990999885313461, 'network_shape': 108}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:38:42,515][0m Trial 206 finished with value: 439.01 and parameters: {'discount_factor': 0.9706581678381765, 'learning_rate': 0.09437675121620467, 'network_shape': 93}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:39:04,658][0m Trial 207 finished with value: 444.57 and parameters: {'discount_factor': 0.9577063611746953, 'learning_rate': 0.09605892137606746, 'network_shape': 102}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:39:27,308][0m Trial 208 finished with value: 629.04 and parameters: {'discount_factor': 0.9645489772640737, 'learning_rate': 0.09233521900844203, 'network_shape': 111}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:39:50,475][0m Trial 209 finished with value: 445.24 and parameters: {'discount_factor': 0.9660509241958787, 'learning_rate': 0.08735979985187342, 'network_shape': 104}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:40:12,543][0m Trial 210 finished with value: 446.5 and parameters: {'discount_factor': 0.9609209113574387, 'learning_rate': 0.09960825605555035, 'network_shape': 98}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:40:33,336][0m Trial 211 finished with value: 253.82 and parameters: {'discount_factor': 0.9630895264094452, 'learning_rate': 0.09021379187449591, 'network_shape': 106}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:40:53,097][0m Trial 212 finished with value: 250.65 and parameters: {'discount_factor': 0.9673289783514191, 'learning_rate': 0.0770118369321699, 'network_shape': 100}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:41:12,797][0m Trial 213 finished with value: 252.33 and parameters: {'discount_factor': 0.9686656436557834, 'learning_rate': 0.07550106618958195, 'network_shape': 100}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:41:33,298][0m Trial 214 finished with value: 631.18 and parameters: {'discount_factor': 0.9871602008926461, 'learning_rate': 0.09414892617085643, 'network_shape': 115}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:41:52,569][0m Trial 215 finished with value: 447.68 and parameters: {'discount_factor': 0.9585353589835959, 'learning_rate': 0.08402525252833064, 'network_shape': 119}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:42:14,184][0m Trial 216 finished with value: 252.43 and parameters: {'discount_factor': 0.9539059602543544, 'learning_rate': 0.08609338903360417, 'network_shape': 89}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:42:37,638][0m Trial 217 finished with value: 627.54 and parameters: {'discount_factor': 0.9847951465669703, 'learning_rate': 0.08011167763450258, 'network_shape': 103}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:42:58,372][0m Trial 218 finished with value: 446.45 and parameters: {'discount_factor': 0.9562358850103155, 'learning_rate': 0.09655343070648284, 'network_shape': 96}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:43:19,931][0m Trial 219 finished with value: 634.02 and parameters: {'discount_factor': 0.9636523565150755, 'learning_rate': 0.09299742801117476, 'network_shape': 108}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:43:40,825][0m Trial 220 finished with value: 633.37 and parameters: {'discount_factor': 0.9725456659957648, 'learning_rate': 0.08851655201766655, 'network_shape': 92}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:44:01,053][0m Trial 221 finished with value: 265.42 and parameters: {'discount_factor': 0.9480447174242554, 'learning_rate': 0.09103417449064857, 'network_shape': 38}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:44:21,471][0m Trial 222 finished with value: 637.01 and parameters: {'discount_factor': 0.9694413908255543, 'learning_rate': 0.07841821149871116, 'network_shape': 101}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:44:43,080][0m Trial 223 finished with value: 440.03 and parameters: {'discount_factor': 0.9677600399438506, 'learning_rate': 0.08420865840159447, 'network_shape': 98}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:45:05,088][0m Trial 224 finished with value: 446.57 and parameters: {'discount_factor': 0.9666241427597541, 'learning_rate': 0.07671447308294192, 'network_shape': 103}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:45:25,233][0m Trial 225 finished with value: 250.56 and parameters: {'discount_factor': 0.9606509453535279, 'learning_rate': 0.07330095114378923, 'network_shape': 100}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:45:45,175][0m Trial 226 finished with value: 447.11 and parameters: {'discount_factor': 0.9606935113438497, 'learning_rate': 0.07262526702787904, 'network_shape': 105}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:46:05,799][0m Trial 227 finished with value: 634.17 and parameters: {'discount_factor': 0.9595573162023419, 'learning_rate': 0.0782765644042731, 'network_shape': 100}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:46:25,668][0m Trial 228 finished with value: 250.98 and parameters: {'discount_factor': 0.9614568702186875, 'learning_rate': 0.0975410537979236, 'network_shape': 96}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:46:46,269][0m Trial 229 finished with value: 250.5 and parameters: {'discount_factor': 0.956711129139177, 'learning_rate': 0.06846570551342812, 'network_shape': 40}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:47:08,225][0m Trial 230 finished with value: 438.86 and parameters: {'discount_factor': 0.9568998396406868, 'learning_rate': 0.07003022394300627, 'network_shape': 40}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:47:29,581][0m Trial 231 finished with value: 441.75 and parameters: {'discount_factor': 0.9652522953017258, 'learning_rate': 0.07147623900763585, 'network_shape': 101}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:47:49,452][0m Trial 232 finished with value: 449.42 and parameters: {'discount_factor': 0.9585085190381919, 'learning_rate': 0.08183394698196149, 'network_shape': 36}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:48:10,323][0m Trial 233 finished with value: 250.58 and parameters: {'discount_factor': 0.9552094783344653, 'learning_rate': 0.07729425625368502, 'network_shape': 104}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:48:31,655][0m Trial 234 finished with value: 633.2 and parameters: {'discount_factor': 0.9560726022339897, 'learning_rate': 0.07597920264843085, 'network_shape': 106}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:48:53,201][0m Trial 235 finished with value: 631.77 and parameters: {'discount_factor': 0.9538102570612305, 'learning_rate': 0.07379904269523874, 'network_shape': 104}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:49:14,000][0m Trial 236 finished with value: 257.46 and parameters: {'discount_factor': 0.9569736316809466, 'learning_rate': 0.07992945943782954, 'network_shape': 34}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:49:35,619][0m Trial 237 finished with value: 633.65 and parameters: {'discount_factor': 0.9550479304187646, 'learning_rate': 0.09233197025579591, 'network_shape': 110}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:49:56,771][0m Trial 238 finished with value: 637.28 and parameters: {'discount_factor': 0.9596764851688199, 'learning_rate': 0.08943921903624119, 'network_shape': 32}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:50:18,136][0m Trial 239 finished with value: 631.64 and parameters: {'discount_factor': 0.9520582239629138, 'learning_rate': 0.06897273300720912, 'network_shape': 107}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:50:38,403][0m Trial 240 finished with value: 448.19 and parameters: {'discount_factor': 0.9623633189270857, 'learning_rate': 0.07297540962054261, 'network_shape': 43}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:50:59,850][0m Trial 241 finished with value: 571.51 and parameters: {'discount_factor': 0.9535760596063996, 'learning_rate': 0.08663306878167477, 'network_shape': 38}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:51:21,139][0m Trial 242 finished with value: 250.29 and parameters: {'discount_factor': 0.9554278042454541, 'learning_rate': 0.09996978781175535, 'network_shape': 36}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:51:44,036][0m Trial 243 finished with value: 448.61 and parameters: {'discount_factor': 0.9575774756960089, 'learning_rate': 0.09963272168883694, 'network_shape': 41}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:52:04,649][0m Trial 244 finished with value: 255.0 and parameters: {'discount_factor': 0.9554977067282052, 'learning_rate': 0.0947304121105984, 'network_shape': 34}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:52:24,940][0m Trial 245 finished with value: 250.2 and parameters: {'discount_factor': 0.94995519849834, 'learning_rate': 0.09777643764655122, 'network_shape': 86}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:52:45,507][0m Trial 246 finished with value: 252.18 and parameters: {'discount_factor': 0.95015816127857, 'learning_rate': 0.09805614224802921, 'network_shape': 83}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:53:06,285][0m Trial 247 finished with value: 440.27 and parameters: {'discount_factor': 0.9512315630709185, 'learning_rate': 0.09940064402282407, 'network_shape': 93}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:53:26,269][0m Trial 248 finished with value: 447.79 and parameters: {'discount_factor': 0.9472889886127192, 'learning_rate': 0.09570031362363568, 'network_shape': 89}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:53:46,836][0m Trial 249 finished with value: 409.68 and parameters: {'discount_factor': 0.948981660987131, 'learning_rate': 0.07384896472841339, 'network_shape': 37}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:54:07,913][0m Trial 250 finished with value: 250.53 and parameters: {'discount_factor': 0.949588470679619, 'learning_rate': 0.09999663150297795, 'network_shape': 84}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:54:28,438][0m Trial 251 finished with value: 444.9 and parameters: {'discount_factor': 0.9527583939437778, 'learning_rate': 0.09686539620897365, 'network_shape': 87}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:54:49,296][0m Trial 252 finished with value: 636.72 and parameters: {'discount_factor': 0.981745713298922, 'learning_rate': 0.09759356007280659, 'network_shape': 92}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:55:10,344][0m Trial 253 finished with value: 261.51 and parameters: {'discount_factor': 0.9498866219052275, 'learning_rate': 0.09869148312859455, 'network_shape': 98}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:55:31,925][0m Trial 254 finished with value: 251.28 and parameters: {'discount_factor': 0.964638094517191, 'learning_rate': 0.09960282376851456, 'network_shape': 85}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:55:53,426][0m Trial 255 finished with value: 250.5 and parameters: {'discount_factor': 0.9521327399948024, 'learning_rate': 0.09996016072496068, 'network_shape': 95}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:56:14,340][0m Trial 256 finished with value: 629.7 and parameters: {'discount_factor': 0.952344675261369, 'learning_rate': 0.09704713033781097, 'network_shape': 94}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:56:35,352][0m Trial 257 finished with value: 442.03 and parameters: {'discount_factor': 0.9492816546833514, 'learning_rate': 0.09991123366684797, 'network_shape': 90}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:56:55,628][0m Trial 258 finished with value: 250.95 and parameters: {'discount_factor': 0.9550866440139189, 'learning_rate': 0.09534647153379636, 'network_shape': 81}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:57:16,473][0m Trial 259 finished with value: 438.49 and parameters: {'discount_factor': 0.9524003938229678, 'learning_rate': 0.09983939078535033, 'network_shape': 85}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:57:37,046][0m Trial 260 finished with value: 634.68 and parameters: {'discount_factor': 0.9461667830751829, 'learning_rate': 0.09598137200469746, 'network_shape': 95}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:57:57,947][0m Trial 261 finished with value: 447.96 and parameters: {'discount_factor': 0.9606758208403956, 'learning_rate': 0.06757714754406381, 'network_shape': 128}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:58:19,991][0m Trial 262 finished with value: 435.46 and parameters: {'discount_factor': 0.9542219685342309, 'learning_rate': 0.09788440319506381, 'network_shape': 113}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:58:41,802][0m Trial 263 finished with value: 250.33 and parameters: {'discount_factor': 0.9514187841098082, 'learning_rate': 0.09371769395964004, 'network_shape': 97}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:59:03,989][0m Trial 264 finished with value: 446.15 and parameters: {'discount_factor': 0.9505018209044382, 'learning_rate': 0.09460240064335393, 'network_shape': 96}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:59:26,376][0m Trial 265 finished with value: 440.32 and parameters: {'discount_factor': 0.9834302094939944, 'learning_rate': 0.0972739945928053, 'network_shape': 98}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 13:59:47,698][0m Trial 266 finished with value: 252.55 and parameters: {'discount_factor': 0.9483483453124348, 'learning_rate': 0.09249558646277187, 'network_shape': 92}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:00:11,448][0m Trial 267 finished with value: 430.32 and parameters: {'discount_factor': 0.9520925837233553, 'learning_rate': 0.05948998896377657, 'network_shape': 45}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:00:32,865][0m Trial 268 finished with value: 447.12 and parameters: {'discount_factor': 0.963142967579738, 'learning_rate': 0.09051697803988076, 'network_shape': 102}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:00:53,787][0m Trial 269 finished with value: 635.47 and parameters: {'discount_factor': 0.9502801704327146, 'learning_rate': 0.06284225235836143, 'network_shape': 96}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:01:13,830][0m Trial 270 finished with value: 446.41 and parameters: {'discount_factor': 0.9591413053139725, 'learning_rate': 0.09446271188072553, 'network_shape': 104}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:01:34,487][0m Trial 271 finished with value: 449.89 and parameters: {'discount_factor': 0.9549379292802272, 'learning_rate': 0.09613150212144071, 'network_shape': 75}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:01:55,294][0m Trial 272 finished with value: 402.69 and parameters: {'discount_factor': 0.9800559223168934, 'learning_rate': 0.07579752463630468, 'network_shape': 41}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:02:16,930][0m Trial 273 finished with value: 448.17 and parameters: {'discount_factor': 0.9477838381827589, 'learning_rate': 0.04239203145678517, 'network_shape': 90}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:02:39,692][0m Trial 274 finished with value: 440.21 and parameters: {'discount_factor': 0.9771832053396874, 'learning_rate': 0.09780790808120303, 'network_shape': 48}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:03:01,558][0m Trial 275 finished with value: 633.4 and parameters: {'discount_factor': 0.966192378490547, 'learning_rate': 0.09429915592866553, 'network_shape': 94}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:03:23,034][0m Trial 276 finished with value: 446.62 and parameters: {'discount_factor': 0.9562554117742337, 'learning_rate': 0.09246727567751611, 'network_shape': 99}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:03:43,498][0m Trial 277 finished with value: 444.71 and parameters: {'discount_factor': 0.9701103837563071, 'learning_rate': 0.09984326142096042, 'network_shape': 121}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:04:03,936][0m Trial 278 finished with value: 444.51 and parameters: {'discount_factor': 0.9515816440898663, 'learning_rate': 0.09738803395545186, 'network_shape': 39}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:04:25,805][0m Trial 279 finished with value: 627.16 and parameters: {'discount_factor': 0.9449160571128976, 'learning_rate': 0.0550860526700233, 'network_shape': 97}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:04:46,588][0m Trial 280 finished with value: 446.71 and parameters: {'discount_factor': 0.9582485369251725, 'learning_rate': 0.09088363531979501, 'network_shape': 104}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:05:07,566][0m Trial 281 finished with value: 437.8 and parameters: {'discount_factor': 0.963362950518397, 'learning_rate': 0.0947106602635647, 'network_shape': 80}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:05:29,785][0m Trial 282 finished with value: 441.23 and parameters: {'discount_factor': 0.9613021556353314, 'learning_rate': 0.0881845278508557, 'network_shape': 83}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:05:51,670][0m Trial 283 finished with value: 438.98 and parameters: {'discount_factor': 0.9537884328665811, 'learning_rate': 0.09979403457681425, 'network_shape': 43}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:06:13,686][0m Trial 284 finished with value: 626.0 and parameters: {'discount_factor': 0.9492227102816063, 'learning_rate': 0.05776307801047397, 'network_shape': 118}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:06:35,978][0m Trial 285 finished with value: 446.71 and parameters: {'discount_factor': 0.9677379893054359, 'learning_rate': 0.09230766397191145, 'network_shape': 100}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:06:59,502][0m Trial 286 finished with value: 628.99 and parameters: {'discount_factor': 0.9652379788979694, 'learning_rate': 0.08953098377233283, 'network_shape': 102}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:07:24,003][0m Trial 287 finished with value: 261.74 and parameters: {'discount_factor': 0.9510110825421043, 'learning_rate': 0.09715925108934634, 'network_shape': 36}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:07:51,175][0m Trial 288 finished with value: 447.97 and parameters: {'discount_factor': 0.947071592089015, 'learning_rate': 0.06436646430520598, 'network_shape': 93}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:08:16,840][0m Trial 289 finished with value: 445.88 and parameters: {'discount_factor': 0.9862905809888277, 'learning_rate': 0.07884733302282133, 'network_shape': 87}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:08:42,688][0m Trial 290 finished with value: 447.52 and parameters: {'discount_factor': 0.9558498509668847, 'learning_rate': 0.07084335244766815, 'network_shape': 109}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:09:08,635][0m Trial 291 finished with value: 443.36 and parameters: {'discount_factor': 0.9527029313671737, 'learning_rate': 0.039871079998123365, 'network_shape': 95}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:09:36,734][0m Trial 292 finished with value: 438.9 and parameters: {'discount_factor': 0.963793843293024, 'learning_rate': 0.07722989229390508, 'network_shape': 45}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:10:03,538][0m Trial 293 finished with value: 259.54 and parameters: {'discount_factor': 0.9873780991178125, 'learning_rate': 0.07417208865275021, 'network_shape': 51}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:10:31,312][0m Trial 294 finished with value: 437.02 and parameters: {'discount_factor': 0.9852794834198437, 'learning_rate': 0.08035267222321853, 'network_shape': 91}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:10:58,973][0m Trial 295 finished with value: 251.26 and parameters: {'discount_factor': 0.9601074033509631, 'learning_rate': 0.09107974553355477, 'network_shape': 106}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:11:26,601][0m Trial 296 finished with value: 635.26 and parameters: {'discount_factor': 0.9738882065154035, 'learning_rate': 0.09385313269444419, 'network_shape': 99}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:11:53,525][0m Trial 297 finished with value: 445.25 and parameters: {'discount_factor': 0.9701774892855898, 'learning_rate': 0.09593586161910889, 'network_shape': 38}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:12:20,218][0m Trial 298 finished with value: 255.58 and parameters: {'discount_factor': 0.9497874466111939, 'learning_rate': 0.09964129453022333, 'network_shape': 39}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:12:46,625][0m Trial 299 finished with value: 253.49 and parameters: {'discount_factor': 0.9832015473934818, 'learning_rate': 0.06770578927590408, 'network_shape': 42}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:13:11,564][0m Trial 300 finished with value: 439.81 and parameters: {'discount_factor': 0.9530806501351818, 'learning_rate': 0.05979702394100518, 'network_shape': 125}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:13:35,380][0m Trial 301 finished with value: 441.03 and parameters: {'discount_factor': 0.9661688182672293, 'learning_rate': 0.05310805323624981, 'network_shape': 77}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:13:59,594][0m Trial 302 finished with value: 252.38 and parameters: {'discount_factor': 0.9884243311455402, 'learning_rate': 0.044904253283800696, 'network_shape': 95}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:14:24,061][0m Trial 303 finished with value: 442.39 and parameters: {'discount_factor': 0.9565660139249416, 'learning_rate': 0.08805443786389151, 'network_shape': 102}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:14:49,781][0m Trial 304 finished with value: 254.13 and parameters: {'discount_factor': 0.9685752210766282, 'learning_rate': 0.09771490448736317, 'network_shape': 34}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:15:13,715][0m Trial 305 finished with value: 455.67 and parameters: {'discount_factor': 0.9575430762108055, 'learning_rate': 0.09296328247959675, 'network_shape': 98}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:15:36,561][0m Trial 306 finished with value: 445.0 and parameters: {'discount_factor': 0.9510644114535215, 'learning_rate': 0.09576501883813623, 'network_shape': 35}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:16:00,073][0m Trial 307 finished with value: 251.85 and parameters: {'discount_factor': 0.9620219272912118, 'learning_rate': 0.0725156743395084, 'network_shape': 47}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:16:23,799][0m Trial 308 finished with value: 446.33 and parameters: {'discount_factor': 0.9590559204001695, 'learning_rate': 0.08596884213876813, 'network_shape': 105}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:16:49,009][0m Trial 309 finished with value: 447.25 and parameters: {'discount_factor': 0.9483234778093393, 'learning_rate': 0.09993114736015139, 'network_shape': 37}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:17:14,124][0m Trial 310 finished with value: 250.37 and parameters: {'discount_factor': 0.9847184259273992, 'learning_rate': 0.07765420327915186, 'network_shape': 112}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:17:39,344][0m Trial 311 finished with value: 447.4 and parameters: {'discount_factor': 0.9670334678564431, 'learning_rate': 0.0817930650776735, 'network_shape': 115}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:18:04,593][0m Trial 312 finished with value: 633.41 and parameters: {'discount_factor': 0.964389589411524, 'learning_rate': 0.07930123899245657, 'network_shape': 111}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:18:30,409][0m Trial 313 finished with value: 325.95 and parameters: {'discount_factor': 0.9543073632340505, 'learning_rate': 0.07761865943954836, 'network_shape': 40}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:18:58,236][0m Trial 314 finished with value: 632.48 and parameters: {'discount_factor': 0.9710188515657738, 'learning_rate': 0.0977979219007175, 'network_shape': 107}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:19:24,577][0m Trial 315 finished with value: 630.41 and parameters: {'discount_factor': 0.9854793246460358, 'learning_rate': 0.0759839898865788, 'network_shape': 90}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:19:49,727][0m Trial 316 finished with value: 438.06 and parameters: {'discount_factor': 0.9626451285835522, 'learning_rate': 0.09580840846018322, 'network_shape': 113}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:20:15,376][0m Trial 317 finished with value: 636.18 and parameters: {'discount_factor': 0.984384408288845, 'learning_rate': 0.09132812450497768, 'network_shape': 123}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:20:40,272][0m Trial 318 finished with value: 250.93 and parameters: {'discount_factor': 0.978677037498508, 'learning_rate': 0.065595534296655, 'network_shape': 97}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:21:06,108][0m Trial 319 finished with value: 635.27 and parameters: {'discount_factor': 0.9543279478548726, 'learning_rate': 0.09393366355602398, 'network_shape': 88}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:21:33,473][0m Trial 320 finished with value: 631.17 and parameters: {'discount_factor': 0.9604415736019158, 'learning_rate': 0.09785019792272072, 'network_shape': 93}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:21:59,062][0m Trial 321 finished with value: 446.59 and parameters: {'discount_factor': 0.9813234334902932, 'learning_rate': 0.08284530386948039, 'network_shape': 109}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:22:24,680][0m Trial 322 finished with value: 253.9 and parameters: {'discount_factor': 0.9454054989836866, 'learning_rate': 0.04257993635521831, 'network_shape': 100}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:22:51,922][0m Trial 323 finished with value: 447.87 and parameters: {'discount_factor': 0.965647552727283, 'learning_rate': 0.08102429233367218, 'network_shape': 117}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:23:17,122][0m Trial 324 finished with value: 632.22 and parameters: {'discount_factor': 0.9523235053602505, 'learning_rate': 0.09618828525446556, 'network_shape': 97}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:23:41,593][0m Trial 325 finished with value: 341.57 and parameters: {'discount_factor': 0.9681390380673918, 'learning_rate': 9.064837251462898e-05, 'network_shape': 103}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:24:07,150][0m Trial 326 finished with value: 634.27 and parameters: {'discount_factor': 0.9880077966313365, 'learning_rate': 0.07468282523942578, 'network_shape': 101}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:24:31,875][0m Trial 327 finished with value: 444.19 and parameters: {'discount_factor': 0.9725276034614313, 'learning_rate': 0.07815448967571088, 'network_shape': 107}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:24:55,198][0m Trial 328 finished with value: 251.93 and parameters: {'discount_factor': 0.9487419452479213, 'learning_rate': 0.06946322175399511, 'network_shape': 43}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:25:19,340][0m Trial 329 finished with value: 444.21 and parameters: {'discount_factor': 0.9553323327466776, 'learning_rate': 0.09330867873174722, 'network_shape': 92}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:25:42,691][0m Trial 330 finished with value: 250.17 and parameters: {'discount_factor': 0.964048169178655, 'learning_rate': 0.0980077140538107, 'network_shape': 95}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:26:07,213][0m Trial 331 finished with value: 253.39 and parameters: {'discount_factor': 0.9648685535292751, 'learning_rate': 0.014147305242463382, 'network_shape': 94}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:26:33,340][0m Trial 332 finished with value: 447.48 and parameters: {'discount_factor': 0.9644359252107455, 'learning_rate': 0.09836218487563729, 'network_shape': 95}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:26:58,874][0m Trial 333 finished with value: 448.17 and parameters: {'discount_factor': 0.9621078424576878, 'learning_rate': 0.09688024747413398, 'network_shape': 120}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:27:23,860][0m Trial 334 finished with value: 253.1 and parameters: {'discount_factor': 0.9666896316321448, 'learning_rate': 0.0948925335492775, 'network_shape': 98}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:27:50,391][0m Trial 335 finished with value: 252.42 and parameters: {'discount_factor': 0.963201744935102, 'learning_rate': 0.09959060558157835, 'network_shape': 40}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:28:16,609][0m Trial 336 finished with value: 448.64 and parameters: {'discount_factor': 0.9608459932872842, 'learning_rate': 0.09106224537450223, 'network_shape': 85}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:28:41,285][0m Trial 337 finished with value: 633.25 and parameters: {'discount_factor': 0.9474008356530518, 'learning_rate': 0.09690350553266032, 'network_shape': 89}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:29:05,543][0m Trial 338 finished with value: 633.95 and parameters: {'discount_factor': 0.975635345760607, 'learning_rate': 0.04931435722798065, 'network_shape': 91}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:29:32,701][0m Trial 339 finished with value: 252.38 and parameters: {'discount_factor': 0.9696541659278527, 'learning_rate': 0.09973804089280322, 'network_shape': 71}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:29:57,929][0m Trial 340 finished with value: 449.94 and parameters: {'discount_factor': 0.9670294152947578, 'learning_rate': 0.09472223324384628, 'network_shape': 97}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:30:22,452][0m Trial 341 finished with value: 446.18 and parameters: {'discount_factor': 0.9641121983333419, 'learning_rate': 0.06170992890981696, 'network_shape': 95}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:30:46,897][0m Trial 342 finished with value: 440.52 and parameters: {'discount_factor': 0.9591903049987763, 'learning_rate': 0.09291709410482964, 'network_shape': 37}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:31:12,891][0m Trial 343 finished with value: 444.82 and parameters: {'discount_factor': 0.961739179051506, 'learning_rate': 0.09799012252624316, 'network_shape': 93}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:31:37,146][0m Trial 344 finished with value: 632.88 and parameters: {'discount_factor': 0.9499821421949038, 'learning_rate': 0.09576981929156678, 'network_shape': 99}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:32:01,928][0m Trial 345 finished with value: 632.15 and parameters: {'discount_factor': 0.986661992240675, 'learning_rate': 0.04594670420311368, 'network_shape': 57}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:32:26,631][0m Trial 346 finished with value: 621.96 and parameters: {'discount_factor': 0.9826347538996151, 'learning_rate': 0.09029925694439625, 'network_shape': 39}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:32:51,691][0m Trial 347 finished with value: 622.73 and parameters: {'discount_factor': 0.9463558814181775, 'learning_rate': 0.08878640373936478, 'network_shape': 50}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:33:16,234][0m Trial 348 finished with value: 297.79 and parameters: {'discount_factor': 0.9658247027776022, 'learning_rate': 0.037970446977105644, 'network_shape': 45}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:33:41,090][0m Trial 349 finished with value: 630.35 and parameters: {'discount_factor': 0.9182642339855378, 'learning_rate': 0.09754184966731196, 'network_shape': 83}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:34:05,962][0m Trial 350 finished with value: 445.04 and parameters: {'discount_factor': 0.9521362843390322, 'learning_rate': 0.09351210921725525, 'network_shape': 66}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:34:28,951][0m Trial 351 finished with value: 250.54 and parameters: {'discount_factor': 0.9688291246853689, 'learning_rate': 0.09978208520146672, 'network_shape': 54}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:34:52,947][0m Trial 352 finished with value: 441.41 and parameters: {'discount_factor': 0.9687595425787935, 'learning_rate': 0.09840671243191572, 'network_shape': 56}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:35:15,693][0m Trial 353 finished with value: 447.99 and parameters: {'discount_factor': 0.9674589354651609, 'learning_rate': 0.09962837395169341, 'network_shape': 53}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:35:39,014][0m Trial 354 finished with value: 439.77 and parameters: {'discount_factor': 0.9705482522432196, 'learning_rate': 0.09999027984232056, 'network_shape': 61}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:36:01,920][0m Trial 355 finished with value: 253.44 and parameters: {'discount_factor': 0.9679244475470957, 'learning_rate': 0.09574840131541043, 'network_shape': 112}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:36:22,936][0m Trial 356 finished with value: 625.0 and parameters: {'discount_factor': 0.9659065016949552, 'learning_rate': 0.09753271671523277, 'network_shape': 48}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:36:43,501][0m Trial 357 finished with value: 250.9 and parameters: {'discount_factor': 0.9894447232213799, 'learning_rate': 0.09230615927569626, 'network_shape': 59}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:37:04,973][0m Trial 358 finished with value: 248.17 and parameters: {'discount_factor': 0.9844294426268302, 'learning_rate': 0.08520030350025148, 'network_shape': 42}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:37:25,677][0m Trial 359 finished with value: 265.12 and parameters: {'discount_factor': 0.9843945838609734, 'learning_rate': 0.08442380064591991, 'network_shape': 42}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:37:47,073][0m Trial 360 finished with value: 431.25 and parameters: {'discount_factor': 0.9849573060972998, 'learning_rate': 0.08459241061485898, 'network_shape': 41}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:38:07,889][0m Trial 361 finished with value: 446.37 and parameters: {'discount_factor': 0.9866066191448815, 'learning_rate': 0.08707556712099265, 'network_shape': 43}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:38:27,288][0m Trial 362 finished with value: 254.41 and parameters: {'discount_factor': 0.9808920196370412, 'learning_rate': 0.08661617126622104, 'network_shape': 36}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:38:46,841][0m Trial 363 finished with value: 447.01 and parameters: {'discount_factor': 0.9838061367785891, 'learning_rate': 0.09617463281760813, 'network_shape': 54}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:39:10,553][0m Trial 364 finished with value: 473.55 and parameters: {'discount_factor': 0.9492604866343068, 'learning_rate': 0.08862332382459816, 'network_shape': 40}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:39:30,964][0m Trial 365 finished with value: 436.24 and parameters: {'discount_factor': 0.9828757345805557, 'learning_rate': 0.09053972775710872, 'network_shape': 87}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:39:50,787][0m Trial 366 finished with value: 255.3 and parameters: {'discount_factor': 0.9779207278305821, 'learning_rate': 0.04123696101163795, 'network_shape': 38}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:40:11,424][0m Trial 367 finished with value: 440.6 and parameters: {'discount_factor': 0.9512609513147401, 'learning_rate': 0.0827745011435221, 'network_shape': 44}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:40:31,771][0m Trial 368 finished with value: 252.84 and parameters: {'discount_factor': 0.9859229118763476, 'learning_rate': 0.09389123686483569, 'network_shape': 91}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:40:52,176][0m Trial 369 finished with value: 438.88 and parameters: {'discount_factor': 0.9878153179325061, 'learning_rate': 0.09800274174417221, 'network_shape': 48}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:41:13,265][0m Trial 370 finished with value: 250.5 and parameters: {'discount_factor': 0.9718890167297832, 'learning_rate': 0.09832253630164735, 'network_shape': 50}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:41:32,942][0m Trial 371 finished with value: 446.75 and parameters: {'discount_factor': 0.9737172677012965, 'learning_rate': 0.09630794709344372, 'network_shape': 115}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:41:52,876][0m Trial 372 finished with value: 443.14 and parameters: {'discount_factor': 0.9806078274301782, 'learning_rate': 0.05515399286950795, 'network_shape': 46}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:42:13,204][0m Trial 373 finished with value: 441.52 and parameters: {'discount_factor': 0.9719701122377322, 'learning_rate': 0.04725678663821562, 'network_shape': 50}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:42:33,118][0m Trial 374 finished with value: 449.45 and parameters: {'discount_factor': 0.9476144872642678, 'learning_rate': 0.08555282077725909, 'network_shape': 41}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:42:52,827][0m Trial 375 finished with value: 447.71 and parameters: {'discount_factor': 0.9761531950556329, 'learning_rate': 0.05177224122459849, 'network_shape': 46}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:43:12,846][0m Trial 376 finished with value: 251.91 and parameters: {'discount_factor': 0.9506062866450894, 'learning_rate': 0.09474747729974337, 'network_shape': 52}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:43:33,927][0m Trial 377 finished with value: 446.8 and parameters: {'discount_factor': 0.9428997669719577, 'learning_rate': 0.09699323225444535, 'network_shape': 35}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:43:55,285][0m Trial 378 finished with value: 635.41 and parameters: {'discount_factor': 0.9490284391429563, 'learning_rate': 0.09998550939657197, 'network_shape': 108}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:44:16,558][0m Trial 379 finished with value: 446.15 and parameters: {'discount_factor': 0.953281642519701, 'learning_rate': 0.09459459121072958, 'network_shape': 74}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:44:38,047][0m Trial 380 finished with value: 439.16 and parameters: {'discount_factor': 0.9530326725840793, 'learning_rate': 0.09803575461186753, 'network_shape': 33}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:44:59,695][0m Trial 381 finished with value: 254.92 and parameters: {'discount_factor': 0.9564610726617118, 'learning_rate': 0.09173698716155224, 'network_shape': 79}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:45:21,394][0m Trial 382 finished with value: 432.12 and parameters: {'discount_factor': 0.9452911794314268, 'learning_rate': 0.06119230816581713, 'network_shape': 44}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:45:42,275][0m Trial 383 finished with value: 250.39 and parameters: {'discount_factor': 0.9827880779751218, 'learning_rate': 0.057814247846986175, 'network_shape': 110}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:46:03,067][0m Trial 384 finished with value: 445.87 and parameters: {'discount_factor': 0.9842040471267294, 'learning_rate': 0.05901906044528597, 'network_shape': 110}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:46:24,232][0m Trial 385 finished with value: 255.55 and parameters: {'discount_factor': 0.9817520070811652, 'learning_rate': 0.06340514023436579, 'network_shape': 111}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:46:45,030][0m Trial 386 finished with value: 635.05 and parameters: {'discount_factor': 0.9853699860800658, 'learning_rate': 0.0563271200541053, 'network_shape': 109}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:47:05,518][0m Trial 387 finished with value: 444.69 and parameters: {'discount_factor': 0.987909115256966, 'learning_rate': 0.059142943276775134, 'network_shape': 112}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:47:26,372][0m Trial 388 finished with value: 440.67 and parameters: {'discount_factor': 0.9788956001373571, 'learning_rate': 0.057197716967541494, 'network_shape': 106}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:47:47,287][0m Trial 389 finished with value: 250.27 and parameters: {'discount_factor': 0.9748468954359675, 'learning_rate': 0.05362101322331878, 'network_shape': 114}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:48:08,172][0m Trial 390 finished with value: 444.98 and parameters: {'discount_factor': 0.9735477135956033, 'learning_rate': 0.054179331845852545, 'network_shape': 116}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:48:29,042][0m Trial 391 finished with value: 250.97 and parameters: {'discount_factor': 0.9754489865109497, 'learning_rate': 0.0529786823885079, 'network_shape': 118}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:48:51,233][0m Trial 392 finished with value: 446.82 and parameters: {'discount_factor': 0.9743902310861875, 'learning_rate': 0.05065305495165659, 'network_shape': 115}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:49:13,912][0m Trial 393 finished with value: 441.26 and parameters: {'discount_factor': 0.9722484878660655, 'learning_rate': 0.05772472729526277, 'network_shape': 112}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:49:36,464][0m Trial 394 finished with value: 439.96 and parameters: {'discount_factor': 0.9710013444252605, 'learning_rate': 0.06056803917989903, 'network_shape': 109}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:49:57,467][0m Trial 395 finished with value: 447.8 and parameters: {'discount_factor': 0.9820555134842873, 'learning_rate': 0.05489403425604722, 'network_shape': 37}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:50:17,879][0m Trial 396 finished with value: 629.39 and parameters: {'discount_factor': 0.9639719141090402, 'learning_rate': 0.057067491272944824, 'network_shape': 113}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:50:37,512][0m Trial 397 finished with value: 629.33 and parameters: {'discount_factor': 0.9585776117111604, 'learning_rate': 0.052873415597374435, 'network_shape': 114}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:50:57,482][0m Trial 398 finished with value: 446.44 and parameters: {'discount_factor': 0.9775780425887061, 'learning_rate': 0.08369779948309088, 'network_shape': 106}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:51:18,563][0m Trial 399 finished with value: 437.67 and parameters: {'discount_factor': 0.9799674713080082, 'learning_rate': 0.05464395477507004, 'network_shape': 127}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:51:38,221][0m Trial 400 finished with value: 250.6 and parameters: {'discount_factor': 0.9701782664316504, 'learning_rate': 0.05521330342831504, 'network_shape': 120}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:51:57,998][0m Trial 401 finished with value: 319.09 and parameters: {'discount_factor': 0.9755280096348632, 'learning_rate': 0.04347585355443862, 'network_shape': 42}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:52:18,364][0m Trial 402 finished with value: 437.82 and parameters: {'discount_factor': 0.972030875256195, 'learning_rate': 0.05832622593205734, 'network_shape': 50}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:52:38,697][0m Trial 403 finished with value: 436.06 and parameters: {'discount_factor': 0.9548491297581001, 'learning_rate': 0.04962435580135462, 'network_shape': 39}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:52:59,132][0m Trial 404 finished with value: 438.57 and parameters: {'discount_factor': 0.9009745800151865, 'learning_rate': 0.08581853204159515, 'network_shape': 108}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:53:19,476][0m Trial 405 finished with value: 250.8 and parameters: {'discount_factor': 0.9629077734307928, 'learning_rate': 0.0892602846548812, 'network_shape': 36}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:53:39,708][0m Trial 406 finished with value: 442.31 and parameters: {'discount_factor': 0.9693801676930092, 'learning_rate': 0.08064631299777052, 'network_shape': 104}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:54:00,423][0m Trial 407 finished with value: 440.56 and parameters: {'discount_factor': 0.9578592196227723, 'learning_rate': 0.051754081279574794, 'network_shape': 64}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:54:21,179][0m Trial 408 finished with value: 252.46 and parameters: {'discount_factor': 0.9650322986948003, 'learning_rate': 0.056448367864695294, 'network_shape': 117}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:54:41,796][0m Trial 409 finished with value: 632.19 and parameters: {'discount_factor': 0.9540522802972307, 'learning_rate': 0.08759287101392114, 'network_shape': 111}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:55:02,507][0m Trial 410 finished with value: 440.92 and parameters: {'discount_factor': 0.9620271698703319, 'learning_rate': 0.062479230645842376, 'network_shape': 124}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:55:25,326][0m Trial 411 finished with value: 625.17 and parameters: {'discount_factor': 0.9842821522205227, 'learning_rate': 0.05920086402497361, 'network_shape': 39}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:55:48,302][0m Trial 412 finished with value: 438.51 and parameters: {'discount_factor': 0.9827584601392179, 'learning_rate': 0.09518582948868422, 'network_shape': 104}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:56:11,436][0m Trial 413 finished with value: 439.62 and parameters: {'discount_factor': 0.9513628511552708, 'learning_rate': 0.09778172456866072, 'network_shape': 114}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:56:33,141][0m Trial 414 finished with value: 275.97 and parameters: {'discount_factor': 0.9567758499956324, 'learning_rate': 0.039973166361948406, 'network_shape': 102}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:56:52,757][0m Trial 415 finished with value: 252.46 and parameters: {'discount_factor': 0.9732638554694982, 'learning_rate': 0.08249058859970808, 'network_shape': 48}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:57:14,051][0m Trial 416 finished with value: 419.29 and parameters: {'discount_factor': 0.9673525365267882, 'learning_rate': 0.03646993141342618, 'network_shape': 41}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:57:35,163][0m Trial 417 finished with value: 439.43 and parameters: {'discount_factor': 0.9528926374771448, 'learning_rate': 0.06568461789443886, 'network_shape': 32}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:57:56,362][0m Trial 418 finished with value: 439.29 and parameters: {'discount_factor': 0.9597950717548951, 'learning_rate': 0.09655845444474333, 'network_shape': 38}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:58:16,727][0m Trial 419 finished with value: 437.09 and parameters: {'discount_factor': 0.9628004615522237, 'learning_rate': 0.09317004818897696, 'network_shape': 106}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:58:38,038][0m Trial 420 finished with value: 437.92 and parameters: {'discount_factor': 0.9656818978287978, 'learning_rate': 0.089631941893466, 'network_shape': 44}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:58:59,444][0m Trial 421 finished with value: 439.51 and parameters: {'discount_factor': 0.9862999990647405, 'learning_rate': 0.09590195129254594, 'network_shape': 101}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:59:19,715][0m Trial 422 finished with value: 440.08 and parameters: {'discount_factor': 0.9800646114806835, 'learning_rate': 0.03375048910205687, 'network_shape': 110}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:59:39,370][0m Trial 423 finished with value: 263.42 and parameters: {'discount_factor': 0.9559149136486847, 'learning_rate': 0.053351034794278336, 'network_shape': 35}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 14:59:59,507][0m Trial 424 finished with value: 444.03 and parameters: {'discount_factor': 0.9688596267807492, 'learning_rate': 0.09815347489985599, 'network_shape': 41}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:00:19,899][0m Trial 425 finished with value: 634.38 and parameters: {'discount_factor': 0.9509815932973509, 'learning_rate': 0.08478895748657679, 'network_shape': 47}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:00:41,958][0m Trial 426 finished with value: 251.72 and parameters: {'discount_factor': 0.9039583756632786, 'learning_rate': 0.047646175007208476, 'network_shape': 116}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:01:04,106][0m Trial 427 finished with value: 636.16 and parameters: {'discount_factor': 0.9475331570974402, 'learning_rate': 0.08027209510153699, 'network_shape': 108}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:01:23,802][0m Trial 428 finished with value: 257.91 and parameters: {'discount_factor': 0.9891730584507943, 'learning_rate': 0.08747107395203378, 'network_shape': 52}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:01:43,461][0m Trial 429 finished with value: 250.77 and parameters: {'discount_factor': 0.9641513319516566, 'learning_rate': 0.04486489819256977, 'network_shape': 37}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:02:03,584][0m Trial 430 finished with value: 251.58 and parameters: {'discount_factor': 0.9585955104959697, 'learning_rate': 0.056317866933173394, 'network_shape': 30}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:02:23,511][0m Trial 431 finished with value: 255.47 and parameters: {'discount_factor': 0.9708764201242075, 'learning_rate': 0.09796909244865487, 'network_shape': 70}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:02:43,194][0m Trial 432 finished with value: 251.25 and parameters: {'discount_factor': 0.9838104303482783, 'learning_rate': 0.09437749145169629, 'network_shape': 104}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:03:03,629][0m Trial 433 finished with value: 631.19 and parameters: {'discount_factor': 0.9343736775290965, 'learning_rate': 0.09151292988427892, 'network_shape': 99}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:03:24,593][0m Trial 434 finished with value: 434.09 and parameters: {'discount_factor': 0.9541105071447595, 'learning_rate': 0.007034223963238322, 'network_shape': 44}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:03:45,981][0m Trial 435 finished with value: 559.15 and parameters: {'discount_factor': 0.9609423004161207, 'learning_rate': 0.02397975013770361, 'network_shape': 119}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:04:07,022][0m Trial 436 finished with value: 447.42 and parameters: {'discount_factor': 0.9664266272624698, 'learning_rate': 0.09608690772783655, 'network_shape': 112}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:04:28,137][0m Trial 437 finished with value: 440.3 and parameters: {'discount_factor': 0.952171445512566, 'learning_rate': 0.09978317414146565, 'network_shape': 39}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:04:48,602][0m Trial 438 finished with value: 443.78 and parameters: {'discount_factor': 0.9492609125192609, 'learning_rate': 0.06127893843858735, 'network_shape': 106}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:05:10,920][0m Trial 439 finished with value: 447.39 and parameters: {'discount_factor': 0.9758874712144133, 'learning_rate': 0.06734703914438071, 'network_shape': 102}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:05:32,549][0m Trial 440 finished with value: 441.91 and parameters: {'discount_factor': 0.9682539217230445, 'learning_rate': 0.05845903219328978, 'network_shape': 50}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:05:55,708][0m Trial 441 finished with value: 444.4 and parameters: {'discount_factor': 0.9820374817573627, 'learning_rate': 0.09772272941888661, 'network_shape': 97}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:06:16,332][0m Trial 442 finished with value: 445.91 and parameters: {'discount_factor': 0.9570956187576997, 'learning_rate': 0.08976401171979953, 'network_shape': 34}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:06:37,896][0m Trial 443 finished with value: 442.35 and parameters: {'discount_factor': 0.9868147438270695, 'learning_rate': 0.09997761075366747, 'network_shape': 46}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:07:00,113][0m Trial 444 finished with value: 447.85 and parameters: {'discount_factor': 0.9739140022555999, 'learning_rate': 0.09311389332437064, 'network_shape': 42}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:07:21,068][0m Trial 445 finished with value: 633.14 and parameters: {'discount_factor': 0.9772319976511746, 'learning_rate': 0.08349976630903316, 'network_shape': 109}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:07:41,556][0m Trial 446 finished with value: 440.47 and parameters: {'discount_factor': 0.9635153535894276, 'learning_rate': 0.051310922975703895, 'network_shape': 114}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:08:01,802][0m Trial 447 finished with value: 288.43 and parameters: {'discount_factor': 0.9602211290882461, 'learning_rate': 0.05455141994861386, 'network_shape': 39}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:08:22,258][0m Trial 448 finished with value: 250.79 and parameters: {'discount_factor': 0.9249952985682565, 'learning_rate': 0.09562717690415434, 'network_shape': 56}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:08:42,378][0m Trial 449 finished with value: 446.19 and parameters: {'discount_factor': 0.9464165788814175, 'learning_rate': 0.0702431989806347, 'network_shape': 100}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:09:03,314][0m Trial 450 finished with value: 446.2 and parameters: {'discount_factor': 0.9547171426687912, 'learning_rate': 0.08567286545138283, 'network_shape': 76}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:09:24,243][0m Trial 451 finished with value: 630.11 and parameters: {'discount_factor': 0.9652398244318344, 'learning_rate': 0.09801692079928843, 'network_shape': 36}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:09:45,323][0m Trial 452 finished with value: 631.81 and parameters: {'discount_factor': 0.9504287491128837, 'learning_rate': 0.0813935422819454, 'network_shape': 103}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:10:06,791][0m Trial 453 finished with value: 632.66 and parameters: {'discount_factor': 0.9789995379685051, 'learning_rate': 0.04109821648942949, 'network_shape': 107}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:10:28,825][0m Trial 454 finished with value: 443.25 and parameters: {'discount_factor': 0.9722972125769221, 'learning_rate': 0.0601546066131457, 'network_shape': 122}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:10:48,800][0m Trial 455 finished with value: 445.56 and parameters: {'discount_factor': 0.9520566184697344, 'learning_rate': 0.09226450908625794, 'network_shape': 96}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:11:08,506][0m Trial 456 finished with value: 253.36 and parameters: {'discount_factor': 0.9899658726067377, 'learning_rate': 0.08728225671565663, 'network_shape': 67}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:11:28,689][0m Trial 457 finished with value: 445.76 and parameters: {'discount_factor': 0.9848346603252763, 'learning_rate': 0.09479914212584688, 'network_shape': 41}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:11:48,363][0m Trial 458 finished with value: 620.77 and parameters: {'discount_factor': 0.9672358126837344, 'learning_rate': 0.05588103918861682, 'network_shape': 111}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:12:07,260][0m Trial 459 finished with value: 462.29 and parameters: {'discount_factor': 0.9480428148254255, 'learning_rate': 0.03894429733908484, 'network_shape': 38}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:12:27,455][0m Trial 460 finished with value: 631.46 and parameters: {'discount_factor': 0.961569216635329, 'learning_rate': 0.09659756922098425, 'network_shape': 126}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:12:50,421][0m Trial 461 finished with value: 448.3 and parameters: {'discount_factor': 0.9702550148772989, 'learning_rate': 0.09055731073818843, 'network_shape': 33}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:13:11,672][0m Trial 462 finished with value: 437.65 and parameters: {'discount_factor': 0.9580969604833475, 'learning_rate': 0.09993530895854341, 'network_shape': 117}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:13:32,818][0m Trial 463 finished with value: 437.82 and parameters: {'discount_factor': 0.9555634631170025, 'learning_rate': 0.09329998027960307, 'network_shape': 105}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:13:53,419][0m Trial 464 finished with value: 635.73 and parameters: {'discount_factor': 0.9440906824869497, 'learning_rate': 0.07813327667135819, 'network_shape': 43}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:14:15,105][0m Trial 465 finished with value: 250.98 and parameters: {'discount_factor': 0.98323567197759, 'learning_rate': 0.0579995079509724, 'network_shape': 94}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:14:36,601][0m Trial 466 finished with value: 251.96 and parameters: {'discount_factor': 0.9533629664948969, 'learning_rate': 0.09797178033666139, 'network_shape': 100}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:14:58,576][0m Trial 467 finished with value: 631.12 and parameters: {'discount_factor': 0.96359904954354, 'learning_rate': 0.0952613136996047, 'network_shape': 40}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:15:20,577][0m Trial 468 finished with value: 250.69 and parameters: {'discount_factor': 0.9496383635216593, 'learning_rate': 0.04857525001458543, 'network_shape': 49}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:15:40,887][0m Trial 469 finished with value: 433.57 and parameters: {'discount_factor': 0.986312359446154, 'learning_rate': 0.08868220541386387, 'network_shape': 52}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:16:00,925][0m Trial 470 finished with value: 447.12 and parameters: {'discount_factor': 0.9655126152121187, 'learning_rate': 0.04336121556566807, 'network_shape': 45}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:16:20,996][0m Trial 471 finished with value: 446.48 and parameters: {'discount_factor': 0.9808646632891174, 'learning_rate': 0.09687487405764869, 'network_shape': 35}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:16:41,782][0m Trial 472 finished with value: 446.52 and parameters: {'discount_factor': 0.9512959194812608, 'learning_rate': 0.09996848598301206, 'network_shape': 113}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:17:02,872][0m Trial 473 finished with value: 449.82 and parameters: {'discount_factor': 0.9686406968391686, 'learning_rate': 0.0795510130968506, 'network_shape': 58}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:17:24,754][0m Trial 474 finished with value: 633.48 and parameters: {'discount_factor': 0.9599907170868986, 'learning_rate': 0.0638597241495152, 'network_shape': 63}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:17:45,717][0m Trial 475 finished with value: 442.82 and parameters: {'discount_factor': 0.9624679134946041, 'learning_rate': 0.05314671513654484, 'network_shape': 97}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:18:06,549][0m Trial 476 finished with value: 441.02 and parameters: {'discount_factor': 0.9532170207361876, 'learning_rate': 0.0753105122813093, 'network_shape': 103}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:18:27,499][0m Trial 477 finished with value: 263.77 and parameters: {'discount_factor': 0.9715576306603009, 'learning_rate': 0.0830062392346383, 'network_shape': 108}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:18:51,054][0m Trial 478 finished with value: 443.76 and parameters: {'discount_factor': 0.9563808474628314, 'learning_rate': 0.0945626350215184, 'network_shape': 36}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:19:12,550][0m Trial 479 finished with value: 637.22 and parameters: {'discount_factor': 0.9666366152549694, 'learning_rate': 0.09803398889839504, 'network_shape': 110}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:19:33,248][0m Trial 480 finished with value: 634.6 and parameters: {'discount_factor': 0.9465346944073204, 'learning_rate': 0.09206338957127379, 'network_shape': 99}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:19:53,279][0m Trial 481 finished with value: 445.83 and parameters: {'discount_factor': 0.987829566601806, 'learning_rate': 0.08641083355993072, 'network_shape': 43}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:20:13,598][0m Trial 482 finished with value: 252.77 and parameters: {'discount_factor': 0.9485637303467316, 'learning_rate': 0.09589394234067798, 'network_shape': 73}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:20:34,509][0m Trial 483 finished with value: 440.74 and parameters: {'discount_factor': 0.9822035001247517, 'learning_rate': 0.0904039243921338, 'network_shape': 38}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:20:55,329][0m Trial 484 finished with value: 249.37 and parameters: {'discount_factor': 0.9585418739197404, 'learning_rate': 0.07221291828182243, 'network_shape': 115}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:21:16,436][0m Trial 485 finished with value: 257.13 and parameters: {'discount_factor': 0.9591287919176535, 'learning_rate': 0.07306545194344839, 'network_shape': 116}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:21:38,209][0m Trial 486 finished with value: 455.15 and parameters: {'discount_factor': 0.9569159649125667, 'learning_rate': 0.07126824883196606, 'network_shape': 114}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:22:00,647][0m Trial 487 finished with value: 449.48 and parameters: {'discount_factor': 0.9578871733634138, 'learning_rate': 0.06827541074233866, 'network_shape': 120}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:22:23,120][0m Trial 488 finished with value: 637.81 and parameters: {'discount_factor': 0.9551058370605011, 'learning_rate': 0.0785673537110587, 'network_shape': 112}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:22:44,867][0m Trial 489 finished with value: 250.47 and parameters: {'discount_factor': 0.9590005494724991, 'learning_rate': 0.0768229599218575, 'network_shape': 118}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:23:04,837][0m Trial 490 finished with value: 635.88 and parameters: {'discount_factor': 0.9592179106940536, 'learning_rate': 0.07632471240221464, 'network_shape': 119}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:23:24,818][0m Trial 491 finished with value: 633.45 and parameters: {'discount_factor': 0.9601099013592905, 'learning_rate': 0.0772268241526416, 'network_shape': 117}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:23:44,450][0m Trial 492 finished with value: 250.81 and parameters: {'discount_factor': 0.9581967363658725, 'learning_rate': 0.07164366788400021, 'network_shape': 115}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:24:04,644][0m Trial 493 finished with value: 442.41 and parameters: {'discount_factor': 0.9115507442942984, 'learning_rate': 0.07908499546567388, 'network_shape': 122}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:24:24,902][0m Trial 494 finished with value: 622.86 and parameters: {'discount_factor': 0.9621952206665918, 'learning_rate': 0.029259179928542062, 'network_shape': 114}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:24:44,382][0m Trial 495 finished with value: 274.02 and parameters: {'discount_factor': 0.9603199542453165, 'learning_rate': 0.08115218242685403, 'network_shape': 117}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:25:04,239][0m Trial 496 finished with value: 436.5 and parameters: {'discount_factor': 0.9566363259779325, 'learning_rate': 0.07351664044477066, 'network_shape': 111}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:25:24,372][0m Trial 497 finished with value: 440.59 and parameters: {'discount_factor': 0.9616449499148043, 'learning_rate': 0.07677164753819524, 'network_shape': 125}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:25:43,765][0m Trial 498 finished with value: 451.21 and parameters: {'discount_factor': 0.9544503403764627, 'learning_rate': 0.07356928265114852, 'network_shape': 119}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:26:03,673][0m Trial 499 finished with value: 635.91 and parameters: {'discount_factor': 0.9852843053860986, 'learning_rate': 0.08033941100098818, 'network_shape': 114}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:26:24,446][0m Trial 500 finished with value: 442.55 and parameters: {'discount_factor': 0.9503184085442058, 'learning_rate': 0.07578551468584972, 'network_shape': 112}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:26:46,316][0m Trial 501 finished with value: 251.48 and parameters: {'discount_factor': 0.9589887074560868, 'learning_rate': 0.07464998481597651, 'network_shape': 117}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:27:06,895][0m Trial 502 finished with value: 632.12 and parameters: {'discount_factor': 0.9748805874744914, 'learning_rate': 0.08373850101585735, 'network_shape': 114}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:27:26,690][0m Trial 503 finished with value: 250.8 and parameters: {'discount_factor': 0.9574351035929457, 'learning_rate': 0.06940282527362945, 'network_shape': 30}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:27:46,698][0m Trial 504 finished with value: 251.61 and parameters: {'discount_factor': 0.9607033283101736, 'learning_rate': 0.0818373669824513, 'network_shape': 108}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:28:06,940][0m Trial 505 finished with value: 634.81 and parameters: {'discount_factor': 0.9840465806951957, 'learning_rate': 0.09826056829344838, 'network_shape': 127}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:28:26,753][0m Trial 506 finished with value: 449.7 and parameters: {'discount_factor': 0.955598161513674, 'learning_rate': 0.07781661849776762, 'network_shape': 106}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:28:46,236][0m Trial 507 finished with value: 440.76 and parameters: {'discount_factor': 0.9795820165539365, 'learning_rate': 0.09334602950489146, 'network_shape': 109}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:29:05,900][0m Trial 508 finished with value: 250.38 and parameters: {'discount_factor': 0.9518806257150944, 'learning_rate': 0.045475971046821696, 'network_shape': 111}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:29:26,166][0m Trial 509 finished with value: 635.18 and parameters: {'discount_factor': 0.951232648575065, 'learning_rate': 0.0460400029153984, 'network_shape': 110}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:29:45,755][0m Trial 510 finished with value: 255.6 and parameters: {'discount_factor': 0.9521719185606268, 'learning_rate': 0.04504038632245102, 'network_shape': 113}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:30:05,781][0m Trial 511 finished with value: 438.54 and parameters: {'discount_factor': 0.9558521507594572, 'learning_rate': 0.04327805137530072, 'network_shape': 115}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:30:25,619][0m Trial 512 finished with value: 443.88 and parameters: {'discount_factor': 0.9586697244848832, 'learning_rate': 0.04776529932241139, 'network_shape': 116}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:30:45,773][0m Trial 513 finished with value: 251.52 and parameters: {'discount_factor': 0.9528300975687501, 'learning_rate': 0.041307793104681904, 'network_shape': 111}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:31:06,821][0m Trial 514 finished with value: 625.3 and parameters: {'discount_factor': 0.9495636888405873, 'learning_rate': 0.04594347908017354, 'network_shape': 112}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:31:26,860][0m Trial 515 finished with value: 253.27 and parameters: {'discount_factor': 0.9773681942589789, 'learning_rate': 0.04231818629864674, 'network_shape': 109}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:31:48,790][0m Trial 516 finished with value: 250.75 and parameters: {'discount_factor': 0.9406218471047216, 'learning_rate': 0.04883333172399535, 'network_shape': 105}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:32:17,505][0m Trial 517 finished with value: 635.12 and parameters: {'discount_factor': 0.9640304711014718, 'learning_rate': 0.0440004821571879, 'network_shape': 111}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:32:43,924][0m Trial 518 finished with value: 444.58 and parameters: {'discount_factor': 0.9485464421729025, 'learning_rate': 0.08475780318844953, 'network_shape': 123}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:33:12,306][0m Trial 519 finished with value: 447.79 and parameters: {'discount_factor': 0.95386304892993, 'learning_rate': 0.07525614148263986, 'network_shape': 119}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:33:39,811][0m Trial 520 finished with value: 250.82 and parameters: {'discount_factor': 0.951290803069368, 'learning_rate': 0.05075356604241856, 'network_shape': 94}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:34:06,434][0m Trial 521 finished with value: 436.85 and parameters: {'discount_factor': 0.9869967319317148, 'learning_rate': 0.0871454302978027, 'network_shape': 102}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:34:31,904][0m Trial 522 finished with value: 632.87 and parameters: {'discount_factor': 0.9829157395124698, 'learning_rate': 0.07121046344212391, 'network_shape': 32}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:34:57,295][0m Trial 523 finished with value: 447.84 and parameters: {'discount_factor': 0.9463475597066626, 'learning_rate': 0.07867143432998307, 'network_shape': 107}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:35:22,166][0m Trial 524 finished with value: 253.21 and parameters: {'discount_factor': 0.9497285388911054, 'learning_rate': 0.08937015535971332, 'network_shape': 89}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:35:47,903][0m Trial 525 finished with value: 633.21 and parameters: {'discount_factor': 0.9529336740489854, 'learning_rate': 0.04028225242968884, 'network_shape': 115}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:36:12,656][0m Trial 526 finished with value: 250.59 and parameters: {'discount_factor': 0.9855932032401753, 'learning_rate': 0.03870736392134611, 'network_shape': 98}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:36:37,434][0m Trial 527 finished with value: 252.96 and parameters: {'discount_factor': 0.9621495366160402, 'learning_rate': 0.04552706481899605, 'network_shape': 113}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:37:03,663][0m Trial 528 finished with value: 439.64 and parameters: {'discount_factor': 0.9735969196189129, 'learning_rate': 0.09166898828653511, 'network_shape': 118}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:37:29,612][0m Trial 529 finished with value: 440.58 and parameters: {'discount_factor': 0.9881222159229172, 'learning_rate': 0.08212569239040529, 'network_shape': 105}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:37:55,796][0m Trial 530 finished with value: 436.53 and parameters: {'discount_factor': 0.9542267613647056, 'learning_rate': 0.04697189342499081, 'network_shape': 101}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:38:22,291][0m Trial 531 finished with value: 437.95 and parameters: {'discount_factor': 0.9571787118256799, 'learning_rate': 0.07960408455265384, 'network_shape': 40}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:38:48,343][0m Trial 532 finished with value: 633.07 and parameters: {'discount_factor': 0.9691409907147818, 'learning_rate': 0.09448191404061987, 'network_shape': 110}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:39:14,900][0m Trial 533 finished with value: 452.44 and parameters: {'discount_factor': 0.9605222575423742, 'learning_rate': 0.07705927769369862, 'network_shape': 37}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:39:40,984][0m Trial 534 finished with value: 440.65 and parameters: {'discount_factor': 0.9814329829836561, 'learning_rate': 0.07484322528152866, 'network_shape': 107}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:40:07,310][0m Trial 535 finished with value: 449.26 and parameters: {'discount_factor': 0.9508774775096929, 'learning_rate': 0.05016754888559839, 'network_shape': 92}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:40:32,332][0m Trial 536 finished with value: 446.83 and parameters: {'discount_factor': 0.9475800834340575, 'learning_rate': 0.03610039250272352, 'network_shape': 95}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:40:56,737][0m Trial 537 finished with value: 448.54 and parameters: {'discount_factor': 0.964194429810359, 'learning_rate': 0.08813164846649411, 'network_shape': 121}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:41:23,200][0m Trial 538 finished with value: 251.68 and parameters: {'discount_factor': 0.9843156347649158, 'learning_rate': 0.09637564887220133, 'network_shape': 81}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:41:50,117][0m Trial 539 finished with value: 446.75 and parameters: {'discount_factor': 0.9562289434747696, 'learning_rate': 0.0722155188944996, 'network_shape': 128}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:42:17,527][0m Trial 540 finished with value: 444.37 and parameters: {'discount_factor': 0.952661084731869, 'learning_rate': 0.085469548490925, 'network_shape': 104}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:42:44,825][0m Trial 541 finished with value: 447.84 and parameters: {'discount_factor': 0.9667174721483895, 'learning_rate': 0.043263436477080634, 'network_shape': 61}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:43:12,572][0m Trial 542 finished with value: 252.55 and parameters: {'discount_factor': 0.9597666056110257, 'learning_rate': 0.08101613119839536, 'network_shape': 34}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:43:40,579][0m Trial 543 finished with value: 252.13 and parameters: {'discount_factor': 0.9488053828190103, 'learning_rate': 0.09437935375159258, 'network_shape': 97}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:44:06,760][0m Trial 544 finished with value: 442.91 and parameters: {'discount_factor': 0.9578228074352149, 'learning_rate': 0.07625874701842264, 'network_shape': 113}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:44:34,157][0m Trial 545 finished with value: 627.95 and parameters: {'discount_factor': 0.9711893385998536, 'learning_rate': 0.09183407489186157, 'network_shape': 117}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:45:00,066][0m Trial 546 finished with value: 435.01 and parameters: {'discount_factor': 0.9627252031913154, 'learning_rate': 0.052188979954702716, 'network_shape': 109}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:45:27,221][0m Trial 547 finished with value: 442.88 and parameters: {'discount_factor': 0.9443661628366674, 'learning_rate': 0.08937598041990369, 'network_shape': 41}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:45:52,457][0m Trial 548 finished with value: 252.01 and parameters: {'discount_factor': 0.9648548531627927, 'learning_rate': 0.09653555239747073, 'network_shape': 116}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:46:20,309][0m Trial 549 finished with value: 438.91 and parameters: {'discount_factor': 0.9514678221879173, 'learning_rate': 0.09999658010510472, 'network_shape': 69}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:46:49,737][0m Trial 550 finished with value: 438.72 and parameters: {'discount_factor': 0.9556332525678953, 'learning_rate': 0.05403536719024375, 'network_shape': 36}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:47:16,786][0m Trial 551 finished with value: 635.21 and parameters: {'discount_factor': 0.9723532667890177, 'learning_rate': 0.08382817277522481, 'network_shape': 112}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:47:42,796][0m Trial 552 finished with value: 633.75 and parameters: {'discount_factor': 0.9615277877125689, 'learning_rate': 0.07802747063488055, 'network_shape': 102}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:48:08,246][0m Trial 553 finished with value: 446.49 and parameters: {'discount_factor': 0.9537756676673353, 'learning_rate': 0.09839615428710309, 'network_shape': 100}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:48:33,162][0m Trial 554 finished with value: 444.71 and parameters: {'discount_factor': 0.9575182226090451, 'learning_rate': 0.06910884638085361, 'network_shape': 39}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:48:59,245][0m Trial 555 finished with value: 453.18 and parameters: {'discount_factor': 0.9589774309275994, 'learning_rate': 0.04126699986834846, 'network_shape': 107}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:49:25,012][0m Trial 556 finished with value: 250.87 and parameters: {'discount_factor': 0.9867035452903385, 'learning_rate': 0.09243501494394256, 'network_shape': 115}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:49:50,371][0m Trial 557 finished with value: 252.36 and parameters: {'discount_factor': 0.9480570499678462, 'learning_rate': 0.08723471609011715, 'network_shape': 96}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:50:15,693][0m Trial 558 finished with value: 252.59 and parameters: {'discount_factor': 0.9841557274242582, 'learning_rate': 0.05623705849511801, 'network_shape': 38}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:50:41,694][0m Trial 559 finished with value: 634.51 and parameters: {'discount_factor': 0.9758976818738745, 'learning_rate': 0.0941979015311661, 'network_shape': 78}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:51:07,700][0m Trial 560 finished with value: 444.74 and parameters: {'discount_factor': 0.9494686955421973, 'learning_rate': 0.09745706520404465, 'network_shape': 92}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:51:33,225][0m Trial 561 finished with value: 250.8 and parameters: {'discount_factor': 0.980642100645773, 'learning_rate': 0.07953996628653381, 'network_shape': 33}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:51:58,157][0m Trial 562 finished with value: 250.69 and parameters: {'discount_factor': 0.9524426919017271, 'learning_rate': 0.0442329644956757, 'network_shape': 103}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:52:23,217][0m Trial 563 finished with value: 445.28 and parameters: {'discount_factor': 0.9697158510068135, 'learning_rate': 0.09030592234045025, 'network_shape': 111}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:52:48,595][0m Trial 564 finished with value: 253.37 and parameters: {'discount_factor': 0.9653890828391989, 'learning_rate': 0.09499006360673302, 'network_shape': 65}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:53:14,402][0m Trial 565 finished with value: 250.77 and parameters: {'discount_factor': 0.9679932403849605, 'learning_rate': 0.09313564015122644, 'network_shape': 124}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:53:41,434][0m Trial 566 finished with value: 449.39 and parameters: {'discount_factor': 0.9614997586098963, 'learning_rate': 0.047620476681696354, 'network_shape': 108}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:54:09,497][0m Trial 567 finished with value: 633.9 and parameters: {'discount_factor': 0.9542837304961628, 'learning_rate': 0.06574883051758053, 'network_shape': 121}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:54:36,889][0m Trial 568 finished with value: 632.28 and parameters: {'discount_factor': 0.9505484580957569, 'learning_rate': 0.09645948203617614, 'network_shape': 105}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:55:03,226][0m Trial 569 finished with value: 248.28 and parameters: {'discount_factor': 0.9784059178245051, 'learning_rate': 0.09099616415744882, 'network_shape': 113}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:55:28,740][0m Trial 570 finished with value: 446.83 and parameters: {'discount_factor': 0.9819370886391858, 'learning_rate': 0.08878211554606162, 'network_shape': 114}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:55:54,879][0m Trial 571 finished with value: 424.58 and parameters: {'discount_factor': 0.9775405844361009, 'learning_rate': 0.09075615258884655, 'network_shape': 113}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:56:21,271][0m Trial 572 finished with value: 441.02 and parameters: {'discount_factor': 0.9809527130144142, 'learning_rate': 0.08510776583174133, 'network_shape': 110}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:56:46,951][0m Trial 573 finished with value: 254.4 and parameters: {'discount_factor': 0.9783749568146237, 'learning_rate': 0.08973877303847184, 'network_shape': 115}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:57:13,372][0m Trial 574 finished with value: 437.17 and parameters: {'discount_factor': 0.9783359797732503, 'learning_rate': 0.08645977769274793, 'network_shape': 111}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:57:38,567][0m Trial 575 finished with value: 440.9 and parameters: {'discount_factor': 0.9794816055608436, 'learning_rate': 0.09100303705142128, 'network_shape': 117}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:58:03,760][0m Trial 576 finished with value: 635.36 and parameters: {'discount_factor': 0.982583526690337, 'learning_rate': 0.08290771759023303, 'network_shape': 118}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:58:28,829][0m Trial 577 finished with value: 449.56 and parameters: {'discount_factor': 0.9851432411415147, 'learning_rate': 0.08828827371993783, 'network_shape': 109}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:58:53,554][0m Trial 578 finished with value: 446.47 and parameters: {'discount_factor': 0.9826436425293158, 'learning_rate': 0.04988302247536305, 'network_shape': 113}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:59:19,002][0m Trial 579 finished with value: 634.65 and parameters: {'discount_factor': 0.9757089078657809, 'learning_rate': 0.019317529183769715, 'network_shape': 107}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 15:59:44,691][0m Trial 580 finished with value: 438.97 and parameters: {'discount_factor': 0.9886717674133638, 'learning_rate': 0.04201904115722413, 'network_shape': 120}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 16:00:10,245][0m Trial 581 finished with value: 445.37 and parameters: {'discount_factor': 0.9632862463834606, 'learning_rate': 0.09186067417836574, 'network_shape': 112}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 16:00:36,154][0m Trial 582 finished with value: 252.04 and parameters: {'discount_factor': 0.9456604321822664, 'learning_rate': 0.0935579009247006, 'network_shape': 109}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 16:01:00,595][0m Trial 583 finished with value: 445.27 and parameters: {'discount_factor': 0.9797490413869896, 'learning_rate': 0.08721366300416936, 'network_shape': 115}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 16:01:23,002][0m Trial 584 finished with value: 631.46 and parameters: {'discount_factor': 0.9280910102816374, 'learning_rate': 0.08163886317605215, 'network_shape': 105}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 16:01:43,538][0m Trial 585 finished with value: 251.0 and parameters: {'discount_factor': 0.9833382823648802, 'learning_rate': 0.037891264703779425, 'network_shape': 87}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 16:02:04,501][0m Trial 586 finished with value: 634.29 and parameters: {'discount_factor': 0.9860646239718778, 'learning_rate': 0.09169971459735933, 'network_shape': 102}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 16:02:25,341][0m Trial 587 finished with value: 250.56 and parameters: {'discount_factor': 0.9847108298689085, 'learning_rate': 0.08415543709580696, 'network_shape': 118}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 16:02:46,274][0m Trial 588 finished with value: 441.89 and parameters: {'discount_factor': 0.9770345637024417, 'learning_rate': 0.0890660489432217, 'network_shape': 111}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 16:03:06,761][0m Trial 589 finished with value: 250.31 and parameters: {'discount_factor': 0.9636201209373875, 'learning_rate': 0.04633259619745527, 'network_shape': 29}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[32m[I 2022-09-19 16:03:28,075][0m Trial 590 finished with value: 445.73 and parameters: {'discount_factor': 0.9618886736939993, 'learning_rate': 0.04646045343994011, 'network_shape': 30}. Best is trial 78 with value: 246.68.[0m


  0%|          | 0/100 [00:00<?, ?it/s]

[33m[W 2022-09-19 16:03:36,422][0m Trial 591 failed because of the following error: KeyboardInterrupt()[0m
Traceback (most recent call last):
  File "/home/beast/.local/lib/python3.8/site-packages/optuna/study/_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_828/3544524599.py", line 12, in objective
    score = bot.train()
  File "/tmp/ipykernel_828/1221072934.py", line 70, in train
    action, lp = select_action(network, state)
  File "/tmp/ipykernel_828/1221072934.py", line 158, in select_action
    m = Categorical(action_probs)
  File "/home/beast/.local/lib/python3.8/site-packages/torch/distributions/categorical.py", line 64, in __init__
    super(Categorical, self).__init__(batch_shape, validate_args=validate_args)
  File "/home/beast/.local/lib/python3.8/site-packages/torch/distributions/distribution.py", line 54, in __init__
    if not valid.all():
KeyboardInterrupt


KeyboardInterrupt: 

In [8]:
study.best_params

{'discount_factor': 0.9589431748343439,
 'learning_rate': 0.09167455769715335,
 'network_shape': 39}

In [None]:
#discount factor for future utilities
DISCOUNT_FACTOR = 0.99

#number of episodes to run
NUM_EPISODES = 10000

#max steps per episode
MAX_STEPS = 1000

In [5]:
#Using a neural network to learn our policy parameters
class PolicyNetwork(nn.Module):
    
    #Takes in observations and outputs actions
    def __init__(self, observation_space, action_space):
        super(PolicyNetwork, self).__init__()
#         self.input_layer = 
#         self.relu = nn.ReLU(32, 32)
#         self.output_layer = nn.Linear(32, action_space)
        self.model = nn.Sequential(
            nn.Linear(observation_space, 64),
            nn.ReLU(),
            nn.Linear(64, 64),
            nn.ReLU(),
            nn.Linear(64, 64),
            nn.ReLU(),
            nn.Linear(64, action_space)
        )
    
    #forward pass
    def forward(self, x):
        #input states
#         x = self.input_layer(x)
        
#         #relu activation
#         x = F.relu(x)
        
#         #actions
#         actions = self.output_layer(x)
        actions = self.model(x)
        
        #get softmax for a probability distribution
        action_probs = F.softmax(actions, dim=1)
        # print(action_probs)
        
        return action_probs

In [6]:
def select_action(network, state):
    ''' Selects an action given current state
    Args:
    - network (Torch NN): network to process state
    - state (Array): Array of action space in an environment
    
    Return:
    - (int): action that is selected
    - (float): log probability of selecting that action given state and network
    '''
    #convert state to float tensor, add 1 dimension, allocate tensor on device
    state = ObsSpace(**state[0] if isinstance(state, tuple) else state)
    unpack_state = list(chain(state.agent, state.target, [state.velocity, state.agent_direction]))
    state = torch.Tensor(unpack_state).float().unsqueeze(0).to(DEVICE)
    
    #use network to predict action probabilities
    action_probs = network(state)
    
    #sample an action using the probability distribution
    m = Categorical(action_probs)
    action = m.sample()
    # print(m, action)
    
    #return action
    return action.item(), m.log_prob(action)

In [7]:
#Make environment
env = gym.make("policy_instances/SimpleArena-v0")

#Init network
network = PolicyNetwork(env.shape, env.action_space.n).to(DEVICE)

#Init optimizer
optimizer = optim.Adam(network.parameters(), lr=1e-4)

In [8]:
state = env.reset()
select_action(network, state)

  logger.warn(f"{pre} is not within the observation space.")


(4, tensor([-0.7832], device='cuda:0', grad_fn=<SqueezeBackward1>))

In [9]:
#track scores
scores = []

#iterate through episodes
for episode in tqdm(range(NUM_EPISODES)):
    
    #reset environment, initiable variables
    state = env.reset()
    rewards = []
    log_probs = []
    score = 0
    actions_dist = []
    
    #generate episode
    for step in range(MAX_STEPS):
        env.render()
        
        #select action
        action, lp = select_action(network, state)
        actions_dist.append(action)
        
        #execute action
        new_state, reward, done, _, _ = env.step(action)
        
        #track episode score
        score += reward
        
        #store reward and log probability
        rewards.append(reward)
        log_probs.append(lp)
        
        #end episode
        if done:
            break
        
        #move into new state
        state = new_state
    
    #append score
    scores.append(score)
    print('Score:', score)
        
    #Calculate Gt (cumulative discounted rewards)
    discounted_rewards = []
    
    #track cumulative reward
    total_r = .0
    
    #iterate rewards from Gt to G0
    for r in reversed(rewards):
        # print(type(r), type(total_r), type(DISCOUNT_FACTOR), type(total_r ** DISCOUNT_FACTOR))
        # print(r, total_r, DISCOUNT_FACTOR, total_r ** DISCOUNT_FACTOR)
        
        #Base case: G(T) = r(T)
        #Recursive: G(t) = r(t) + G(t+1)^DISCOUNT
        if total_r < 0:
            total_r = r - np.abs(total_r) ** DISCOUNT_FACTOR
        else:
            total_r = r + total_r ** DISCOUNT_FACTOR
        
        #append to discounted rewards
        discounted_rewards.append(total_r)
    
    #reverse discounted rewards
    rewards = torch.tensor(discounted_rewards).to(DEVICE)
    rewards = torch.flip(rewards, [0])
    
    #adjusting policy parameters with gradient ascent
    loss = []
    for r, lp in zip(rewards, log_probs):
        #we add a negative sign since network will perform gradient descent and we are doing gradient ascent with REINFORCE
        loss.append(-r * lp)
    
        
    #Backpropagation
    optimizer.zero_grad()
    sum(loss).backward()
    # print('Loss:', sum(loss))
    print(list(zip(*np.unique(actions_dist, return_counts=True))))
    optimizer.step()

env.close()

  0%|          | 0/10000 [00:00<?, ?it/s]

  logger.warn(f"{pre} is not within the observation space.")


Score: 142
[(1, 1), (3, 1), (4, 4)]
Score: -352
[(0, 4), (1, 16), (2, 14), (3, 34), (4, 33)]
Score: -379
[(0, 8), (1, 16), (2, 16), (3, 24), (4, 37)]
Score: 77
[(1, 3), (2, 1), (3, 6), (4, 26)]
Score: -398
[(0, 6), (1, 9), (2, 12), (3, 21), (4, 53)]
Score: -303
[(0, 7), (1, 22), (2, 11), (3, 21), (4, 40)]
Score: -386
[(0, 2), (1, 19), (2, 5), (3, 17), (4, 58)]
Score: -408
[(0, 6), (1, 8), (2, 5), (3, 29), (4, 53)]
Score: -374
[(0, 3), (1, 16), (2, 12), (3, 17), (4, 53)]
Score: -379
[(0, 7), (1, 18), (2, 10), (3, 13), (4, 53)]
Score: -402
[(0, 3), (1, 6), (2, 2), (3, 24), (4, 66)]
Score: -396
[(0, 5), (1, 12), (2, 2), (3, 14), (4, 68)]
Score: -359
[(0, 6), (1, 16), (2, 6), (3, 23), (4, 50)]
Score: -401
[(0, 4), (1, 13), (2, 7), (3, 23), (4, 54)]
Score: -381
[(0, 3), (1, 17), (2, 14), (3, 21), (4, 46)]
Score: -369
[(0, 4), (1, 13), (2, 8), (3, 22), (4, 54)]
Score: -385
[(0, 2), (1, 15), (2, 1), (3, 23), (4, 60)]
Score: -385
[(0, 5), (1, 12), (2, 10), (3, 17), (4, 57)]
Score: -429
[(0, 2)

Score: -311
[(0, 19), (1, 51), (2, 15), (3, 4), (4, 12)]
Score: -292
[(0, 19), (1, 49), (2, 7), (3, 11), (4, 15)]
Score: 52
[(0, 3), (1, 29), (2, 9), (3, 7), (4, 10)]
Score: -277
[(0, 14), (1, 44), (2, 18), (3, 9), (4, 16)]
Score: -501
[(0, 11), (1, 47), (2, 17), (3, 10), (4, 16)]
Score: -301
[(0, 14), (1, 54), (2, 7), (3, 9), (4, 17)]
Score: -317
[(0, 10), (1, 39), (2, 19), (3, 15), (4, 18)]
Score: -311
[(0, 11), (1, 50), (2, 12), (3, 16), (4, 12)]
Score: -329
[(0, 13), (1, 39), (2, 20), (3, 11), (4, 18)]
Score: -298
[(0, 11), (1, 44), (2, 19), (3, 11), (4, 16)]
Score: -25
[(0, 9), (1, 49), (2, 11), (3, 11), (4, 15)]
Score: -309
[(0, 13), (1, 48), (2, 12), (3, 14), (4, 14)]
Score: -341
[(0, 8), (1, 28), (2, 26), (3, 20), (4, 19)]
Score: -325
[(0, 11), (1, 51), (2, 17), (3, 10), (4, 12)]
Score: -323
[(0, 2), (1, 42), (2, 24), (3, 12), (4, 21)]
Score: -318
[(0, 16), (1, 54), (2, 17), (3, 6), (4, 8)]
Score: 11
[(0, 6), (1, 32), (2, 17), (3, 7), (4, 11)]
Score: -311
[(0, 16), (1, 43), (2,

Score: -322
[(0, 21), (1, 25), (2, 20), (3, 10), (4, 25)]
Score: -313
[(0, 28), (1, 25), (2, 13), (3, 12), (4, 23)]
Score: -284
[(0, 35), (1, 25), (2, 11), (3, 8), (4, 22)]
Score: -314
[(0, 27), (1, 25), (2, 13), (3, 9), (4, 27)]
Score: -303
[(0, 24), (1, 41), (2, 12), (3, 8), (4, 16)]
Score: 140
[(0, 8), (1, 7), (2, 1), (3, 1), (4, 1)]
Score: -308
[(0, 26), (1, 30), (2, 17), (3, 11), (4, 17)]
Score: -307
[(0, 20), (1, 32), (2, 12), (3, 11), (4, 26)]
Score: -301
[(0, 19), (1, 30), (2, 16), (3, 14), (4, 22)]
Score: -304
[(0, 24), (1, 39), (2, 14), (3, 11), (4, 13)]
Score: -293
[(0, 27), (1, 36), (2, 13), (3, 10), (4, 15)]
Score: -265
[(0, 32), (1, 27), (2, 13), (3, 13), (4, 16)]
Score: -265
[(0, 21), (1, 48), (2, 10), (3, 8), (4, 14)]
Score: -293
[(0, 30), (1, 38), (2, 17), (3, 10), (4, 6)]
Score: -284
[(0, 25), (1, 37), (2, 15), (3, 10), (4, 14)]
Score: -314
[(0, 37), (1, 26), (2, 14), (3, 10), (4, 14)]
Score: -326
[(0, 26), (1, 29), (2, 22), (3, 11), (4, 13)]
Score: -278
[(0, 44), (1,

Score: -323
[(0, 41), (1, 20), (2, 18), (3, 10), (4, 12)]
Score: -285
[(0, 54), (1, 18), (2, 10), (3, 7), (4, 12)]
Score: -297
[(0, 48), (1, 16), (2, 12), (3, 19), (4, 6)]
Score: -299
[(0, 53), (1, 23), (2, 11), (3, 7), (4, 7)]
Score: -283
[(0, 59), (1, 12), (2, 10), (3, 9), (4, 11)]
Score: -282
[(0, 33), (1, 18), (2, 15), (3, 20), (4, 15)]
Score: -321
[(0, 48), (1, 19), (2, 16), (3, 7), (4, 11)]
Score: -296
[(0, 47), (1, 24), (2, 13), (3, 7), (4, 10)]
Score: -266
[(0, 37), (1, 29), (2, 12), (3, 11), (4, 12)]
Score: -277
[(0, 49), (1, 17), (2, 18), (3, 8), (4, 9)]
Score: -278
[(0, 42), (1, 23), (2, 12), (3, 12), (4, 12)]
Score: 124
[(0, 10), (1, 6), (2, 3), (3, 4), (4, 4)]
Score: -279
[(0, 51), (1, 23), (2, 15), (3, 4), (4, 8)]
Score: -271
[(0, 47), (1, 23), (2, 14), (3, 8), (4, 9)]
Score: -296
[(0, 46), (1, 20), (2, 17), (3, 9), (4, 9)]
Score: -296
[(0, 49), (1, 19), (2, 16), (3, 6), (4, 11)]
Score: -283
[(0, 47), (1, 20), (2, 20), (3, 6), (4, 8)]
Score: -262
[(0, 37), (1, 27), (2, 15

Score: -276
[(0, 37), (1, 28), (2, 8), (3, 11), (4, 17)]
Score: -300
[(0, 36), (1, 23), (2, 17), (3, 8), (4, 17)]
Score: -287
[(0, 47), (1, 19), (2, 21), (3, 8), (4, 6)]
Score: -282
[(0, 47), (1, 29), (2, 13), (3, 6), (4, 6)]
Score: -261
[(0, 52), (1, 27), (2, 8), (3, 7), (4, 7)]
Score: -271
[(0, 50), (1, 23), (2, 8), (3, 12), (4, 8)]
Score: -332
[(0, 34), (1, 32), (2, 17), (3, 6), (4, 12)]
Score: -279
[(0, 50), (1, 26), (2, 12), (3, 6), (4, 7)]
Score: -285
[(0, 33), (1, 27), (2, 17), (3, 8), (4, 16)]
Score: -296
[(0, 46), (1, 28), (2, 11), (3, 8), (4, 8)]
Score: 84
[(0, 18), (1, 9), (2, 3), (3, 2), (4, 3)]
Score: -299
[(0, 44), (1, 24), (2, 13), (3, 9), (4, 11)]
Score: -293
[(0, 44), (1, 27), (2, 11), (3, 7), (4, 12)]
Score: -264
[(0, 52), (1, 23), (2, 9), (3, 9), (4, 8)]
Score: -296
[(0, 50), (1, 20), (2, 13), (3, 11), (4, 7)]
Score: -264
[(0, 65), (1, 19), (2, 5), (3, 8), (4, 4)]
Score: -301
[(0, 47), (1, 28), (2, 9), (3, 7), (4, 10)]
Score: -259
[(0, 54), (1, 22), (2, 5), (3, 11), 

Score: -260
[(0, 62), (1, 18), (2, 3), (3, 10), (4, 8)]
Score: -284
[(0, 52), (1, 14), (2, 9), (3, 17), (4, 9)]
Score: -327
[(0, 48), (1, 22), (2, 13), (3, 7), (4, 11)]
Score: -269
[(0, 43), (1, 25), (2, 15), (3, 6), (4, 12)]
Score: -289
[(0, 49), (1, 21), (2, 17), (3, 2), (4, 12)]
Score: -286
[(0, 47), (1, 28), (2, 13), (3, 5), (4, 8)]
Score: -296
[(0, 55), (1, 19), (2, 11), (3, 9), (4, 7)]
Score: -305
[(0, 46), (1, 32), (2, 12), (3, 3), (4, 8)]
Score: -252
[(0, 52), (1, 26), (2, 11), (3, 5), (4, 7)]
Score: -336
[(0, 33), (1, 22), (2, 22), (3, 12), (4, 12)]
Score: -280
[(0, 46), (1, 19), (2, 16), (3, 9), (4, 11)]
Score: -280
[(0, 45), (1, 27), (2, 12), (3, 4), (4, 13)]
Score: -306
[(0, 46), (1, 24), (2, 15), (3, 7), (4, 9)]
Score: -295
[(0, 40), (1, 16), (2, 11), (3, 17), (4, 17)]
Score: -281
[(0, 58), (1, 20), (2, 9), (3, 8), (4, 6)]
Score: -279
[(0, 42), (1, 26), (2, 15), (3, 3), (4, 15)]
Score: -314
[(0, 41), (1, 24), (2, 15), (3, 10), (4, 11)]
Score: -269
[(0, 44), (1, 19), (2, 17

Score: -253
[(0, 47), (1, 25), (2, 13), (3, 11), (4, 5)]
Score: -310
[(0, 41), (1, 19), (2, 19), (3, 10), (4, 12)]
Score: -257
[(0, 57), (1, 28), (2, 7), (3, 6), (4, 3)]
Score: -302
[(0, 56), (1, 17), (2, 9), (3, 7), (4, 12)]
Score: -315
[(0, 44), (1, 16), (2, 14), (3, 13), (4, 14)]
Score: -265
[(0, 54), (1, 20), (2, 9), (3, 7), (4, 11)]
Score: -316
[(0, 49), (1, 13), (2, 16), (3, 8), (4, 15)]
Score: -289
[(0, 56), (1, 20), (2, 13), (3, 6), (4, 6)]
Score: -262
[(0, 49), (1, 28), (2, 8), (3, 5), (4, 11)]
Score: -278
[(0, 50), (1, 21), (2, 14), (3, 9), (4, 7)]
Score: -271
[(0, 46), (1, 21), (2, 12), (3, 12), (4, 10)]
Score: 60
[(0, 49), (1, 12), (2, 11), (3, 12), (4, 12)]
Score: -306
[(0, 48), (1, 16), (2, 21), (3, 8), (4, 8)]
Score: -317
[(0, 50), (1, 21), (2, 13), (3, 7), (4, 10)]
Score: -284
[(0, 47), (1, 17), (2, 13), (3, 9), (4, 15)]
Score: -282
[(0, 56), (1, 20), (2, 12), (3, 6), (4, 7)]
Score: -372
[(0, 38), (1, 18), (2, 17), (3, 16), (4, 12)]
Score: -269
[(0, 58), (1, 19), (2, 10

[(0, 59), (1, 18), (2, 8), (3, 5), (4, 11)]
Score: -274
[(0, 42), (1, 17), (2, 22), (3, 9), (4, 11)]
Score: -260
[(0, 49), (1, 19), (2, 7), (3, 7), (4, 19)]
Score: -321
[(0, 52), (1, 15), (2, 17), (3, 9), (4, 8)]
Score: -264
[(0, 54), (1, 20), (2, 9), (3, 9), (4, 9)]
Score: -300
[(0, 42), (1, 24), (2, 12), (3, 10), (4, 13)]
Score: -278
[(0, 55), (1, 19), (2, 11), (3, 4), (4, 12)]
Score: -307
[(0, 53), (1, 14), (2, 13), (3, 11), (4, 10)]
Score: -297
[(0, 53), (1, 15), (2, 14), (3, 7), (4, 12)]
Score: -263
[(0, 44), (1, 19), (2, 15), (3, 12), (4, 11)]
Score: -304
[(0, 49), (1, 17), (2, 13), (3, 12), (4, 10)]
Score: -283
[(0, 51), (1, 23), (2, 11), (3, 5), (4, 11)]
Score: -246
[(0, 57), (1, 23), (2, 7), (3, 5), (4, 9)]
Score: -239
[(0, 53), (1, 27), (2, 8), (3, 6), (4, 7)]
Score: -270
[(0, 56), (1, 15), (2, 11), (3, 9), (4, 10)]
Score: 58
[(0, 18), (1, 5), (2, 2), (4, 6)]
Score: -271
[(0, 55), (1, 26), (2, 13), (3, 5), (4, 2)]
Score: -269
[(0, 41), (1, 21), (2, 14), (3, 12), (4, 13)]
Scor

Score: -253
[(0, 55), (1, 14), (2, 11), (3, 10), (4, 11)]
Score: -281
[(0, 55), (1, 20), (2, 9), (3, 8), (4, 9)]
Score: -308
[(0, 49), (1, 19), (2, 12), (3, 15), (4, 6)]
Score: -318
[(0, 48), (1, 22), (2, 15), (3, 7), (4, 9)]
Score: -278
[(0, 52), (1, 24), (2, 9), (3, 8), (4, 8)]
Score: -293
[(0, 55), (1, 16), (2, 11), (3, 8), (4, 11)]
Score: -283
[(0, 64), (1, 14), (2, 7), (3, 3), (4, 13)]
Score: -256
[(0, 55), (1, 23), (2, 11), (3, 5), (4, 7)]
Score: -299
[(0, 41), (1, 29), (2, 16), (3, 6), (4, 9)]
Score: -274
[(0, 54), (1, 17), (2, 13), (3, 8), (4, 9)]
Score: -292
[(0, 48), (1, 20), (2, 13), (3, 6), (4, 14)]
Score: -295
[(0, 54), (1, 19), (2, 11), (3, 8), (4, 9)]
Score: -313
[(0, 47), (1, 21), (2, 14), (3, 8), (4, 11)]
Score: -308
[(0, 48), (1, 16), (2, 15), (3, 8), (4, 14)]
Score: -292
[(0, 39), (1, 22), (2, 18), (3, 9), (4, 13)]
Score: -287
[(0, 54), (1, 17), (2, 9), (3, 13), (4, 8)]
Score: -275
[(0, 53), (1, 30), (2, 5), (3, 5), (4, 8)]
Score: -303
[(0, 61), (1, 18), (2, 11), (3,

Score: -258
[(0, 45), (1, 35), (2, 10), (3, 6), (4, 5)]
Score: -323
[(0, 41), (1, 30), (2, 17), (3, 3), (4, 10)]
Score: -245
[(0, 48), (1, 35), (2, 7), (3, 3), (4, 8)]
Score: -261
[(0, 44), (1, 39), (2, 13), (3, 2), (4, 3)]
Score: -268
[(0, 47), (1, 33), (2, 10), (3, 4), (4, 7)]
Score: -276
[(0, 53), (1, 30), (2, 8), (3, 6), (4, 4)]
Score: -288
[(0, 37), (1, 31), (2, 10), (3, 9), (4, 14)]
Score: -264
[(0, 51), (1, 33), (2, 2), (3, 10), (4, 5)]
Score: -283
[(0, 47), (1, 22), (2, 17), (3, 10), (4, 5)]
Score: -246
[(0, 42), (1, 29), (2, 15), (3, 9), (4, 6)]
Score: -253
[(0, 55), (1, 30), (2, 8), (3, 3), (4, 5)]
Score: -239
[(0, 48), (1, 37), (2, 4), (3, 8), (4, 4)]
Score: -281
[(0, 49), (1, 26), (2, 11), (3, 6), (4, 9)]
Score: -248
[(0, 52), (1, 34), (2, 9), (3, 4), (4, 2)]
Score: -256
[(0, 43), (1, 34), (2, 8), (3, 8), (4, 8)]
Score: -288
[(0, 50), (1, 37), (2, 5), (3, 3), (4, 6)]
Score: -272
[(0, 49), (1, 30), (2, 9), (3, 5), (4, 8)]
Score: -280
[(0, 40), (1, 34), (2, 13), (3, 9), (4, 5

Score: -278
[(0, 50), (1, 33), (2, 6), (3, 6), (4, 6)]
Score: -249
[(0, 50), (1, 31), (2, 12), (3, 4), (4, 4)]
Score: -268
[(0, 41), (1, 41), (2, 10), (3, 5), (4, 4)]
Score: -294
[(0, 52), (1, 29), (2, 10), (3, 3), (4, 7)]
Score: -269
[(0, 49), (1, 30), (2, 9), (3, 9), (4, 4)]
Score: -263
[(0, 46), (1, 26), (2, 12), (3, 8), (4, 9)]
Score: -234
[(0, 49), (1, 34), (2, 8), (3, 2), (4, 8)]
Score: -273
[(0, 50), (1, 26), (2, 13), (3, 7), (4, 5)]
Score: -272
[(0, 41), (1, 28), (2, 13), (3, 9), (4, 10)]
Score: 29
[(0, 35), (1, 18), (2, 11), (3, 2), (4, 7)]
Score: -303
[(0, 45), (1, 23), (2, 16), (3, 6), (4, 11)]
Score: -242
[(0, 41), (1, 30), (2, 11), (3, 11), (4, 8)]
Score: -301
[(0, 47), (1, 22), (2, 10), (3, 11), (4, 11)]
Score: -239
[(0, 47), (1, 34), (2, 8), (3, 6), (4, 6)]
Score: -241
[(0, 53), (1, 28), (2, 10), (3, 6), (4, 4)]
Score: -306
[(0, 44), (1, 23), (2, 19), (3, 10), (4, 5)]
Score: -300
[(0, 56), (1, 21), (2, 11), (3, 9), (4, 4)]
Score: -269
[(0, 51), (1, 34), (2, 11), (3, 3), 

Score: -292
[(0, 43), (1, 21), (2, 15), (3, 5), (4, 17)]
Score: -272
[(0, 50), (1, 15), (2, 18), (3, 13), (4, 5)]
Score: -323
[(0, 54), (1, 14), (2, 19), (3, 5), (4, 9)]
Score: -253
[(0, 48), (1, 25), (2, 14), (3, 7), (4, 7)]
Score: -317
[(0, 49), (1, 18), (2, 15), (3, 10), (4, 9)]
Score: -282
[(0, 60), (1, 13), (2, 12), (3, 9), (4, 7)]
Score: -285
[(0, 60), (1, 16), (2, 13), (3, 6), (4, 6)]
Score: -302
[(0, 48), (1, 19), (2, 14), (3, 12), (4, 8)]
Score: -289
[(0, 64), (1, 15), (2, 9), (3, 8), (4, 5)]
Score: -235
[(0, 52), (1, 19), (2, 15), (3, 7), (4, 8)]
Score: -243
[(0, 57), (1, 22), (2, 6), (3, 13), (4, 3)]
Score: -272
[(0, 58), (1, 12), (2, 10), (3, 8), (4, 13)]
Score: -294
[(0, 60), (1, 8), (2, 16), (3, 11), (4, 6)]
Score: -275
[(0, 57), (1, 16), (2, 15), (3, 6), (4, 7)]
Score: -280
[(0, 59), (1, 14), (2, 15), (3, 7), (4, 6)]
Score: -299
[(0, 61), (1, 11), (2, 13), (3, 10), (4, 6)]
Score: -293
[(0, 56), (1, 15), (2, 8), (3, 12), (4, 10)]
Score: -255
[(0, 58), (1, 23), (2, 9), (3,

Score: -289
[(0, 58), (1, 11), (2, 8), (3, 10), (4, 14)]
Score: 34
[(0, 17), (1, 6), (2, 12), (3, 8), (4, 12)]
Score: -307
[(0, 46), (1, 19), (2, 12), (3, 12), (4, 12)]
Score: -311
[(0, 45), (1, 18), (2, 19), (3, 11), (4, 8)]
Score: -290
[(0, 57), (1, 13), (2, 14), (3, 12), (4, 5)]
Score: -299
[(0, 58), (1, 12), (2, 16), (3, 7), (4, 8)]
Score: -321
[(0, 52), (1, 15), (2, 15), (3, 10), (4, 9)]
Score: -292
[(0, 38), (1, 18), (2, 15), (3, 14), (4, 16)]
Score: -337
[(0, 43), (1, 18), (2, 15), (3, 11), (4, 14)]
Score: -274
[(0, 57), (1, 13), (2, 8), (3, 11), (4, 12)]
Score: -292
[(0, 57), (1, 17), (2, 12), (3, 7), (4, 8)]
Score: -280
[(0, 55), (1, 18), (2, 10), (3, 7), (4, 11)]
Score: -319
[(0, 55), (1, 12), (2, 14), (3, 7), (4, 13)]
Score: -294
[(0, 56), (1, 11), (2, 14), (3, 13), (4, 7)]
Score: -286
[(0, 49), (1, 15), (2, 16), (3, 13), (4, 8)]
Score: -252
[(0, 50), (1, 14), (2, 11), (3, 10), (4, 16)]
Score: -246
[(0, 71), (1, 11), (2, 7), (3, 9), (4, 3)]
Score: -280
[(0, 57), (1, 15), (2,

KeyboardInterrupt: 

In [None]:
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import seaborn as sns
import numpy as np

sns.set()

plt.plot(scores)
plt.ylabel('score')
plt.xlabel('episodes')
plt.title('Training score of SimpleArena with REINFORCE')

reg = LinearRegression().fit(np.arange(len(scores)).reshape(-1, 1), np.array(scores).reshape(-1, 1))
y_pred = reg.predict(np.arange(len(scores)).reshape(-1, 1))
plt.plot(y_pred)
plt.show()

In [None]:
done = False
state = env.reset()
scores = []

for _ in range(10):
    state = env.reset()
    done = False
    score = 0
    while not done:
        # env.render()
        action = env.action_space.sample()
        new_state, reward, done, info, _ = env.step(action)
        score += reward
        state = new_state
    scores.append(score)
env.close()

In [None]:
done = False
state = env.reset()
scores = []

for _ in tqdm(range(50)):
    state = env.reset()
    done = False
    score = 0
    while not done:
        env.render()
        action, lp = select_action(network, state)
        new_state, reward, done, info, _ = env.step(action)
        print(state, action)
        score += reward
        state = new_state
    scores.append(score)
env.close()

In [None]:
np.array(scores).mean()

In [None]:
done = False
state = env.reset()
scores = []

for _ in tqdm(range(50)):
    state = env.reset()
    done = False
    score = 0
    while not done:
        env.render()
        action, lp = select_action(network, state)
        new_state, reward, done, info, _ = env.step(action)
        print(state, action)
        score += reward
        state = new_state
    scores.append(score)
env.close()