In [7]:
import gym
import pylab
import random
import numpy as np
from collections import deque
import tflearn
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import sgd


gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)

## Task: fill empty spaces in the following agent code

In [21]:
class DeepQAgent:
    def __init__(self, state_size, action_size, render=True):
        # Tip: if you are training this on AWS the best way is to turn off rendering
        # and load it later with the serialized model
        self.render = render
        self.state_size = state_size
        self.action_size = action_size

        self.discount_factor = 0.99
        self.learning_rate = 0.1
        self.epsilon = 1.0
        self.epsilon_min = 0.005
        self.epsilon_decay = (self.epsilon - self.epsilon_min) / 50000
        self.batch_size = 64
        self.train_start = 1000
        # replay memory
        self.memory = deque(maxlen=10000)

        self.model = self.build_model()
        self.target_model = self.build_model()
        self.update_target_model()

    def build_model(self):
        model = Sequential()
        model.add(Dense(10, activation='relu', input_dim = self.state_size))
        model.add(Dense(10, activation='relu'))
        model.add(Dense(self.action_size))
        
        model.compile(sgd(lr=self.learning_rate), "mse")
        return model

    def update_target_model(self):
        print("update")
        """Update your target model to the model you are currently learning at regular time intervals"""        
        self.target_model.set_weights(self.model.get_weights())


    def get_action(self, state):
        """The choice of action uses the epsilon-greedy policy for the current network."""
        if np.random.rand() <= self.epsilon:
            #if (state[0][0] < -0.6 and state[0][1] < -0.01):
            #    return 0
            #elif (state[0][0] > -0.4 and state[0][1] > 0.01):
            #    return 1
            #elif (state[0][0] < -0.6 and state[0][1] > -0.01):
            #    return 1
            #elif (state[0][0] > -0.4 and state[0][1] < 0.01):
            #    return 0
            #elif (state[0][0] > -0.6 and state[0][0] < -0.4 and state[0][1] < 0):
            #    return 0
            #elif (state[0][0] > -0.6 and state[0][0] < -0.4 and state[0][1] > 0):
            #    return 1
            
            return random.randrange(self.action_size)
        else:
            q_value = self.model.predict(state)
            return np.argmax(q_value[0])

    def replay_memory(self, state, action, reward, next_state, done):
        """Save <s, a, r, s'> to replay_memory"""
        if action == 2:
            action = 1
        self.memory.append((state, action, reward, next_state, done))
        if self.epsilon > self.epsilon_min:
            self.epsilon -= self.epsilon_decay
            # print(len(self.memory))

    def train_replay(self):
        """Random sampling of batch_size samples from replay memory"""
        if len(self.memory) < self.train_start:
            return
        batch_size = min(self.batch_size, len(self.memory))
        mini_batch = random.sample(self.memory, batch_size)

        update_input = np.zeros((batch_size, self.state_size))
        update_target = np.zeros((batch_size, self.action_size))

        for i in range(batch_size):
            state, action, reward, next_state, done = mini_batch[i]
            target = self.model.predict(state)[0]
            #print(state, action, reward, next_state, done)

            # As in queuing, it gets the maximum Q Value at s'. However, it is imported from the target model.
            if done:
                target[action] = reward
            else:
                target[action] = reward + self.discount_factor * \
                                          np.amax(self.target_model.predict(next_state)[0])
                #print('new target action', reward, self.discount_factor, np.amax(self.target_model.predict(next_state)[0]))
                #print(next_state, np.amax(self.target_model.predict(next_state)[0]))
                
            update_input[i] = state
            update_target[i] = target

        #print('input', update_input)
        #print('target', update_target)
        # You can create a minibatch of the correct target answer and the current value of your own,
        self.model.fit(update_input, update_target, batch_size=batch_size, epochs=1, verbose=0)

    def load_model(self, name):
        self.model.load_model(name)

    def save_model(self, name):
        self.model.save(name)


In [22]:
env = gym.make('MountainCar-v0')
state_size = env.observation_space.shape[0] # should be equal 2
ACTION_SIZE = 2
agent = DeepQAgent(state_size, ACTION_SIZE)
#agent.load_model("./save_model/model")
scores, episodes = [], []
N_EPISODES = 4000

update


In [23]:
for e in range(N_EPISODES):
    done = False
    score = 0
    state = env.reset()
    state = np.reshape(state, [1, state_size])
    print("state", state)

    # Action 0 (left), 1 (do nothing), 3 (declare fake_action to avoid doing nothing
    fake_action = 0

    # Counter for the same action 4 times
    action_count = 0

    while not done:
        #if agent.render:
        #    env.render()

        # Select an action in the current state and proceed to a step
        action_count = action_count + 1

        if action_count == 4:
            action = agent.get_action(state)
            action_count = 0

            if action == 0:
                fake_action = 0
            elif action == 1:
                fake_action = 2

        # Take 1 step with the selected action
        next_state, reward, done, info = env.step(fake_action)
        next_state = np.reshape(next_state, [1, state_size])
        #print('env: ', next_state, reward, done, info)
        # Give a penalty of -100 for actions that end an episode
        # reward = reward if not done else -100

        # Save <s, a, r, s'> to replay memory
        agent.replay_memory(state, fake_action, reward, next_state, done)
        # Continue to learn every time step
        agent.train_replay()
        score += reward
        state = next_state

        if done:
            env.reset()
            # Copy the learning model for each episode to the target model
            agent.update_target_model()

            # For each episode, the time step where cartpole stood is plot
            scores.append(score)
            episodes.append(e)
            print("episode:", e, "  score:", score, "  memory length:", len(agent.memory),
                  "  epsilon:", agent.epsilon)

    # Save model for every 50 episodes
    if e % 50 == 0:
        agent.save_model("./save_model/model")

state [[-0.42803098  0.        ]]
update
episode: 0   score: -200.0   memory length: 200   epsilon: 0.9960200000000077
state [[-0.48558155  0.        ]]
update
episode: 1   score: -200.0   memory length: 400   epsilon: 0.9920400000000154
state [[-0.52362841  0.        ]]
update
episode: 2   score: -200.0   memory length: 600   epsilon: 0.988060000000023
state [[-0.51127663  0.        ]]
update
episode: 3   score: -200.0   memory length: 800   epsilon: 0.9840800000000307
state [[-0.4928123  0.       ]]
update
episode: 4   score: -200.0   memory length: 1000   epsilon: 0.9801000000000384
state [[-0.58122748  0.        ]]
update
episode: 5   score: -200.0   memory length: 1200   epsilon: 0.9761200000000461
state [[-0.57579039  0.        ]]
update
episode: 6   score: -200.0   memory length: 1400   epsilon: 0.9721400000000537
state [[-0.43192112  0.        ]]
update
episode: 7   score: -200.0   memory length: 1600   epsilon: 0.9681600000000614
state [[-0.51513039  0.        ]]
update
episod

update
episode: 68   score: -200.0   memory length: 10000   epsilon: 0.7253800000005297
state [[-0.54394262  0.        ]]
update
episode: 69   score: -200.0   memory length: 10000   epsilon: 0.7214000000005374
state [[-0.49639544  0.        ]]
update
episode: 70   score: -200.0   memory length: 10000   epsilon: 0.7174200000005451
state [[-0.56452713  0.        ]]
update
episode: 71   score: -200.0   memory length: 10000   epsilon: 0.7134400000005527
state [[-0.48023568  0.        ]]
update
episode: 72   score: -200.0   memory length: 10000   epsilon: 0.7094600000005604
state [[-0.52009665  0.        ]]
update
episode: 73   score: -200.0   memory length: 10000   epsilon: 0.7054800000005681
state [[-0.47839659  0.        ]]
update
episode: 74   score: -200.0   memory length: 10000   epsilon: 0.7015000000005758
state [[-0.53833185  0.        ]]
update
episode: 75   score: -200.0   memory length: 10000   epsilon: 0.6975200000005835
state [[-0.55118592  0.        ]]
update
episode: 76   sco

update
episode: 136   score: -200.0   memory length: 10000   epsilon: 0.45474000000092546
state [[-0.54082439  0.        ]]
update
episode: 137   score: -200.0   memory length: 10000   epsilon: 0.45076000000092203
state [[-0.53546547  0.        ]]
update
episode: 138   score: -128.0   memory length: 10000   epsilon: 0.44821280000091984
state [[-0.55867813  0.        ]]
update
episode: 139   score: -200.0   memory length: 10000   epsilon: 0.4442328000009164
state [[-0.58292479  0.        ]]
update
episode: 140   score: -200.0   memory length: 10000   epsilon: 0.440252800000913
state [[-0.40926182  0.        ]]
update
episode: 141   score: -200.0   memory length: 10000   epsilon: 0.43627280000090957
state [[-0.5487637  0.       ]]
update
episode: 142   score: -200.0   memory length: 10000   epsilon: 0.43229280000090614
state [[-0.45618541  0.        ]]
update
episode: 143   score: -200.0   memory length: 10000   epsilon: 0.4283128000009027
state [[-0.54887082  0.        ]]
update
episode

update
episode: 203   score: -200.0   memory length: 10000   epsilon: 0.19110480000078073
state [[-0.42772852  0.        ]]
update
episode: 204   score: -200.0   memory length: 10000   epsilon: 0.18712480000078285
state [[-0.4944659  0.       ]]
update
episode: 205   score: -200.0   memory length: 10000   epsilon: 0.18314480000078498
state [[-0.53765654  0.        ]]
update
episode: 206   score: -200.0   memory length: 10000   epsilon: 0.1791648000007871
state [[-0.59688304  0.        ]]
update
episode: 207   score: -200.0   memory length: 10000   epsilon: 0.17518480000078923
state [[-0.59509041  0.        ]]
update
episode: 208   score: -200.0   memory length: 10000   epsilon: 0.17120480000079136
state [[-0.58594207  0.        ]]
update
episode: 209   score: -200.0   memory length: 10000   epsilon: 0.16722480000079348
state [[-0.53616726  0.        ]]
update
episode: 210   score: -200.0   memory length: 10000   epsilon: 0.1632448000007956
state [[-0.42653875  0.        ]]
update
episo

update
episode: 269   score: -191.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.50217368  0.        ]]
update
episode: 270   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.5870195  0.       ]]
update
episode: 271   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.53914996  0.        ]]
update
episode: 272   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.41121877  0.        ]]
update
episode: 273   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.5841355  0.       ]]
update
episode: 274   score: -159.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.51345778  0.        ]]
update
episode: 275   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.54837095  0.        ]]
update
episode: 276   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.57254255  0.        ]]
upda

update
episode: 335   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.57199039  0.        ]]
update
episode: 336   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.55753836  0.        ]]
update
episode: 337   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.45335333  0.        ]]
update
episode: 338   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.54936182  0.        ]]
update
episode: 339   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.52791201  0.        ]]
update
episode: 340   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.46162354  0.        ]]
update
episode: 341   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.57104564  0.        ]]
update
episode: 342   score: -184.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.4012191  0.       ]]
up

update
episode: 401   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.55378966  0.        ]]
update
episode: 402   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.40634881  0.        ]]
update
episode: 403   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.43959993  0.        ]]
update
episode: 404   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.51097708  0.        ]]
update
episode: 405   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.50776935  0.        ]]
update
episode: 406   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.54642151  0.        ]]
update
episode: 407   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.57973246  0.        ]]
update
episode: 408   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.55636129  0.        ]]


update
episode: 467   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.59988603  0.        ]]
update
episode: 468   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.43567424  0.        ]]
update
episode: 469   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.58887723  0.        ]]
update
episode: 470   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.40176306  0.        ]]
update
episode: 471   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.59335  0.     ]]
update
episode: 472   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.58463847  0.        ]]
update
episode: 473   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.54026768  0.        ]]
update
episode: 474   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.46441028  0.        ]]
update

update
episode: 533   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.42790425  0.        ]]
update
episode: 534   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.55681002  0.        ]]
update
episode: 535   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.59689846  0.        ]]
update
episode: 536   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.55316531  0.        ]]
update
episode: 537   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.55789432  0.        ]]
update
episode: 538   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.49946803  0.        ]]
update
episode: 539   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.50272292  0.        ]]
update
episode: 540   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.4957274  0.       ]]
up

update
episode: 599   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.56923893  0.        ]]
update
episode: 600   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.55647799  0.        ]]
update
episode: 601   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.53150098  0.        ]]
update
episode: 602   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.58167597  0.        ]]
update
episode: 603   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.4419785  0.       ]]
update
episode: 604   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.48248982  0.        ]]
update
episode: 605   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.46325491  0.        ]]
update
episode: 606   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.54189035  0.        ]]
up

update
episode: 665   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.48657138  0.        ]]
update
episode: 666   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.574215  0.      ]]
update
episode: 667   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.47174772  0.        ]]
update
episode: 668   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.47173704  0.        ]]
update
episode: 669   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.52251062  0.        ]]
update
episode: 670   score: -177.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.53217009  0.        ]]
update
episode: 671   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.56934123  0.        ]]
update
episode: 672   score: -140.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.44555893  0.        ]]
upda

update
episode: 731   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.55325491  0.        ]]
update
episode: 732   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.51999754  0.        ]]
update
episode: 733   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.56128605  0.        ]]
update
episode: 734   score: -136.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.45064133  0.        ]]
update
episode: 735   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.50028325  0.        ]]
update
episode: 736   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.59627373  0.        ]]
update
episode: 737   score: -126.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.59326755  0.        ]]
update
episode: 738   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.56208127  0.        ]]


update
episode: 797   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.58204716  0.        ]]
update
episode: 798   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.58440253  0.        ]]
update
episode: 799   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.53092257  0.        ]]
update
episode: 800   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.57260606  0.        ]]
update
episode: 801   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.59469645  0.        ]]
update
episode: 802   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.52213713  0.        ]]
update
episode: 803   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.53957092  0.        ]]
update
episode: 804   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.59983137  0.        ]]


update
episode: 863   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.57199891  0.        ]]
update
episode: 864   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.48510259  0.        ]]
update
episode: 865   score: -172.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.4815167  0.       ]]
update
episode: 866   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.43483593  0.        ]]
update
episode: 867   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.5750996  0.       ]]
update
episode: 868   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.50737122  0.        ]]
update
episode: 869   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.5858541  0.       ]]
update
episode: 870   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.55248211  0.        ]]
update

update
episode: 929   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.51379785  0.        ]]
update
episode: 930   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.46140676  0.        ]]
update
episode: 931   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.53355337  0.        ]]
update
episode: 932   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.41439907  0.        ]]
update
episode: 933   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.47147852  0.        ]]
update
episode: 934   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.41749442  0.        ]]
update
episode: 935   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.468108  0.      ]]
update
episode: 936   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.52826403  0.        ]]
upda

update
episode: 995   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.55923052  0.        ]]
update
episode: 996   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.52021725  0.        ]]
update
episode: 997   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.56278338  0.        ]]
update
episode: 998   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.4384235  0.       ]]
update
episode: 999   score: -199.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.42219547  0.        ]]
update
episode: 1000   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.56943596  0.        ]]
update
episode: 1001   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.58845666  0.        ]]
update
episode: 1002   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.48475285  0.        ]]

update
episode: 1061   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.45233662  0.        ]]
update
episode: 1062   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.42511963  0.        ]]
update
episode: 1063   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.54716375  0.        ]]
update
episode: 1064   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.40983825  0.        ]]
update
episode: 1065   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.53589392  0.        ]]
update
episode: 1066   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.4802474  0.       ]]
update
episode: 1067   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.41375795  0.        ]]
update
episode: 1068   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.52851688  0.     

update
episode: 1127   score: -162.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.55108433  0.        ]]
update
episode: 1128   score: -140.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.56588863  0.        ]]
update
episode: 1129   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.55396209  0.        ]]
update
episode: 1130   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.47804428  0.        ]]
update
episode: 1131   score: -161.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.52790062  0.        ]]
update
episode: 1132   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.58326687  0.        ]]
update
episode: 1133   score: -155.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.48399131  0.        ]]
update
episode: 1134   score: -183.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.43941092  0.   

update
episode: 1193   score: -156.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.50993563  0.        ]]
update
episode: 1194   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.45412547  0.        ]]
update
episode: 1195   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.54006532  0.        ]]
update
episode: 1196   score: -157.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.56188958  0.        ]]
update
episode: 1197   score: -130.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.41250039  0.        ]]
update
episode: 1198   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.54580028  0.        ]]
update
episode: 1199   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.45263484  0.        ]]
update
episode: 1200   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.51721058  0.   

update
episode: 1259   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.48343998  0.        ]]
update
episode: 1260   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.44533424  0.        ]]
update
episode: 1261   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.58525295  0.        ]]
update
episode: 1262   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.53249951  0.        ]]
update
episode: 1263   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.44455849  0.        ]]
update
episode: 1264   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.58827809  0.        ]]
update
episode: 1265   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.42131315  0.        ]]
update
episode: 1266   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.42018146  0.   

update
episode: 1325   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.49964366  0.        ]]
update
episode: 1326   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.42175894  0.        ]]
update
episode: 1327   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.58901375  0.        ]]
update
episode: 1328   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.40515068  0.        ]]
update
episode: 1329   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.55509846  0.        ]]
update
episode: 1330   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.53889527  0.        ]]
update
episode: 1331   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.55498867  0.        ]]
update
episode: 1332   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.59827213  0.   

update
episode: 1391   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.52562994  0.        ]]
update
episode: 1392   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.46539611  0.        ]]
update
episode: 1393   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.53242463  0.        ]]
update
episode: 1394   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.57113607  0.        ]]
update
episode: 1395   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.58262712  0.        ]]
update
episode: 1396   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.56214246  0.        ]]
update
episode: 1397   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.48136895  0.        ]]
update
episode: 1398   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.58247591  0.   

update
episode: 1457   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.56288184  0.        ]]
update
episode: 1458   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.43025858  0.        ]]
update
episode: 1459   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.4244341  0.       ]]
update
episode: 1460   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.58685548  0.        ]]
update
episode: 1461   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.50694807  0.        ]]
update
episode: 1462   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.58837476  0.        ]]
update
episode: 1463   score: -196.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.44913485  0.        ]]
update
episode: 1464   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.40890088  0.     

update
episode: 1523   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.45679095  0.        ]]
update
episode: 1524   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.50369346  0.        ]]
update
episode: 1525   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.54612088  0.        ]]
update
episode: 1526   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.57383493  0.        ]]
update
episode: 1527   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.42831302  0.        ]]
update
episode: 1528   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.58456046  0.        ]]
update
episode: 1529   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.53731712  0.        ]]
update
episode: 1530   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.48502683  0.   

update
episode: 1589   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.56086783  0.        ]]
update
episode: 1590   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.43228693  0.        ]]
update
episode: 1591   score: -158.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.43193149  0.        ]]
update
episode: 1592   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.59521102  0.        ]]
update
episode: 1593   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.50552487  0.        ]]
update
episode: 1594   score: -133.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.52211448  0.        ]]
update
episode: 1595   score: -156.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.59805778  0.        ]]
update
episode: 1596   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.47092121  0.   

update
episode: 1655   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.44293486  0.        ]]
update
episode: 1656   score: -107.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.5193168  0.       ]]
update
episode: 1657   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.59411229  0.        ]]
update
episode: 1658   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.58057737  0.        ]]
update
episode: 1659   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.59275501  0.        ]]
update
episode: 1660   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.45590618  0.        ]]
update
episode: 1661   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.52856347  0.        ]]
update
episode: 1662   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.57933113  0.     

update
episode: 1721   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.45891705  0.        ]]
update
episode: 1722   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.47894296  0.        ]]
update
episode: 1723   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.5115936  0.       ]]
update
episode: 1724   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.57084626  0.        ]]
update
episode: 1725   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.45516223  0.        ]]
update
episode: 1726   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.58849073  0.        ]]
update
episode: 1727   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.44962724  0.        ]]
update
episode: 1728   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.46293813  0.     

update
episode: 1787   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.58113275  0.        ]]
update
episode: 1788   score: -193.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.52926177  0.        ]]
update
episode: 1789   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.56938752  0.        ]]
update
episode: 1790   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.42684107  0.        ]]
update
episode: 1791   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.55723224  0.        ]]
update
episode: 1792   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.59699735  0.        ]]
update
episode: 1793   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.48556756  0.        ]]
update
episode: 1794   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.4909592  0.    

update
episode: 1853   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.43507838  0.        ]]
update
episode: 1854   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.40358507  0.        ]]
update
episode: 1855   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.4031329  0.       ]]
update
episode: 1856   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.46237389  0.        ]]
update
episode: 1857   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.51658399  0.        ]]
update
episode: 1858   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.53263269  0.        ]]
update
episode: 1859   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.40435894  0.        ]]
update
episode: 1860   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.40107079  0.     

update
episode: 1919   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.41809093  0.        ]]
update
episode: 1920   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.51210848  0.        ]]
update
episode: 1921   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.59986102  0.        ]]
update
episode: 1922   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.40541015  0.        ]]
update
episode: 1923   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.43828052  0.        ]]
update
episode: 1924   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.59002869  0.        ]]
update
episode: 1925   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.56629818  0.        ]]
update
episode: 1926   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.54816221  0.   

update
episode: 1985   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.47330608  0.        ]]
update
episode: 1986   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.44856346  0.        ]]
update
episode: 1987   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.46585654  0.        ]]
update
episode: 1988   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.45901262  0.        ]]
update
episode: 1989   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.56728149  0.        ]]
update
episode: 1990   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.52000421  0.        ]]
update
episode: 1991   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.55963122  0.        ]]
update
episode: 1992   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.54545305  0.   

update
episode: 2051   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.50899928  0.        ]]
update
episode: 2052   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.51296242  0.        ]]
update
episode: 2053   score: -197.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.49755775  0.        ]]
update
episode: 2054   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.43799974  0.        ]]
update
episode: 2055   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.48580741  0.        ]]
update
episode: 2056   score: -129.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.44040782  0.        ]]
update
episode: 2057   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.54623918  0.        ]]
update
episode: 2058   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.48317033  0.   

update
episode: 2117   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.55943908  0.        ]]
update
episode: 2118   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.56795671  0.        ]]
update
episode: 2119   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.54676607  0.        ]]
update
episode: 2120   score: -138.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.54257987  0.        ]]
update
episode: 2121   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.54332531  0.        ]]
update
episode: 2122   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.50223835  0.        ]]
update
episode: 2123   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.46939901  0.        ]]
update
episode: 2124   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.52513253  0.   

update
episode: 2183   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.48266474  0.        ]]
update
episode: 2184   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.506576  0.      ]]
update
episode: 2185   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.56968589  0.        ]]
update
episode: 2186   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.53858257  0.        ]]
update
episode: 2187   score: -122.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.40575498  0.        ]]
update
episode: 2188   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.4302739  0.       ]]
update
episode: 2189   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.56018549  0.        ]]
update
episode: 2190   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.51848645  0.        ]

update
episode: 2249   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.52960775  0.        ]]
update
episode: 2250   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.54923451  0.        ]]
update
episode: 2251   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.59293082  0.        ]]
update
episode: 2252   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.45200958  0.        ]]
update
episode: 2253   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.55017376  0.        ]]
update
episode: 2254   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.5119067  0.       ]]
update
episode: 2255   score: -153.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.51472876  0.        ]]
update
episode: 2256   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.58650813  0.     

update
episode: 2315   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.43900472  0.        ]]
update
episode: 2316   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.59663508  0.        ]]
update
episode: 2317   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.49343026  0.        ]]
update
episode: 2318   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.41806162  0.        ]]
update
episode: 2319   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.57636933  0.        ]]
update
episode: 2320   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.53762971  0.        ]]
update
episode: 2321   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.59403186  0.        ]]
update
episode: 2322   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.46049713  0.   

update
episode: 2381   score: -174.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.41931604  0.        ]]
update
episode: 2382   score: -175.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.4453533  0.       ]]
update
episode: 2383   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.50916326  0.        ]]
update
episode: 2384   score: -142.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.49200003  0.        ]]
update
episode: 2385   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.4965188  0.       ]]
update
episode: 2386   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.42963486  0.        ]]
update
episode: 2387   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.43648483  0.        ]]
update
episode: 2388   score: -171.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.45360092  0.       

update
episode: 2447   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.4904415  0.       ]]
update
episode: 2448   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.44530116  0.        ]]
update
episode: 2449   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.53980326  0.        ]]
update
episode: 2450   score: -129.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.42206322  0.        ]]
update
episode: 2451   score: -181.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.59719684  0.        ]]
update
episode: 2452   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.40197073  0.        ]]
update
episode: 2453   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.45028181  0.        ]]
update
episode: 2454   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.53764084  0.     

update
episode: 2513   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.44375298  0.        ]]
update
episode: 2514   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.4153798  0.       ]]
update
episode: 2515   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.50265779  0.        ]]
update
episode: 2516   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.5591933  0.       ]]
update
episode: 2517   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.54047686  0.        ]]
update
episode: 2518   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.44486666  0.        ]]
update
episode: 2519   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.53730466  0.        ]]
update
episode: 2520   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.54941565  0.       

update
episode: 2579   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.44874179  0.        ]]
update
episode: 2580   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.56213001  0.        ]]
update
episode: 2581   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.49692122  0.        ]]
update
episode: 2582   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.56661334  0.        ]]
update
episode: 2583   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.47272841  0.        ]]
update
episode: 2584   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.54371092  0.        ]]
update
episode: 2585   score: -160.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.52254891  0.        ]]
update
episode: 2586   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.56676792  0.   

update
episode: 2645   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.41265672  0.        ]]
update
episode: 2646   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.56006034  0.        ]]
update
episode: 2647   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.47015312  0.        ]]
update
episode: 2648   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.52821619  0.        ]]
update
episode: 2649   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.4604258  0.       ]]
update
episode: 2650   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.48579895  0.        ]]
update
episode: 2651   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.47640746  0.        ]]
update
episode: 2652   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.43107042  0.     

update
episode: 2711   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.42007953  0.        ]]
update
episode: 2712   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.53521168  0.        ]]
update
episode: 2713   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.46777796  0.        ]]
update
episode: 2714   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.40657589  0.        ]]
update
episode: 2715   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.40273189  0.        ]]
update
episode: 2716   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.41673334  0.        ]]
update
episode: 2717   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.5447009  0.       ]]
update
episode: 2718   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.55503191  0.     

update
episode: 2777   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.44097178  0.        ]]
update
episode: 2778   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.48292982  0.        ]]
update
episode: 2779   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.48021107  0.        ]]
update
episode: 2780   score: -174.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.560139  0.      ]]
update
episode: 2781   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.56353262  0.        ]]
update
episode: 2782   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.54734336  0.        ]]
update
episode: 2783   score: -114.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.55671544  0.        ]]
update
episode: 2784   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.47862995  0.       

update
episode: 2843   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.55212773  0.        ]]
update
episode: 2844   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.59639531  0.        ]]
update
episode: 2845   score: -121.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.46092011  0.        ]]
update
episode: 2846   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.50086311  0.        ]]
update
episode: 2847   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.49505498  0.        ]]
update
episode: 2848   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.47777059  0.        ]]
update
episode: 2849   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.44088323  0.        ]]
update
episode: 2850   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.48780006  0.   

update
episode: 2909   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.43470085  0.        ]]
update
episode: 2910   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.53465971  0.        ]]
update
episode: 2911   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.59378745  0.        ]]
update
episode: 2912   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.53368711  0.        ]]
update
episode: 2913   score: -190.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.52734575  0.        ]]
update
episode: 2914   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.55736992  0.        ]]
update
episode: 2915   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.4185379  0.       ]]
update
episode: 2916   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.5185788  0.      

update
episode: 2975   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.48814232  0.        ]]
update
episode: 2976   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.59512663  0.        ]]
update
episode: 2977   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.4889029  0.       ]]
update
episode: 2978   score: -169.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.44871953  0.        ]]
update
episode: 2979   score: -122.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.4519597  0.       ]]
update
episode: 2980   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.49512473  0.        ]]
update
episode: 2981   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.48686899  0.        ]]
update
episode: 2982   score: -187.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.56995471  0.       

update
episode: 3041   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.46946825  0.        ]]
update
episode: 3042   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.45078759  0.        ]]
update
episode: 3043   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.5629793  0.       ]]
update
episode: 3044   score: -124.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.46043718  0.        ]]
update
episode: 3045   score: -113.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.54419087  0.        ]]
update
episode: 3046   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.53801542  0.        ]]
update
episode: 3047   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.49193713  0.        ]]
update
episode: 3048   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.42555886  0.     

update
episode: 3107   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.57065061  0.        ]]
update
episode: 3108   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.48021225  0.        ]]
update
episode: 3109   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.57768768  0.        ]]
update
episode: 3110   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.40197974  0.        ]]
update
episode: 3111   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.46242852  0.        ]]
update
episode: 3112   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.42702736  0.        ]]
update
episode: 3113   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.54173569  0.        ]]
update
episode: 3114   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.53959874  0.   

update
episode: 3173   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.5398997  0.       ]]
update
episode: 3174   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.52558216  0.        ]]
update
episode: 3175   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.46247693  0.        ]]
update
episode: 3176   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.44640856  0.        ]]
update
episode: 3177   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.57642826  0.        ]]
update
episode: 3178   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.45327344  0.        ]]
update
episode: 3179   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.58583645  0.        ]]
update
episode: 3180   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.58238298  0.     

update
episode: 3239   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.40326317  0.        ]]
update
episode: 3240   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.48128051  0.        ]]
update
episode: 3241   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.59886569  0.        ]]
update
episode: 3242   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.42870929  0.        ]]
update
episode: 3243   score: -174.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.42898767  0.        ]]
update
episode: 3244   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.40176973  0.        ]]
update
episode: 3245   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.42997574  0.        ]]
update
episode: 3246   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.43633975  0.   

update
episode: 3305   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.57207415  0.        ]]
update
episode: 3306   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.54314668  0.        ]]
update
episode: 3307   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.40285801  0.        ]]
update
episode: 3308   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.49935889  0.        ]]
update
episode: 3309   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.43573876  0.        ]]
update
episode: 3310   score: -139.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.55030406  0.        ]]
update
episode: 3311   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.41959137  0.        ]]
update
episode: 3312   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.47067568  0.   

update
episode: 3371   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.53067618  0.        ]]
update
episode: 3372   score: -165.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.44822522  0.        ]]
update
episode: 3373   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.51983928  0.        ]]
update
episode: 3374   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.44519255  0.        ]]
update
episode: 3375   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.53203705  0.        ]]
update
episode: 3376   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.54378806  0.        ]]
update
episode: 3377   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.4445149  0.       ]]
update
episode: 3378   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.43916358  0.     

update
episode: 3437   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.5288804  0.       ]]
update
episode: 3438   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.42342278  0.        ]]
update
episode: 3439   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.51020371  0.        ]]
update
episode: 3440   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.40935635  0.        ]]
update
episode: 3441   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.45595443  0.        ]]
update
episode: 3442   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.57177909  0.        ]]
update
episode: 3443   score: -119.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.57649285  0.        ]]
update
episode: 3444   score: -153.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.55081535  0.     

update
episode: 3503   score: -185.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.43825133  0.        ]]
update
episode: 3504   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.59584535  0.        ]]
update
episode: 3505   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.49410242  0.        ]]
update
episode: 3506   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.59732114  0.        ]]
update
episode: 3507   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.4219287  0.       ]]
update
episode: 3508   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.49116119  0.        ]]
update
episode: 3509   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.543152  0.      ]]
update
episode: 3510   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.53816845  0.        ]

update
episode: 3569   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.56975948  0.        ]]
update
episode: 3570   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.43406157  0.        ]]
update
episode: 3571   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.52709082  0.        ]]
update
episode: 3572   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.48377955  0.        ]]
update
episode: 3573   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.43317056  0.        ]]
update
episode: 3574   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.49006278  0.        ]]
update
episode: 3575   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.4104397  0.       ]]
update
episode: 3576   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.47826097  0.     

update
episode: 3635   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.47135597  0.        ]]
update
episode: 3636   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.55905123  0.        ]]
update
episode: 3637   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.54811819  0.        ]]
update
episode: 3638   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.58513011  0.        ]]
update
episode: 3639   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.55484431  0.        ]]
update
episode: 3640   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.51751708  0.        ]]
update
episode: 3641   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.53225118  0.        ]]
update
episode: 3642   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.49546698  0.   

update
episode: 3701   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.42557885  0.        ]]
update
episode: 3702   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.57160504  0.        ]]
update
episode: 3703   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.48391802  0.        ]]
update
episode: 3704   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.42553443  0.        ]]
update
episode: 3705   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.51608504  0.        ]]
update
episode: 3706   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.54703516  0.        ]]
update
episode: 3707   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.53361561  0.        ]]
update
episode: 3708   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.56491833  0.   

update
episode: 3767   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.57218388  0.        ]]
update
episode: 3768   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.4009117  0.       ]]
update
episode: 3769   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.53449622  0.        ]]
update
episode: 3770   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.50112771  0.        ]]
update
episode: 3771   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.56266005  0.        ]]
update
episode: 3772   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.57775483  0.        ]]
update
episode: 3773   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.58988484  0.        ]]
update
episode: 3774   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.53241195  0.     

update
episode: 3833   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.44059191  0.        ]]
update
episode: 3834   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.43077717  0.        ]]
update
episode: 3835   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.4810235  0.       ]]
update
episode: 3836   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.50973347  0.        ]]
update
episode: 3837   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.41536282  0.        ]]
update
episode: 3838   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.40057557  0.        ]]
update
episode: 3839   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.55692238  0.        ]]
update
episode: 3840   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.50469297  0.     

update
episode: 3899   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.57234786  0.        ]]
update
episode: 3900   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.49710014  0.        ]]
update
episode: 3901   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.42393338  0.        ]]
update
episode: 3902   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.41342292  0.        ]]
update
episode: 3903   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.47179323  0.        ]]
update
episode: 3904   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.58130442  0.        ]]
update
episode: 3905   score: -139.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.53202752  0.        ]]
update
episode: 3906   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.5309918  0.    

update
episode: 3965   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.52500986  0.        ]]
update
episode: 3966   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.40789554  0.        ]]
update
episode: 3967   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.52801487  0.        ]]
update
episode: 3968   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.48704744  0.        ]]
update
episode: 3969   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.57119853  0.        ]]
update
episode: 3970   score: -124.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.47541054  0.        ]]
update
episode: 3971   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.44769347  0.        ]]
update
episode: 3972   score: -200.0   memory length: 10000   epsilon: 0.004980100000801017
state [[-0.4290338  0.    