In [1]:
import random

import numpy as np
import pandas as pd

In [2]:
import tensorflow as tf
import keras

# Create a TF 1.x session that logs the device each op is placed on and
# register it as the session Keras uses. Without set_session() Keras lazily
# creates its own session, so this one (and its logging config) went unused.
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
keras.backend.set_session(sess)

# Record the library versions this notebook was executed with.
(keras.__version__, tf.__version__)

Using TensorFlow backend.


('2.2.2', '1.10.1')

In [3]:
# Exchange code and trading pair select which OHLCV dump is loaded.
exch, pair = 'BTRX', 'BTC/USDT'

# The file name uses '-' in place of the '/' in the pair.
ohlcv_csv = f"{exch}_{pair.replace('/', '-')}_ohlcv.csv"
df = pd.read_csv(ohlcv_csv)
df.head()

Unnamed: 0,timestamp,open,high,low,close,volume
0,2016-12-31 05:02:00,948.0,948.0,942.899,942.899,0.083403
1,2016-12-31 05:03:00,942.899,942.899,942.899,942.899,0.0
2,2016-12-31 05:04:00,942.899,942.899,942.899,942.899,0.0
3,2016-12-31 05:05:00,942.899,942.899,942.899,942.899,0.0
4,2016-12-31 05:06:00,942.899,942.899,942.899,942.899,0.0


In [4]:
# Check the inferred column dtypes (read_csv was called without date parsing).
df.dtypes

timestamp     object
open         float64
high         float64
low          float64
close        float64
volume       float64
dtype: object

In [5]:
# Plot the stock prices for the last day
import matplotlib.pyplot as plt
from mpl_finance import candlestick_ohlc
import matplotlib.dates as mdates

plot_last_n_minutes = 60 * 12  # 1/2 day
cs_frame = df.iloc[-1 * plot_last_n_minutes:].copy()  # Create the candlestick frame

#if necessary convert to datetime
cs_frame.timestamp = pd.to_datetime(cs_frame.timestamp)

cs_frame = cs_frame[['timestamp', 'open', 'high', 'low', 'close', 'volume']]
cs_frame["timestamp"] = cs_frame["timestamp"].apply(mdates.date2num)

f1 = plt.subplot2grid((6, 1), (0, 0), rowspan=6, colspan=1, facecolor='#07000d')
candlestick_ohlc(f1, cs_frame.values, width=.0001, colorup='#53c156', colordown='#ff1717', alpha=.75)
f1.xaxis_date()
f1.xaxis.set_major_formatter(mdates.DateFormatter('%y-%m-%d %H:%M'))

plt.xticks(rotation=45)
plt.ylabel('Stock Price')
plt.xlabel('Date Hours:Minutes')
plt.show()

# Cleanup memory
%reset_selective -f "^cs_frame$"

<Figure size 640x480 with 1 Axes>

In [6]:
def add_moving_avg(df, period=30, column='close'):
    """Append a simple moving average of one column to ``df`` in place.

    The result is stored in a column named ``f"{period}_ma"``. Rows where the
    rolling window is not yet full (the first ``period - 1`` rows for an
    integer period) are NaN.

    Args:
        df: DataFrame holding the source column; it is modified in place.
        period: Rolling window passed to ``Series.rolling``. Integer periods
            must be at least 1.
        column: Name of the column to average. Defaults to ``'close'``.

    Returns:
        The same ``df`` object, so calls can be chained.

    Raises:
        ValueError: If ``period`` is an integer smaller than 1.
        KeyError: If ``column`` is not present in ``df``.
    """
    # A window of 0 would silently produce an all-NaN column; reject it early.
    if isinstance(period, (int, np.integer)) and period < 1:
        raise ValueError(f"period must be a positive integer, got {period!r}")
    df[f"{period}_ma"] = df[column].rolling(period).mean()
    return df

In [7]:
# Adds the default 30-row moving average as a new "30_ma" column (mutates df in place).
add_moving_avg(df)
df.tail()

Unnamed: 0,timestamp,open,high,low,close,volume,30_ma
532493,2018-01-04 23:55:00,15199.0,15200.0,15199.0,15199.0,10.446506,15063.411186
532494,2018-01-04 23:56:00,15199.0,15220.0,15195.582639,15220.0,5.457758,15070.777853
532495,2018-01-04 23:57:00,15220.0,15238.0,15200.0,15201.0,7.473745,15077.64452
532496,2018-01-04 23:58:00,15200.0,15202.21529,15085.001,15101.591266,7.258691,15081.030929
532497,2018-01-04 23:59:00,15101.591266,15199.0,15085.001,15199.0,1.777752,15087.697595


In [8]:
# NOTE(review): `stocks` is not referenced in the visible cells; the import is
# presumably needed so stock_gym registers its environments with gym — confirm.
from stock_gym.envs import stocks
import gym

# Instantiate the trading environment by its registered id.
env = gym.make('ContSinMarketEnv-v0')
# Left disabled: the OHLCV frame `df` is not handed to the environment here.
#env.add_data(df)

In [9]:
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, GRU, Dropout
from keras.optimizers import Adam

In [10]:
from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [11]:
# Seed every random number generator the training run can draw from, so a
# Restart & Run All repeats the same run as far as the libraries allow.
# Previously only NumPy and the environment were seeded.
SEED = 123
random.seed(SEED)         # Python's RNG (keras-rl uses it when sampling replay batches)
np.random.seed(SEED)      # NumPy's global RNG
tf.set_random_seed(SEED)  # TensorFlow graph-level seed (TF 1.x API)
env.seed(SEED)

# Action count and observation shape; both size the network built below.
(env.n_actions, env.observation_shape())

(3, (1, 64))

In [12]:
# Q-network handed to the DQN agent: one small GRU over the observation,
# followed by a linear layer with one output per action.
# Earlier commented-out experiments (stacked 128-unit GRUs, a Dense(64)+ReLU+
# Dropout head, and a Flatten + 3x Dense(16) MLP) were dropped from this cell.
model = Sequential()

model.add(GRU(16,
              input_shape=env.observation_shape(),
              dropout=0.1,
              recurrent_dropout=0.5))

# Linear activation so the outputs can take any real value.
model.add(Dense(env.n_actions, kernel_initializer='lecun_uniform', activation='linear'))

# summary() prints the table itself and returns None, so wrapping it in
# print() appended a stray "None" line to the cell output.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru_1 (GRU)                  (None, 16)                3888      
_________________________________________________________________
dense_1 (Dense)              (None, 3)                 51        
Total params: 3,939
Trainable params: 3,939
Non-trainable params: 0
_________________________________________________________________
None


In [13]:
# Replay memory for the DQN agent.
# NOTE(review): limit and window_length come from env attributes whose meaning
# is not visible here; confirm that window_length=env.n_features is consistent
# with the model's input_shape (env.observation_shape()).
memory = SequentialMemory(limit=env.total_space_size, window_length=env.n_features)
# Action-selection policy, constructed with its default arguments.
policy = BoltzmannQPolicy()

In [14]:
# Assemble the DQN agent from the network, replay memory and policy built above.
dqn = DQNAgent(
    model=model,
    nb_actions=env.n_actions,
    memory=memory,
    policy=policy,
    nb_steps_warmup=100,
    enable_dueling_network=True,
    dueling_type='avg',
    target_model_update=1e-2,
)


In [15]:
# Compile the agent with Adam (learning rate 1e-3), tracking mean absolute error.
dqn.compile(optimizer=Adam(lr=1e-3), metrics=['mae'])

In [16]:
# Train for 5000 environment steps without rendering. verbose=2 logs one line
# per episode; the return value of fit() is stored in `history`.
history = dqn.fit(env, nb_steps=5000, visualize=False, verbose=2)

Training for 5000 steps ...
    2/5000: episode: 1, duration: 0.221s, episode steps: 2, steps per second: 9, episode reward: -1000.003, mean reward: -500.001 [-1000.002, -0.001], mean action: 1.000 [0.000, 2.000], mean observation: 0.728 [0.151, 1.000], loss: --, mean_absolute_error: --, mean_q: --
    3/5000: episode: 2, duration: 0.004s, episode steps: 1, steps per second: 251, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.787 [0.296, 1.000], loss: --, mean_absolute_error: --, mean_q: --
    4/5000: episode: 3, duration: 0.002s, episode steps: 1, steps per second: 432, episode reward: -1000.002, mean reward: -1000.002 [-1000.002, -1000.002], mean action: 2.000 [2.000, 2.000], mean observation: 0.392 [0.000, 0.966], loss: --, mean_absolute_error: --, mean_q: --
    5/5000: episode: 4, duration: 0.003s, episode steps: 1, steps per second: 362, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action

   82/5000: episode: 81, duration: 0.003s, episode steps: 1, steps per second: 366, episode reward: -1000.000, mean reward: -1000.000 [-1000.000, -1000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.290 [0.000, 0.867], loss: --, mean_absolute_error: --, mean_q: --
   83/5000: episode: 82, duration: 0.003s, episode steps: 1, steps per second: 358, episode reward: -1000.001, mean reward: -1000.001 [-1000.001, -1000.001], mean action: 2.000 [2.000, 2.000], mean observation: 0.808 [0.388, 1.000], loss: --, mean_absolute_error: --, mean_q: --
   84/5000: episode: 83, duration: 0.002s, episode steps: 1, steps per second: 421, episode reward: -1000.002, mean reward: -1000.002 [-1000.002, -1000.002], mean action: 2.000 [2.000, 2.000], mean observation: 0.529 [0.003, 1.000], loss: --, mean_absolute_error: --, mean_q: --
   85/5000: episode: 84, duration: 0.003s, episode steps: 1, steps per second: 388, episode reward: -1000.000, mean reward: -1000.000 [-1000.000, -1000.000], mean 

  115/5000: episode: 114, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.622 [0.043, 1.000], loss: 280360.968750, mean_absolute_error: 188.293259, mean_q: -0.859543
  116/5000: episode: 115, duration: 0.016s, episode steps: 1, steps per second: 61, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.627 [0.047, 1.000], loss: 358000.375000, mean_absolute_error: 240.242218, mean_q: -0.984811
  117/5000: episode: 116, duration: 0.014s, episode steps: 1, steps per second: 69, episode reward: -1000.000, mean reward: -1000.000 [-1000.000, -1000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.280 [0.000, 0.852], loss: 357930.843750, mean_absolute_error: 240.283051, mean_q: -1.010729
  118/5000: episode: 117, duration: 0.014s, episode steps: 1, steps per second: 72, episode reward: -0.001, m

  143/5000: episode: 142, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.775 [0.259, 1.000], loss: 324030.781250, mean_absolute_error: 220.573090, mean_q: -1.670188
  144/5000: episode: 143, duration: 0.016s, episode steps: 1, steps per second: 61, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.748 [0.196, 1.000], loss: 339729.562500, mean_absolute_error: 230.808472, mean_q: -1.528084
  145/5000: episode: 144, duration: 0.014s, episode steps: 1, steps per second: 73, episode reward: -1000.001, mean reward: -1000.001 [-1000.001, -1000.001], mean action: 2.000 [2.000, 2.000], mean observation: 0.184 [0.000, 0.552], loss: 262498.625000, mean_absolute_error: 179.601578, mean_q: -1.596015
  146/5000: episode: 145, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, m

  171/5000: episode: 170, duration: 0.016s, episode steps: 1, steps per second: 62, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.640 [0.056, 1.000], loss: 291887.062500, mean_absolute_error: 200.623703, mean_q: -1.289813
  172/5000: episode: 171, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.535 [0.005, 1.000], loss: 230656.171875, mean_absolute_error: 159.553955, mean_q: -1.211856
  173/5000: episode: 172, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.609 [0.034, 1.000], loss: 307063.375000, mean_absolute_error: 210.810852, mean_q: -1.263484
  174/5000: episode: 173, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: 

  200/5000: episode: 199, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.464 [0.000, 0.995], loss: 198765.000000, mean_absolute_error: 140.000488, mean_q: -1.137427
  201/5000: episode: 200, duration: 0.016s, episode steps: 1, steps per second: 63, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.809 [0.395, 1.000], loss: 214082.015625, mean_absolute_error: 150.027756, mean_q: -1.092204
  202/5000: episode: 201, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.768 [0.240, 1.000], loss: 259973.593750, mean_absolute_error: 180.744873, mean_q: -1.104923
  203/5000: episode: 202, duration: 0.014s, episode steps: 1, steps per second: 72, episode reward: -0.001, mean reward: 

  229/5000: episode: 228, duration: 0.016s, episode steps: 1, steps per second: 64, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.612 [0.036, 1.000], loss: 183170.562500, mean_absolute_error: 130.335114, mean_q: -0.907710
  230/5000: episode: 229, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.519 [0.002, 1.000], loss: 228569.671875, mean_absolute_error: 160.913727, mean_q: -0.945588
  231/5000: episode: 230, duration: 0.014s, episode steps: 1, steps per second: 73, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.784 [0.285, 1.000], loss: 228573.593750, mean_absolute_error: 161.015137, mean_q: -0.949276
  232/5000: episode: 231, duration: 0.014s, episode steps: 1, steps per second: 72, episode reward: -0.001, mean reward: 

  259/5000: episode: 258, duration: 0.016s, episode steps: 1, steps per second: 62, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.468 [0.000, 0.996], loss: 106359.648438, mean_absolute_error: 80.373199, mean_q: -0.787744
  260/5000: episode: 259, duration: 0.015s, episode steps: 1, steps per second: 66, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.760 [0.221, 1.000], loss: 167219.421875, mean_absolute_error: 120.867950, mean_q: -0.765761
  261/5000: episode: 260, duration: 0.015s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.187 [0.000, 0.575], loss: 151870.781250, mean_absolute_error: 110.895462, mean_q: -0.790553
  262/5000: episode: 261, duration: 0.014s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -

  287/5000: episode: 286, duration: 0.014s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.592 [0.025, 1.000], loss: 106041.804688, mean_absolute_error: 81.005829, mean_q: -0.659836
  288/5000: episode: 287, duration: 0.016s, episode steps: 1, steps per second: 64, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.328 [0.000, 0.914], loss: 106129.960938, mean_absolute_error: 81.121964, mean_q: -0.659138
  289/5000: episode: 288, duration: 0.014s, episode steps: 1, steps per second: 72, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.200 [0.000, 0.657], loss: 181816.187500, mean_absolute_error: 131.524414, mean_q: -0.648777
  290/5000: episode: 289, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: -0

  329/5000: episode: 328, duration: 0.015s, episode steps: 1, steps per second: 66, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.373 [0.000, 0.954], loss: 90565.648438, mean_absolute_error: 71.829865, mean_q: -0.469887
  330/5000: episode: 329, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.592 [0.025, 1.000], loss: 135837.937500, mean_absolute_error: 102.002258, mean_q: -0.465347
  331/5000: episode: 330, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.542 [0.006, 1.000], loss: 120824.492188, mean_absolute_error: 92.000237, mean_q: -0.458790
  332/5000: episode: 331, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.

  371/5000: episode: 370, duration: 0.015s, episode steps: 1, steps per second: 65, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.710 [0.133, 1.000], loss: 150429.968750, mean_absolute_error: 112.778229, mean_q: -0.293007
  372/5000: episode: 371, duration: 0.015s, episode steps: 1, steps per second: 66, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.682 [0.097, 1.000], loss: 90274.843750, mean_absolute_error: 72.673698, mean_q: -0.286228
  373/5000: episode: 372, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.707 [0.128, 1.000], loss: 180440.906250, mean_absolute_error: 132.833725, mean_q: -0.285778
  374/5000: episode: 373, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0

  401/5000: episode: 400, duration: 0.015s, episode steps: 1, steps per second: 65, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.320 [0.000, 0.905], loss: 90068.390625, mean_absolute_error: 73.240311, mean_q: -0.182780
  402/5000: episode: 401, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.678 [0.093, 1.000], loss: 75079.687500, mean_absolute_error: 63.286125, mean_q: -0.181309
  403/5000: episode: 402, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.193 [0.000, 0.617], loss: 150062.453125, mean_absolute_error: 113.201958, mean_q: -0.173175
  404/5000: episode: 403, duration: 0.014s, episode steps: 1, steps per second: 73, episode reward: -0.001, mean reward: -0.0

  443/5000: episode: 442, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.651 [0.066, 1.000], loss: 74808.304688, mean_absolute_error: 63.964050, mean_q: -0.033665
  444/5000: episode: 443, duration: 0.016s, episode steps: 1, steps per second: 63, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.231 [0.000, 0.757], loss: 74810.703125, mean_absolute_error: 63.986835, mean_q: -0.028504
  445/5000: episode: 444, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.279 [0.000, 0.852], loss: 89764.109375, mean_absolute_error: 73.988113, mean_q: -0.027514
  446/5000: episode: 445, duration: 0.014s, episode steps: 1, steps per second: 73, episode reward: -0.001, mean reward: -0.001

  482/5000: episode: 481, duration: 0.017s, episode steps: 1, steps per second: 59, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.189 [0.000, 0.595], loss: 104473.843750, mean_absolute_error: 84.578262, mean_q: 0.083237
  483/5000: episode: 482, duration: 0.021s, episode steps: 1, steps per second: 48, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.182 [0.000, 0.496], loss: 149277.281250, mean_absolute_error: 114.447678, mean_q: 0.086879
  484/5000: episode: 483, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.614 [0.038, 1.000], loss: 164133.203125, mean_absolute_error: 124.334572, mean_q: 0.089937
  485/5000: episode: 484, duration: 0.019s, episode steps: 1, steps per second: 53, episode reward: -0.001, mean reward: -0.0

  521/5000: episode: 520, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.447 [0.000, 0.991], loss: 74395.390625, mean_absolute_error: 65.368790, mean_q: 0.165741
  522/5000: episode: 521, duration: 0.015s, episode steps: 1, steps per second: 65, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.532 [0.004, 1.000], loss: 89423.093750, mean_absolute_error: 75.346100, mean_q: 0.168828
  523/5000: episode: 522, duration: 0.016s, episode steps: 1, steps per second: 61, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.187 [0.000, 0.575], loss: 119194.453125, mean_absolute_error: 95.200539, mean_q: 0.170885
  524/5000: episode: 523, duration: 0.014s, episode steps: 1, steps per second: 72, episode reward: -0.001, mean reward: -0.001 [

  563/5000: episode: 562, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.816 [0.449, 1.000], loss: 118828.015625, mean_absolute_error: 95.779457, mean_q: 0.273814
  564/5000: episode: 563, duration: 0.018s, episode steps: 1, steps per second: 54, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.582 [0.020, 1.000], loss: 44598.515625, mean_absolute_error: 46.330605, mean_q: 0.276560
  565/5000: episode: 564, duration: 0.016s, episode steps: 1, steps per second: 61, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.737 [0.176, 1.000], loss: 89108.625000, mean_absolute_error: 75.998367, mean_q: 0.279707
  566/5000: episode: 565, duration: 0.016s, episode steps: 1, steps per second: 64, episode reward: -0.001, mean reward: -0.001 [

  606/5000: episode: 605, duration: 0.017s, episode steps: 1, steps per second: 60, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.258 [0.000, 0.815], loss: 14796.308594, mean_absolute_error: 27.350546, mean_q: 0.384431
  607/5000: episode: 606, duration: 0.016s, episode steps: 1, steps per second: 63, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.230 [0.000, 0.754], loss: 44488.695312, mean_absolute_error: 47.132118, mean_q: 0.387157
  608/5000: episode: 607, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.328 [0.000, 0.915], loss: 103711.640625, mean_absolute_error: 86.563805, mean_q: 0.389468
  609/5000: episode: 608, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [

  647/5000: episode: 646, duration: 0.015s, episode steps: 1, steps per second: 66, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.189 [0.000, 0.596], loss: 29628.384766, mean_absolute_error: 37.996773, mean_q: 0.482418
  648/5000: episode: 647, duration: 0.017s, episode steps: 1, steps per second: 60, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.662 [0.076, 1.000], loss: 147670.437500, mean_absolute_error: 116.661186, mean_q: 0.484676
  649/5000: episode: 648, duration: 0.016s, episode steps: 1, steps per second: 64, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.674 [0.088, 1.000], loss: 103516.539062, mean_absolute_error: 87.282211, mean_q: 0.486922
  650/5000: episode: 649, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001

  689/5000: episode: 688, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.770 [0.245, 1.000], loss: 44302.304688, mean_absolute_error: 48.592712, mean_q: 0.565322
  690/5000: episode: 689, duration: 0.015s, episode steps: 1, steps per second: 65, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.377 [0.000, 0.957], loss: 88432.968750, mean_absolute_error: 78.010101, mean_q: 0.567164
  691/5000: episode: 690, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.714 [0.139, 1.000], loss: 29568.460938, mean_absolute_error: 38.803867, mean_q: 0.568712
  692/5000: episode: 691, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: -0.001 [-

  731/5000: episode: 730, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.787 [0.295, 1.000], loss: 44187.128906, mean_absolute_error: 49.272072, mean_q: 0.637574
  732/5000: episode: 731, duration: 0.015s, episode steps: 1, steps per second: 65, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.791 [0.308, 1.000], loss: 29359.050781, mean_absolute_error: 39.407867, mean_q: 0.639919
  733/5000: episode: 732, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.729 [0.161, 1.000], loss: 44188.390625, mean_absolute_error: 49.306427, mean_q: 0.641660
  734/5000: episode: 733, duration: 0.016s, episode steps: 1, steps per second: 63, episode reward: -0.001, mean reward: -0.001 [-

  773/5000: episode: 772, duration: 0.021s, episode steps: 1, steps per second: 48, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.394 [0.000, 0.967], loss: 44199.078125, mean_absolute_error: 49.986530, mean_q: 0.688902
  774/5000: episode: 773, duration: 0.018s, episode steps: 1, steps per second: 57, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.187 [0.000, 0.579], loss: 58686.914062, mean_absolute_error: 59.639343, mean_q: 0.690476
  775/5000: episode: 774, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.465 [0.000, 0.995], loss: 29369.173828, mean_absolute_error: 40.175217, mean_q: 0.692010
  776/5000: episode: 775, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: -0.001 [-

  814/5000: episode: 813, duration: 0.018s, episode steps: 1, steps per second: 55, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.259 [0.000, 0.818], loss: 88020.609375, mean_absolute_error: 79.929276, mean_q: 0.760022
  815/5000: episode: 814, duration: 0.017s, episode steps: 1, steps per second: 60, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.242 [0.000, 0.784], loss: 87913.437500, mean_absolute_error: 79.865746, mean_q: 0.761853
  816/5000: episode: 815, duration: 0.020s, episode steps: 1, steps per second: 51, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.670 [0.084, 1.000], loss: 58602.332031, mean_absolute_error: 60.367821, mean_q: 0.763336
  817/5000: episode: 816, duration: 0.020s, episode steps: 1, steps per second: 49, episode reward: -0.001, mean reward: -0.001 [-

  850/5000: episode: 849, duration: 0.021s, episode steps: 1, steps per second: 48, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.332 [0.000, 0.918], loss: 73075.078125, mean_absolute_error: 70.590820, mean_q: 0.816802
  851/5000: episode: 850, duration: 0.019s, episode steps: 1, steps per second: 54, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.748 [0.197, 1.000], loss: 29345.203125, mean_absolute_error: 41.491890, mean_q: 0.818408
  852/5000: episode: 851, duration: 0.017s, episode steps: 1, steps per second: 58, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.640 [0.056, 1.000], loss: 43827.402344, mean_absolute_error: 51.154873, mean_q: 0.820679
  853/5000: episode: 852, duration: 0.017s, episode steps: 1, steps per second: 59, episode reward: -0.001, mean reward: -0.001 [-

  891/5000: episode: 890, duration: 0.017s, episode steps: 1, steps per second: 58, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.384 [0.000, 0.961], loss: 43639.925781, mean_absolute_error: 51.656807, mean_q: 0.880305
  892/5000: episode: 891, duration: 0.020s, episode steps: 1, steps per second: 50, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.775 [0.258, 1.000], loss: 58377.859375, mean_absolute_error: 61.499336, mean_q: 0.881021
  893/5000: episode: 892, duration: 0.018s, episode steps: 1, steps per second: 57, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.271 [0.000, 0.838], loss: 87568.414062, mean_absolute_error: 80.964096, mean_q: 0.882074
  894/5000: episode: 893, duration: 0.016s, episode steps: 1, steps per second: 62, episode reward: -0.001, mean reward: -0.001 [-

  928/5000: episode: 927, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.586 [0.022, 1.000], loss: 14617.686523, mean_absolute_error: 32.955376, mean_q: 0.932100
  929/5000: episode: 928, duration: 0.015s, episode steps: 1, steps per second: 65, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.607 [0.033, 1.000], loss: 58168.453125, mean_absolute_error: 61.977768, mean_q: 0.934254
  930/5000: episode: 929, duration: 0.014s, episode steps: 1, steps per second: 72, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.191 [0.000, 0.610], loss: 29133.763672, mean_absolute_error: 42.654114, mean_q: 0.936451
  931/5000: episode: 930, duration: 0.014s, episode steps: 1, steps per second: 74, episode reward: -0.001, mean reward: -0.001 [-

  968/5000: episode: 967, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.786 [0.291, 1.000], loss: 14482.774414, mean_absolute_error: 33.573399, mean_q: 0.980382
  969/5000: episode: 968, duration: 0.020s, episode steps: 1, steps per second: 51, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.306 [0.000, 0.889], loss: 43446.156250, mean_absolute_error: 52.885487, mean_q: 0.982018
  970/5000: episode: 969, duration: 0.018s, episode steps: 1, steps per second: 54, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.495 [0.000, 1.000], loss: 43658.304688, mean_absolute_error: 53.062599, mean_q: 0.983143
  971/5000: episode: 970, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-

 1009/5000: episode: 1008, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.425 [0.000, 0.983], loss: 0.527967, mean_absolute_error: 24.637964, mean_q: 1.026579
 1010/5000: episode: 1009, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.422 [0.000, 0.982], loss: 58141.921875, mean_absolute_error: 63.355164, mean_q: 1.026629
 1011/5000: episode: 1010, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.306 [0.000, 0.889], loss: 29127.871094, mean_absolute_error: 44.043453, mean_q: 1.027065
 1012/5000: episode: 1011, duration: 0.014s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-

 1051/5000: episode: 1050, duration: 0.014s, episode steps: 1, steps per second: 72, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.730 [0.163, 1.000], loss: 43263.089844, mean_absolute_error: 54.072845, mean_q: 1.068407
 1052/5000: episode: 1051, duration: 0.016s, episode steps: 1, steps per second: 61, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.818 [0.496, 1.000], loss: 14419.010742, mean_absolute_error: 34.898453, mean_q: 1.069310
 1053/5000: episode: 1052, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.586 [0.022, 1.000], loss: 43494.566406, mean_absolute_error: 54.244507, mean_q: 1.068666
 1054/5000: episode: 1053, duration: 0.015s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.00

 1093/5000: episode: 1092, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.267 [0.000, 0.832], loss: 43410.367188, mean_absolute_error: 54.824921, mean_q: 1.105809
 1094/5000: episode: 1093, duration: 0.016s, episode steps: 1, steps per second: 62, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.626 [0.046, 1.000], loss: 43407.722656, mean_absolute_error: 54.835537, mean_q: 1.107397
 1095/5000: episode: 1094, duration: 0.015s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.818 [0.501, 1.000], loss: 57790.457031, mean_absolute_error: 64.418358, mean_q: 1.108513
 1096/5000: episode: 1095, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: -0.00

 1135/5000: episode: 1134, duration: 0.014s, episode steps: 1, steps per second: 72, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.725 [0.155, 1.000], loss: 43089.015625, mean_absolute_error: 55.314606, mean_q: 1.158298
 1136/5000: episode: 1135, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.721 [0.149, 1.000], loss: 28721.142578, mean_absolute_error: 45.761223, mean_q: 1.158414
 1137/5000: episode: 1136, duration: 0.014s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.433 [0.000, 0.986], loss: 14475.087891, mean_absolute_error: 36.295258, mean_q: 1.158309
 1138/5000: episode: 1137, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.00

 1177/5000: episode: 1176, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.234 [0.000, 0.764], loss: 57322.820312, mean_absolute_error: 65.427422, mean_q: 1.193339
 1178/5000: episode: 1177, duration: 0.016s, episode steps: 1, steps per second: 63, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.818 [0.505, 1.000], loss: 28902.015625, mean_absolute_error: 46.540855, mean_q: 1.193409
 1179/5000: episode: 1178, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.728 [0.160, 1.000], loss: 0.714388, mean_absolute_error: 27.336483, mean_q: 1.194308
 1180/5000: episode: 1179, duration: 0.014s, episode steps: 1, steps per second: 73, episode reward: -0.001, mean reward: -0.001 [-

 1219/5000: episode: 1218, duration: 0.014s, episode steps: 1, steps per second: 74, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.526 [0.003, 1.000], loss: 14303.645508, mean_absolute_error: 37.381680, mean_q: 1.211005
 1220/5000: episode: 1219, duration: 0.016s, episode steps: 1, steps per second: 61, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.182 [0.000, 0.515], loss: 28847.957031, mean_absolute_error: 47.078766, mean_q: 1.211210
 1221/5000: episode: 1220, duration: 0.015s, episode steps: 1, steps per second: 66, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.816 [0.453, 1.000], loss: 57572.484375, mean_absolute_error: 66.193680, mean_q: 1.211218
 1222/5000: episode: 1221, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.00

 1261/5000: episode: 1260, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.767 [0.238, 1.000], loss: 0.772282, mean_absolute_error: 28.531494, mean_q: 1.241801
 1262/5000: episode: 1261, duration: 0.016s, episode steps: 1, steps per second: 61, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.220 [0.000, 0.726], loss: 57334.628906, mean_absolute_error: 66.677582, mean_q: 1.241457
 1263/5000: episode: 1262, duration: 0.015s, episode steps: 1, steps per second: 66, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.818 [0.492, 1.000], loss: 14394.436523, mean_absolute_error: 38.122765, mean_q: 1.241963
 1264/5000: episode: 1263, duration: 0.014s, episode steps: 1, steps per second: 72, episode reward: -0.001, mean reward: -0.001 [-

 1303/5000: episode: 1302, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.184 [0.000, 0.552], loss: 28493.769531, mean_absolute_error: 48.107933, mean_q: 1.261135
 1304/5000: episode: 1303, duration: 0.016s, episode steps: 1, steps per second: 62, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.251 [0.000, 0.802], loss: 28487.796875, mean_absolute_error: 48.118900, mean_q: 1.261614
 1305/5000: episode: 1304, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.578 [0.018, 1.000], loss: 71475.226562, mean_absolute_error: 76.707390, mean_q: 1.260728
 1306/5000: episode: 1305, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.00

 1343/5000: episode: 1342, duration: 0.018s, episode steps: 1, steps per second: 57, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.266 [0.000, 0.829], loss: 42902.546875, mean_absolute_error: 58.309273, mean_q: 1.275588
 1344/5000: episode: 1343, duration: 0.019s, episode steps: 1, steps per second: 52, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.797 [0.333, 1.000], loss: 28557.140625, mean_absolute_error: 48.785679, mean_q: 1.275908
 1345/5000: episode: 1344, duration: 0.016s, episode steps: 1, steps per second: 63, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.669 [0.083, 1.000], loss: 28555.800781, mean_absolute_error: 48.805214, mean_q: 1.276344
 1346/5000: episode: 1345, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.00

 1381/5000: episode: 1380, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.472 [0.000, 0.997], loss: 71217.804688, mean_absolute_error: 77.670296, mean_q: 1.287718
 1382/5000: episode: 1381, duration: 0.017s, episode steps: 1, steps per second: 58, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.353 [0.000, 0.938], loss: 57168.117188, mean_absolute_error: 68.335838, mean_q: 1.287849
 1383/5000: episode: 1382, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.697 [0.115, 1.000], loss: 28508.929688, mean_absolute_error: 49.299019, mean_q: 1.289286
 1384/5000: episode: 1383, duration: 0.015s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.00

 1420/5000: episode: 1419, duration: 0.015s, episode steps: 1, steps per second: 65, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.817 [0.461, 1.000], loss: 0.867582, mean_absolute_error: 30.954128, mean_q: 1.316253
 1421/5000: episode: 1420, duration: 0.020s, episode steps: 1, steps per second: 51, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.495 [0.000, 1.000], loss: 14162.774414, mean_absolute_error: 40.386490, mean_q: 1.316472
 1422/5000: episode: 1421, duration: 0.018s, episode steps: 1, steps per second: 57, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.620 [0.042, 1.000], loss: 28327.035156, mean_absolute_error: 49.829285, mean_q: 1.316858
 1423/5000: episode: 1422, duration: 0.018s, episode steps: 1, steps per second: 54, episode reward: -0.001, mean reward: -0.001 [-

 1461/5000: episode: 1460, duration: 0.017s, episode steps: 1, steps per second: 58, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.755 [0.210, 1.000], loss: 28399.316406, mean_absolute_error: 50.485462, mean_q: 1.329193
 1462/5000: episode: 1461, duration: 0.021s, episode steps: 1, steps per second: 48, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.652 [0.067, 1.000], loss: 14133.074219, mean_absolute_error: 41.010635, mean_q: 1.329410
 1463/5000: episode: 1462, duration: 0.018s, episode steps: 1, steps per second: 56, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.191 [0.000, 0.610], loss: 28394.585938, mean_absolute_error: 50.513832, mean_q: 1.329480
 1464/5000: episode: 1463, duration: 0.016s, episode steps: 1, steps per second: 62, episode reward: -0.001, mean reward: -0.00

 1502/5000: episode: 1501, duration: 0.016s, episode steps: 1, steps per second: 63, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.382 [0.000, 0.959], loss: 42448.597656, mean_absolute_error: 60.437996, mean_q: 1.335739
 1503/5000: episode: 1502, duration: 0.015s, episode steps: 1, steps per second: 66, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.788 [0.298, 1.000], loss: 14104.280273, mean_absolute_error: 41.599041, mean_q: 1.335932
 1504/5000: episode: 1503, duration: 0.014s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.230 [0.000, 0.755], loss: 28472.875000, mean_absolute_error: 51.171402, mean_q: 1.335884
 1505/5000: episode: 1504, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: -0.00

 1544/5000: episode: 1543, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.560 [0.011, 1.000], loss: 28288.406250, mean_absolute_error: 51.627129, mean_q: 1.342648
 1545/5000: episode: 1544, duration: 0.016s, episode steps: 1, steps per second: 63, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.220 [0.000, 0.728], loss: 28287.843750, mean_absolute_error: 51.622391, mean_q: 1.342252
 1546/5000: episode: 1545, duration: 0.016s, episode steps: 1, steps per second: 61, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.818 [0.483, 1.000], loss: 28154.292969, mean_absolute_error: 51.557457, mean_q: 1.342507
 1547/5000: episode: 1546, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.00

 1586/5000: episode: 1585, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.338 [0.000, 0.924], loss: 14187.521484, mean_absolute_error: 42.846790, mean_q: 1.352023
 1587/5000: episode: 1586, duration: 0.015s, episode steps: 1, steps per second: 65, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.185 [0.000, 0.557], loss: 0.914971, mean_absolute_error: 33.416580, mean_q: 1.351749
 1588/5000: episode: 1587, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.792 [0.311, 1.000], loss: 56591.062500, mean_absolute_error: 71.065636, mean_q: 1.351743
 1589/5000: episode: 1588, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: -0.001 [-

 1628/5000: episode: 1627, duration: 0.015s, episode steps: 1, steps per second: 66, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.244 [0.000, 0.788], loss: 0.911435, mean_absolute_error: 34.074772, mean_q: 1.349136
 1629/5000: episode: 1628, duration: 0.016s, episode steps: 1, steps per second: 63, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.643 [0.059, 1.000], loss: 42328.859375, mean_absolute_error: 62.234314, mean_q: 1.348893
 1630/5000: episode: 1629, duration: 0.016s, episode steps: 1, steps per second: 61, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.727 [0.159, 1.000], loss: 14153.301758, mean_absolute_error: 43.511127, mean_q: 1.348694
 1631/5000: episode: 1630, duration: 0.016s, episode steps: 1, steps per second: 64, episode reward: -0.001, mean reward: -0.001 [-

 1669/5000: episode: 1668, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.771 [0.249, 1.000], loss: 13988.841797, mean_absolute_error: 44.091553, mean_q: 1.347443
 1670/5000: episode: 1669, duration: 0.016s, episode steps: 1, steps per second: 64, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.818 [0.499, 1.000], loss: 0.907845, mean_absolute_error: 34.801262, mean_q: 1.346473
 1671/5000: episode: 1670, duration: 0.015s, episode steps: 1, steps per second: 66, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.255 [0.000, 0.811], loss: 0.906492, mean_absolute_error: 34.805450, mean_q: 1.345467
 1672/5000: episode: 1671, duration: 0.015s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.00

 1711/5000: episode: 1710, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.517 [0.002, 1.000], loss: 0.905861, mean_absolute_error: 35.506199, mean_q: 1.344986
 1712/5000: episode: 1711, duration: 0.017s, episode steps: 1, steps per second: 59, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.367 [0.000, 0.949], loss: 56245.132812, mean_absolute_error: 72.904907, mean_q: 1.343743
 1713/5000: episode: 1712, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.794 [0.321, 1.000], loss: 28190.166016, mean_absolute_error: 54.270023, mean_q: 1.344033
 1714/5000: episode: 1713, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-

 1753/5000: episode: 1752, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.724 [0.153, 1.000], loss: 14066.291016, mean_absolute_error: 45.545486, mean_q: 1.335736
 1754/5000: episode: 1753, duration: 0.017s, episode steps: 1, steps per second: 59, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.383 [0.000, 0.961], loss: 14065.788086, mean_absolute_error: 45.557693, mean_q: 1.335322
 1755/5000: episode: 1754, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.420 [0.000, 0.981], loss: 27986.099609, mean_absolute_error: 54.814476, mean_q: 1.334814
 1756/5000: episode: 1755, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: -0.00

 1795/5000: episode: 1794, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.253 [0.000, 0.806], loss: 13890.741211, mean_absolute_error: 46.109489, mean_q: 1.327192
 1796/5000: episode: 1795, duration: 0.016s, episode steps: 1, steps per second: 62, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.798 [0.338, 1.000], loss: 14035.275391, mean_absolute_error: 46.226349, mean_q: 1.326665
 1797/5000: episode: 1796, duration: 0.015s, episode steps: 1, steps per second: 66, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.210 [0.000, 0.696], loss: 13889.100586, mean_absolute_error: 46.149452, mean_q: 1.326911
 1798/5000: episode: 1797, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.00

 1837/5000: episode: 1836, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.799 [0.340, 1.000], loss: 41726.773438, mean_absolute_error: 65.302299, mean_q: 1.310953
 1838/5000: episode: 1837, duration: 0.017s, episode steps: 1, steps per second: 58, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.790 [0.306, 1.000], loss: 27862.736328, mean_absolute_error: 56.101582, mean_q: 1.310243
 1839/5000: episode: 1838, duration: 0.016s, episode steps: 1, steps per second: 62, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.193 [0.000, 0.620], loss: 0.859345, mean_absolute_error: 37.617706, mean_q: 1.309986
 1840/5000: episode: 1839, duration: 0.014s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-

 1879/5000: episode: 1878, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.554 [0.010, 1.000], loss: 13977.334961, mean_absolute_error: 47.581749, mean_q: 1.298935
 1880/5000: episode: 1879, duration: 0.015s, episode steps: 1, steps per second: 66, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.208 [0.000, 0.687], loss: 0.843886, mean_absolute_error: 38.306877, mean_q: 1.298141
 1881/5000: episode: 1880, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.452 [0.000, 0.992], loss: 55603.242188, mean_absolute_error: 75.245392, mean_q: 1.297001
 1882/5000: episode: 1881, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: -0.001 [-

 1921/5000: episode: 1920, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.241 [0.000, 0.782], loss: 13790.779297, mean_absolute_error: 48.106464, mean_q: 1.271322
 1922/5000: episode: 1921, duration: 0.016s, episode steps: 1, steps per second: 62, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.530 [0.003, 1.000], loss: 27577.380859, mean_absolute_error: 57.275734, mean_q: 1.270952
 1923/5000: episode: 1922, duration: 0.015s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.367 [0.000, 0.949], loss: 0.808340, mean_absolute_error: 38.988297, mean_q: 1.270485
 1924/5000: episode: 1923, duration: 0.014s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-

 1963/5000: episode: 1962, duration: 0.016s, episode steps: 1, steps per second: 64, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.182 [0.000, 0.522], loss: 13922.704102, mean_absolute_error: 48.850422, mean_q: 1.254377
 1964/5000: episode: 1963, duration: 0.015s, episode steps: 1, steps per second: 66, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.275 [0.000, 0.846], loss: 13921.833984, mean_absolute_error: 48.877678, mean_q: 1.254367
 1965/5000: episode: 1964, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.528 [0.003, 1.000], loss: 0.787335, mean_absolute_error: 39.638351, mean_q: 1.253856
 1966/5000: episode: 1965, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-

 2005/5000: episode: 2004, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.718 [0.144, 1.000], loss: 55390.753906, mean_absolute_error: 77.171288, mean_q: 1.241160
 2006/5000: episode: 2005, duration: 0.017s, episode steps: 1, steps per second: 60, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.278 [0.000, 0.849], loss: 27775.470703, mean_absolute_error: 58.853165, mean_q: 1.241459
 2007/5000: episode: 2006, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.481 [0.000, 0.998], loss: 55062.234375, mean_absolute_error: 76.976624, mean_q: 1.241970
 2008/5000: episode: 2007, duration: 0.014s, episode steps: 1, steps per second: 72, episode reward: -0.001, mean reward: -0.00

 2047/5000: episode: 2046, duration: 0.015s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.482 [0.000, 0.998], loss: 27555.167969, mean_absolute_error: 59.359154, mean_q: 1.222806
 2048/5000: episode: 2047, duration: 0.015s, episode steps: 1, steps per second: 65, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.588 [0.023, 1.000], loss: 13859.231445, mean_absolute_error: 50.277809, mean_q: 1.221316
 2049/5000: episode: 2048, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.189 [0.000, 0.593], loss: 41408.406250, mean_absolute_error: 68.585297, mean_q: 1.220227
 2050/5000: episode: 2049, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: -0.00

 2089/5000: episode: 2088, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.432 [0.000, 0.986], loss: 27337.378906, mean_absolute_error: 59.796204, mean_q: 1.186274
 2090/5000: episode: 2089, duration: 0.018s, episode steps: 1, steps per second: 57, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.641 [0.057, 1.000], loss: 13666.770508, mean_absolute_error: 50.731640, mean_q: 1.184718
 2091/5000: episode: 2090, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.188 [0.000, 0.586], loss: 13848.197266, mean_absolute_error: 50.851387, mean_q: 1.182554
 2092/5000: episode: 2091, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: -0.00

 2131/5000: episode: 2130, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.809 [0.392, 1.000], loss: 0.662996, mean_absolute_error: 42.026123, mean_q: 1.150515
 2132/5000: episode: 2131, duration: 0.017s, episode steps: 1, steps per second: 60, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.276 [0.000, 0.846], loss: 0.661754, mean_absolute_error: 42.043701, mean_q: 1.149437
 2133/5000: episode: 2132, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.590 [0.024, 1.000], loss: 27470.109375, mean_absolute_error: 60.269985, mean_q: 1.147889
 2134/5000: episode: 2133, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.00

 2173/5000: episode: 2172, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.443 [0.000, 0.990], loss: 0.598059, mean_absolute_error: 42.106731, mean_q: 1.092671
 2174/5000: episode: 2173, duration: 0.017s, episode steps: 1, steps per second: 58, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.574 [0.017, 1.000], loss: 0.596211, mean_absolute_error: 42.098953, mean_q: 1.090979
 2175/5000: episode: 2174, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.273 [0.000, 0.841], loss: 0.594666, mean_absolute_error: 42.098480, mean_q: 1.089563
 2176/5000: episode: 2175, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 2215/5000: episode: 2214, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.325 [0.000, 0.911], loss: 0.532573, mean_absolute_error: 42.085884, mean_q: 1.031056
 2216/5000: episode: 2215, duration: 0.016s, episode steps: 1, steps per second: 63, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.691 [0.108, 1.000], loss: 0.530919, mean_absolute_error: 42.076508, mean_q: 1.029450
 2217/5000: episode: 2216, duration: 0.017s, episode steps: 1, steps per second: 60, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.815 [0.442, 1.000], loss: 0.529772, mean_absolute_error: 42.098324, mean_q: 1.028341
 2218/5000: episode: 2217, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 2257/5000: episode: 2256, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.377 [0.000, 0.956], loss: 0.470418, mean_absolute_error: 42.066521, mean_q: 0.968965
 2258/5000: episode: 2257, duration: 0.016s, episode steps: 1, steps per second: 64, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.182 [0.000, 0.509], loss: 0.468947, mean_absolute_error: 42.067207, mean_q: 0.967445
 2259/5000: episode: 2258, duration: 0.014s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.351 [0.000, 0.936], loss: 0.467564, mean_absolute_error: 42.067238, mean_q: 0.966017
 2260/5000: episode: 2259, duration: 0.014s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 2299/5000: episode: 2298, duration: 0.014s, episode steps: 1, steps per second: 73, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.644 [0.060, 1.000], loss: 0.413255, mean_absolute_error: 42.057503, mean_q: 0.908125
 2300/5000: episode: 2299, duration: 0.015s, episode steps: 1, steps per second: 66, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.736 [0.174, 1.000], loss: 0.411884, mean_absolute_error: 42.051254, mean_q: 0.906614
 2301/5000: episode: 2300, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.331 [0.000, 0.917], loss: 0.410599, mean_absolute_error: 42.050102, mean_q: 0.905198
 2302/5000: episode: 2301, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 2341/5000: episode: 2340, duration: 0.014s, episode steps: 1, steps per second: 73, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.467 [0.000, 0.996], loss: 0.360766, mean_absolute_error: 42.026169, mean_q: 0.848427
 2342/5000: episode: 2341, duration: 0.017s, episode steps: 1, steps per second: 59, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.637 [0.054, 1.000], loss: 0.359630, mean_absolute_error: 42.031410, mean_q: 0.847090
 2343/5000: episode: 2342, duration: 0.014s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.261 [0.000, 0.821], loss: 0.358471, mean_absolute_error: 42.030029, mean_q: 0.845722
 2344/5000: episode: 2343, duration: 0.014s, episode steps: 1, steps per second: 72, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 2383/5000: episode: 2382, duration: 0.016s, episode steps: 1, steps per second: 63, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.287 [0.000, 0.864], loss: 0.313272, mean_absolute_error: 42.014950, mean_q: 0.790544
 2384/5000: episode: 2383, duration: 0.016s, episode steps: 1, steps per second: 62, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.807 [0.377, 1.000], loss: 0.312181, mean_absolute_error: 42.011761, mean_q: 0.789164
 2385/5000: episode: 2384, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.197 [0.000, 0.640], loss: 0.311114, mean_absolute_error: 42.011475, mean_q: 0.787813
 2386/5000: episode: 2385, duration: 0.014s, episode steps: 1, steps per second: 72, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 2425/5000: episode: 2424, duration: 0.016s, episode steps: 1, steps per second: 63, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.718 [0.144, 1.000], loss: 0.270244, mean_absolute_error: 41.991570, mean_q: 0.734177
 2426/5000: episode: 2425, duration: 0.016s, episode steps: 1, steps per second: 63, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.816 [0.457, 1.000], loss: 0.269267, mean_absolute_error: 41.989731, mean_q: 0.732845
 2427/5000: episode: 2426, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.730 [0.163, 1.000], loss: 0.268284, mean_absolute_error: 41.991001, mean_q: 0.731505
 2428/5000: episode: 2427, duration: 0.014s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 2463/5000: episode: 2462, duration: 0.016s, episode steps: 1, steps per second: 62, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.211 [0.000, 0.697], loss: 0.235182, mean_absolute_error: 41.971672, mean_q: 0.684826
 2464/5000: episode: 2463, duration: 0.019s, episode steps: 1, steps per second: 52, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.805 [0.367, 1.000], loss: 0.233851, mean_absolute_error: 41.960014, mean_q: 0.682873
 2465/5000: episode: 2464, duration: 0.015s, episode steps: 1, steps per second: 66, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.761 [0.223, 1.000], loss: 0.233443, mean_absolute_error: 41.975483, mean_q: 0.682288
 2466/5000: episode: 2465, duration: 0.017s, episode steps: 1, steps per second: 60, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 2502/5000: episode: 2501, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.196 [0.000, 0.638], loss: 0.202737, mean_absolute_error: 41.952621, mean_q: 0.635764
 2503/5000: episode: 2502, duration: 0.017s, episode steps: 1, steps per second: 58, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.703 [0.123, 1.000], loss: 0.201905, mean_absolute_error: 41.954453, mean_q: 0.634457
 2504/5000: episode: 2503, duration: 0.015s, episode steps: 1, steps per second: 65, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.646 [0.061, 1.000], loss: 0.201145, mean_absolute_error: 41.950562, mean_q: 0.633258
 2505/5000: episode: 2504, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 2544/5000: episode: 2543, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.282 [0.000, 0.856], loss: 0.171606, mean_absolute_error: 41.941444, mean_q: 0.584842
 2545/5000: episode: 2544, duration: 0.015s, episode steps: 1, steps per second: 66, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.610 [0.035, 1.000], loss: 0.170905, mean_absolute_error: 41.941521, mean_q: 0.583642
 2546/5000: episode: 2545, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.204 [0.000, 0.672], loss: 0.170242, mean_absolute_error: 41.940426, mean_q: 0.582508
 2547/5000: episode: 2546, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 2586/5000: episode: 2585, duration: 0.016s, episode steps: 1, steps per second: 64, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.605 [0.032, 1.000], loss: 0.144224, mean_absolute_error: 41.926498, mean_q: 0.536072
 2587/5000: episode: 2586, duration: 0.016s, episode steps: 1, steps per second: 64, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.182 [0.000, 0.495], loss: 0.143611, mean_absolute_error: 41.930096, mean_q: 0.534930
 2588/5000: episode: 2587, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.315 [0.000, 0.899], loss: 0.142980, mean_absolute_error: 41.919861, mean_q: 0.533749
 2589/5000: episode: 2588, duration: 0.014s, episode steps: 1, steps per second: 73, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 2628/5000: episode: 2627, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.749 [0.198, 1.000], loss: 0.120270, mean_absolute_error: 41.904625, mean_q: 0.489443
 2629/5000: episode: 2628, duration: 0.015s, episode steps: 1, steps per second: 66, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.271 [0.000, 0.839], loss: 0.119732, mean_absolute_error: 41.905270, mean_q: 0.488347
 2630/5000: episode: 2629, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.514 [0.001, 1.000], loss: 0.119237, mean_absolute_error: 41.914558, mean_q: 0.487338
 2631/5000: episode: 2630, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 2670/5000: episode: 2669, duration: 0.016s, episode steps: 1, steps per second: 63, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.275 [0.000, 0.845], loss: 0.099540, mean_absolute_error: 41.892593, mean_q: 0.445179
 2671/5000: episode: 2670, duration: 0.019s, episode steps: 1, steps per second: 53, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.805 [0.367, 1.000], loss: 0.099071, mean_absolute_error: 41.891296, mean_q: 0.444125
 2672/5000: episode: 2671, duration: 0.015s, episode steps: 1, steps per second: 65, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.549 [0.008, 1.000], loss: 0.098622, mean_absolute_error: 41.893253, mean_q: 0.443119
 2673/5000: episode: 2672, duration: 0.014s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 2712/5000: episode: 2711, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.419 [0.000, 0.980], loss: 0.081659, mean_absolute_error: 41.879494, mean_q: 0.403123
 2713/5000: episode: 2712, duration: 0.016s, episode steps: 1, steps per second: 63, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.436 [0.000, 0.987], loss: 0.081281, mean_absolute_error: 41.877670, mean_q: 0.402183
 2714/5000: episode: 2713, duration: 0.016s, episode steps: 1, steps per second: 64, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.795 [0.323, 1.000], loss: 0.080886, mean_absolute_error: 41.877220, mean_q: 0.401206
 2715/5000: episode: 2714, duration: 0.014s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 2754/5000: episode: 2753, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.194 [0.000, 0.627], loss: 0.066452, mean_absolute_error: 41.868443, mean_q: 0.363555
 2755/5000: episode: 2754, duration: 0.016s, episode steps: 1, steps per second: 63, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.219 [0.000, 0.723], loss: 0.066119, mean_absolute_error: 41.864716, mean_q: 0.362636
 2756/5000: episode: 2755, duration: 0.015s, episode steps: 1, steps per second: 65, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.356 [0.000, 0.940], loss: 0.065803, mean_absolute_error: 41.860790, mean_q: 0.361766
 2757/5000: episode: 2756, duration: 0.014s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 2792/5000: episode: 2791, duration: 0.020s, episode steps: 1, steps per second: 49, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.226 [0.000, 0.744], loss: 0.054710, mean_absolute_error: 41.853012, mean_q: 0.329780
 2793/5000: episode: 2792, duration: 0.017s, episode steps: 1, steps per second: 60, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.584 [0.021, 1.000], loss: 0.054419, mean_absolute_error: 41.854950, mean_q: 0.328904
 2794/5000: episode: 2793, duration: 0.017s, episode steps: 1, steps per second: 60, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.810 [0.395, 1.000], loss: 0.054137, mean_absolute_error: 41.853741, mean_q: 0.328045
 2795/5000: episode: 2794, duration: 0.017s, episode steps: 1, steps per second: 59, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 2834/5000: episode: 2833, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.382 [0.000, 0.960], loss: 0.043728, mean_absolute_error: 41.841019, mean_q: 0.294725
 2835/5000: episode: 2834, duration: 0.017s, episode steps: 1, steps per second: 60, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.263 [0.000, 0.825], loss: 0.043487, mean_absolute_error: 41.836258, mean_q: 0.293900
 2836/5000: episode: 2835, duration: 0.017s, episode steps: 1, steps per second: 60, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.654 [0.069, 1.000], loss: 0.043254, mean_absolute_error: 41.846771, mean_q: 0.293121
 2837/5000: episode: 2836, duration: 0.015s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 2875/5000: episode: 2874, duration: 0.018s, episode steps: 1, steps per second: 55, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.743 [0.187, 1.000], loss: 0.034807, mean_absolute_error: 41.832581, mean_q: 0.262836
 2876/5000: episode: 2875, duration: 0.018s, episode steps: 1, steps per second: 57, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.818 [0.506, 1.000], loss: 0.034628, mean_absolute_error: 41.820053, mean_q: 0.262149
 2877/5000: episode: 2876, duration: 0.016s, episode steps: 1, steps per second: 62, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.750 [0.201, 1.000], loss: 0.034435, mean_absolute_error: 41.834450, mean_q: 0.261424
 2878/5000: episode: 2877, duration: 0.015s, episode steps: 1, steps per second: 65, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 2913/5000: episode: 2912, duration: 0.026s, episode steps: 1, steps per second: 38, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.398 [0.000, 0.970], loss: 0.027942, mean_absolute_error: 41.825493, mean_q: 0.235395
 2914/5000: episode: 2913, duration: 0.019s, episode steps: 1, steps per second: 53, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.182 [0.000, 0.490], loss: 0.027778, mean_absolute_error: 41.810314, mean_q: 0.234682
 2915/5000: episode: 2914, duration: 0.018s, episode steps: 1, steps per second: 56, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.805 [0.367, 1.000], loss: 0.027615, mean_absolute_error: 41.822998, mean_q: 0.234007
 2916/5000: episode: 2915, duration: 0.016s, episode steps: 1, steps per second: 63, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 2950/5000: episode: 2949, duration: 0.015s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.699 [0.117, 1.000], loss: 0.022388, mean_absolute_error: 41.805298, mean_q: 0.210584
 2951/5000: episode: 2950, duration: 0.016s, episode steps: 1, steps per second: 64, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.489 [0.000, 0.999], loss: 0.022242, mean_absolute_error: 41.810760, mean_q: 0.209894
 2952/5000: episode: 2951, duration: 0.016s, episode steps: 1, steps per second: 64, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.818 [0.495, 1.000], loss: 0.022098, mean_absolute_error: 41.808784, mean_q: 0.209212
 2953/5000: episode: 2952, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 2985/5000: episode: 2984, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.798 [0.334, 1.000], loss: 0.017976, mean_absolute_error: 41.807907, mean_q: 0.188602
 2986/5000: episode: 2985, duration: 0.016s, episode steps: 1, steps per second: 62, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.722 [0.151, 1.000], loss: 0.017877, mean_absolute_error: 41.803650, mean_q: 0.188069
 2987/5000: episode: 2986, duration: 0.015s, episode steps: 1, steps per second: 66, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.491 [0.000, 0.999], loss: 0.017727, mean_absolute_error: 41.803993, mean_q: 0.187284
 2988/5000: episode: 2987, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 3027/5000: episode: 3026, duration: 0.014s, episode steps: 1, steps per second: 73, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.810 [0.396, 1.000], loss: 0.013701, mean_absolute_error: 41.793808, mean_q: 0.164520
 3028/5000: episode: 3027, duration: 0.016s, episode steps: 1, steps per second: 62, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.571 [0.015, 1.000], loss: 0.013611, mean_absolute_error: 41.794098, mean_q: 0.163979
 3029/5000: episode: 3028, duration: 0.015s, episode steps: 1, steps per second: 65, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.446 [0.000, 0.991], loss: 0.013511, mean_absolute_error: 41.792267, mean_q: 0.163364
 3030/5000: episode: 3029, duration: 0.014s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 3067/5000: episode: 3066, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.775 [0.260, 1.000], loss: 0.010511, mean_absolute_error: 41.770401, mean_q: 0.143509
 3068/5000: episode: 3067, duration: 0.020s, episode steps: 1, steps per second: 51, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.785 [0.289, 1.000], loss: 0.010376, mean_absolute_error: 41.790672, mean_q: 0.143031
 3069/5000: episode: 3068, duration: 0.015s, episode steps: 1, steps per second: 65, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.339 [0.000, 0.925], loss: 0.010309, mean_absolute_error: 41.789154, mean_q: 0.142570
 3070/5000: episode: 3069, duration: 0.014s, episode steps: 1, steps per second: 72, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 3109/5000: episode: 3108, duration: 0.015s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.187 [0.000, 0.582], loss: 0.007774, mean_absolute_error: 41.778526, mean_q: 0.123676
 3110/5000: episode: 3109, duration: 0.017s, episode steps: 1, steps per second: 58, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.299 [0.000, 0.880], loss: 0.007719, mean_absolute_error: 41.778511, mean_q: 0.123236
 3111/5000: episode: 3110, duration: 0.019s, episode steps: 1, steps per second: 54, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.189 [0.000, 0.593], loss: 0.007659, mean_absolute_error: 41.773834, mean_q: 0.122742
 3112/5000: episode: 3111, duration: 0.019s, episode steps: 1, steps per second: 53, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 3149/5000: episode: 3148, duration: 0.014s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.186 [0.000, 0.574], loss: 0.005795, mean_absolute_error: 41.777260, mean_q: 0.106642
 3150/5000: episode: 3149, duration: 0.017s, episode steps: 1, steps per second: 60, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.194 [0.000, 0.626], loss: 0.005757, mean_absolute_error: 41.770584, mean_q: 0.106275
 3151/5000: episode: 3150, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.788 [0.299, 1.000], loss: 0.005719, mean_absolute_error: 41.772827, mean_q: 0.105930
 3152/5000: episode: 3151, duration: 0.014s, episode steps: 1, steps per second: 73, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 3191/5000: episode: 3190, duration: 0.027s, episode steps: 1, steps per second: 38, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.333 [0.000, 0.919], loss: 0.004192, mean_absolute_error: 41.777004, mean_q: 0.090561
 3192/5000: episode: 3191, duration: 0.016s, episode steps: 1, steps per second: 61, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.806 [0.377, 1.000], loss: 0.004169, mean_absolute_error: 41.767136, mean_q: 0.090273
 3193/5000: episode: 3192, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.791 [0.309, 1.000], loss: 0.004130, mean_absolute_error: 41.771339, mean_q: 0.089859
 3194/5000: episode: 3193, duration: 0.014s, episode steps: 1, steps per second: 72, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 3233/5000: episode: 3232, duration: 0.015s, episode steps: 1, steps per second: 65, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.810 [0.400, 1.000], loss: 0.002995, mean_absolute_error: 41.770828, mean_q: 0.076378
 3234/5000: episode: 3233, duration: 0.016s, episode steps: 1, steps per second: 62, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.480 [0.000, 0.998], loss: 0.002988, mean_absolute_error: 41.765034, mean_q: 0.076269
 3235/5000: episode: 3234, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.699 [0.118, 1.000], loss: 0.002955, mean_absolute_error: 41.759521, mean_q: 0.075828
 3236/5000: episode: 3235, duration: 0.015s, episode steps: 1, steps per second: 66, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 3272/5000: episode: 3271, duration: 0.019s, episode steps: 1, steps per second: 52, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.201 [0.000, 0.658], loss: 0.002168, mean_absolute_error: 41.763924, mean_q: 0.064827
 3273/5000: episode: 3272, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.773 [0.253, 1.000], loss: 0.002150, mean_absolute_error: 41.762524, mean_q: 0.064533
 3274/5000: episode: 3273, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.223 [0.000, 0.734], loss: 0.002141, mean_absolute_error: 41.751770, mean_q: 0.064369
 3275/5000: episode: 3274, duration: 0.014s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 3314/5000: episode: 3313, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.187 [0.000, 0.579], loss: 0.001501, mean_absolute_error: 41.757542, mean_q: 0.053763
 3315/5000: episode: 3314, duration: 0.018s, episode steps: 1, steps per second: 56, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.383 [0.000, 0.961], loss: 0.001490, mean_absolute_error: 41.758427, mean_q: 0.053560
 3316/5000: episode: 3315, duration: 0.014s, episode steps: 1, steps per second: 73, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.222 [0.000, 0.732], loss: 0.001474, mean_absolute_error: 41.761234, mean_q: 0.053278
 3317/5000: episode: 3316, duration: 0.014s, episode steps: 1, steps per second: 73, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 3355/5000: episode: 3354, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.219 [0.000, 0.725], loss: 0.001035, mean_absolute_error: 41.760677, mean_q: 0.044471
 3356/5000: episode: 3355, duration: 0.016s, episode steps: 1, steps per second: 62, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.596 [0.027, 1.000], loss: 0.001017, mean_absolute_error: 41.743416, mean_q: 0.043932
 3357/5000: episode: 3356, duration: 0.018s, episode steps: 1, steps per second: 57, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.615 [0.038, 1.000], loss: 0.001020, mean_absolute_error: 41.754364, mean_q: 0.044117
 3358/5000: episode: 3357, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 3396/5000: episode: 3395, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.182 [0.000, 0.514], loss: 0.000713, mean_absolute_error: 41.746803, mean_q: 0.036581
 3397/5000: episode: 3396, duration: 0.018s, episode steps: 1, steps per second: 56, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.182 [0.000, 0.521], loss: 0.000700, mean_absolute_error: 41.750717, mean_q: 0.036348
 3398/5000: episode: 3397, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.601 [0.030, 1.000], loss: 0.000692, mean_absolute_error: 41.754620, mean_q: 0.036135
 3399/5000: episode: 3398, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 3438/5000: episode: 3437, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.485 [0.000, 0.999], loss: 0.000467, mean_absolute_error: 41.746925, mean_q: 0.029502
 3439/5000: episode: 3438, duration: 0.015s, episode steps: 1, steps per second: 65, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.646 [0.061, 1.000], loss: 0.000460, mean_absolute_error: 41.752689, mean_q: 0.029287
 3440/5000: episode: 3439, duration: 0.015s, episode steps: 1, steps per second: 65, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.211 [0.000, 0.698], loss: 0.000458, mean_absolute_error: 41.749859, mean_q: 0.029207
 3441/5000: episode: 3440, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 3480/5000: episode: 3479, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.490 [0.000, 0.999], loss: 0.000303, mean_absolute_error: 41.748508, mean_q: 0.023545
 3481/5000: episode: 3480, duration: 0.016s, episode steps: 1, steps per second: 63, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.201 [0.000, 0.658], loss: 0.000305, mean_absolute_error: 41.742325, mean_q: 0.023498
 3482/5000: episode: 3481, duration: 0.015s, episode steps: 1, steps per second: 65, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.809 [0.395, 1.000], loss: 0.000299, mean_absolute_error: 41.737232, mean_q: 0.023215
 3483/5000: episode: 3482, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 3522/5000: episode: 3521, duration: 0.015s, episode steps: 1, steps per second: 66, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.666 [0.080, 1.000], loss: 0.000191, mean_absolute_error: 41.748184, mean_q: 0.018483
 3523/5000: episode: 3522, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.404 [0.000, 0.973], loss: 0.000191, mean_absolute_error: 41.745972, mean_q: 0.018477
 3524/5000: episode: 3523, duration: 0.015s, episode steps: 1, steps per second: 66, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.766 [0.236, 1.000], loss: 0.000187, mean_absolute_error: 41.751122, mean_q: 0.018320
 3525/5000: episode: 3524, duration: 0.015s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 3564/5000: episode: 3563, duration: 0.014s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.796 [0.327, 1.000], loss: 0.000126, mean_absolute_error: 41.742393, mean_q: 0.014584
 3565/5000: episode: 3564, duration: 0.015s, episode steps: 1, steps per second: 66, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.766 [0.235, 1.000], loss: 0.000120, mean_absolute_error: 41.745899, mean_q: 0.014356
 3566/5000: episode: 3565, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.316 [0.000, 0.901], loss: 0.000120, mean_absolute_error: 41.738735, mean_q: 0.014288
 3567/5000: episode: 3566, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 3606/5000: episode: 3605, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.212 [0.000, 0.701], loss: 0.000074, mean_absolute_error: 41.746056, mean_q: 0.011068
 3607/5000: episode: 3606, duration: 0.016s, episode steps: 1, steps per second: 63, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.233 [0.000, 0.762], loss: 0.000074, mean_absolute_error: 41.745522, mean_q: 0.011056
 3608/5000: episode: 3607, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.391 [0.000, 0.966], loss: 0.000073, mean_absolute_error: 41.746750, mean_q: 0.010949
 3609/5000: episode: 3608, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 3648/5000: episode: 3647, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.769 [0.244, 1.000], loss: 0.000046, mean_absolute_error: 41.744431, mean_q: 0.008341
 3649/5000: episode: 3648, duration: 0.017s, episode steps: 1, steps per second: 60, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.218 [0.000, 0.720], loss: 0.000044, mean_absolute_error: 41.745159, mean_q: 0.008241
 3650/5000: episode: 3649, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.608 [0.034, 1.000], loss: 0.000045, mean_absolute_error: 41.741150, mean_q: 0.008261
 3651/5000: episode: 3650, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 3690/5000: episode: 3689, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.544 [0.007, 1.000], loss: 0.000030, mean_absolute_error: 41.740135, mean_q: 0.006214
 3691/5000: episode: 3690, duration: 0.017s, episode steps: 1, steps per second: 60, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.190 [0.000, 0.601], loss: 0.000026, mean_absolute_error: 41.747040, mean_q: 0.006036
 3692/5000: episode: 3691, duration: 0.015s, episode steps: 1, steps per second: 66, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.722 [0.150, 1.000], loss: 0.000030, mean_absolute_error: 41.740025, mean_q: 0.006078
 3693/5000: episode: 3692, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 3732/5000: episode: 3731, duration: 0.015s, episode steps: 1, steps per second: 66, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.698 [0.116, 1.000], loss: 0.000016, mean_absolute_error: 41.738289, mean_q: 0.004322
 3733/5000: episode: 3732, duration: 0.016s, episode steps: 1, steps per second: 63, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.334 [0.000, 0.920], loss: 0.000031, mean_absolute_error: 41.729950, mean_q: 0.004495
 3734/5000: episode: 3733, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.185 [0.000, 0.556], loss: 0.000020, mean_absolute_error: 41.735676, mean_q: 0.004374
 3735/5000: episode: 3734, duration: 0.021s, episode steps: 1, steps per second: 48, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 3774/5000: episode: 3773, duration: 0.017s, episode steps: 1, steps per second: 57, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.779 [0.271, 1.000], loss: 0.000011, mean_absolute_error: 41.738457, mean_q: 0.003108
 3775/5000: episode: 3774, duration: 0.016s, episode steps: 1, steps per second: 62, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.230 [0.000, 0.753], loss: 0.000009, mean_absolute_error: 41.745049, mean_q: 0.003023
 3776/5000: episode: 3775, duration: 0.014s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.352 [0.000, 0.937], loss: 0.000011, mean_absolute_error: 41.738586, mean_q: 0.002997
 3777/5000: episode: 3776, duration: 0.014s, episode steps: 1, steps per second: 72, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 3816/5000: episode: 3815, duration: 0.015s, episode steps: 1, steps per second: 66, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.283 [0.000, 0.858], loss: 0.000008, mean_absolute_error: 41.739662, mean_q: 0.001935
 3817/5000: episode: 3816, duration: 0.017s, episode steps: 1, steps per second: 58, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.182 [0.000, 0.502], loss: 0.000008, mean_absolute_error: 41.737114, mean_q: 0.002033
 3818/5000: episode: 3817, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.663 [0.077, 1.000], loss: 0.000007, mean_absolute_error: 41.739403, mean_q: 0.001999
 3819/5000: episode: 3818, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 3858/5000: episode: 3857, duration: 0.014s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.808 [0.386, 1.000], loss: 0.000004, mean_absolute_error: 41.740395, mean_q: 0.001122
 3859/5000: episode: 3858, duration: 0.017s, episode steps: 1, steps per second: 58, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.606 [0.032, 1.000], loss: 0.000005, mean_absolute_error: 41.737015, mean_q: 0.001134
 3860/5000: episode: 3859, duration: 0.015s, episode steps: 1, steps per second: 65, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.652 [0.067, 1.000], loss: 0.000008, mean_absolute_error: 41.731564, mean_q: 0.001148
 3861/5000: episode: 3860, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 3900/5000: episode: 3899, duration: 0.015s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.720 [0.148, 1.000], loss: 0.000004, mean_absolute_error: 41.739811, mean_q: 0.000525
 3901/5000: episode: 3900, duration: 0.017s, episode steps: 1, steps per second: 58, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.399 [0.000, 0.970], loss: 0.000004, mean_absolute_error: 41.738266, mean_q: 0.000519
 3902/5000: episode: 3901, duration: 0.016s, episode steps: 1, steps per second: 65, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.735 [0.172, 1.000], loss: 0.000003, mean_absolute_error: 41.741692, mean_q: 0.000450
 3903/5000: episode: 3902, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 3942/5000: episode: 3941, duration: 0.014s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.447 [0.000, 0.991], loss: 0.000002, mean_absolute_error: 41.741821, mean_q: 0.000098
 3943/5000: episode: 3942, duration: 0.016s, episode steps: 1, steps per second: 63, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.815 [0.437, 1.000], loss: 0.000003, mean_absolute_error: 41.740108, mean_q: 0.000135
 3944/5000: episode: 3943, duration: 0.017s, episode steps: 1, steps per second: 59, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.479 [0.000, 0.998], loss: 0.000003, mean_absolute_error: 41.738235, mean_q: 0.000112
 3945/5000: episode: 3944, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -

 3984/5000: episode: 3983, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.534 [0.004, 1.000], loss: 0.000002, mean_absolute_error: 41.741287, mean_q: -0.000198
 3985/5000: episode: 3984, duration: 0.016s, episode steps: 1, steps per second: 62, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.511 [0.001, 1.000], loss: 0.000006, mean_absolute_error: 41.735813, mean_q: -0.000123
 3986/5000: episode: 3985, duration: 0.015s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.512 [0.001, 1.000], loss: 0.000001, mean_absolute_error: 41.742100, mean_q: -0.000239
 3987/5000: episode: 3986, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001

 4026/5000: episode: 4025, duration: 0.018s, episode steps: 1, steps per second: 55, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.637 [0.054, 1.000], loss: 0.000002, mean_absolute_error: 41.739449, mean_q: -0.000349
 4027/5000: episode: 4026, duration: 0.017s, episode steps: 1, steps per second: 58, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.186 [0.000, 0.565], loss: 0.000002, mean_absolute_error: 41.740841, mean_q: -0.000372
 4028/5000: episode: 4027, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.307 [0.000, 0.891], loss: 0.000006, mean_absolute_error: 41.734879, mean_q: -0.000342
 4029/5000: episode: 4028, duration: 0.014s, episode steps: 1, steps per second: 73, episode reward: -0.001, mean reward: -0.001 [-0.001

 4068/5000: episode: 4067, duration: 0.015s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.795 [0.324, 1.000], loss: 0.000002, mean_absolute_error: 41.737465, mean_q: -0.000453
 4069/5000: episode: 4068, duration: 0.017s, episode steps: 1, steps per second: 60, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.197 [0.000, 0.642], loss: 0.000007, mean_absolute_error: 41.729370, mean_q: -0.000461
 4070/5000: episode: 4069, duration: 0.015s, episode steps: 1, steps per second: 66, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.688 [0.103, 1.000], loss: 0.000004, mean_absolute_error: 41.735565, mean_q: -0.000421
 4071/5000: episode: 4070, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001

 4110/5000: episode: 4109, duration: 0.014s, episode steps: 1, steps per second: 72, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.787 [0.296, 1.000], loss: 0.000004, mean_absolute_error: 41.736679, mean_q: -0.000456
 4111/5000: episode: 4110, duration: 0.015s, episode steps: 1, steps per second: 65, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.420 [0.000, 0.981], loss: 0.000005, mean_absolute_error: 41.733490, mean_q: -0.000471
 4112/5000: episode: 4111, duration: 0.015s, episode steps: 1, steps per second: 65, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.217 [0.000, 0.719], loss: 0.000002, mean_absolute_error: 41.741291, mean_q: -0.000511
 4113/5000: episode: 4112, duration: 0.015s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001

 4152/5000: episode: 4151, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.416 [0.000, 0.979], loss: 0.000003, mean_absolute_error: 41.735542, mean_q: -0.000460
 4153/5000: episode: 4152, duration: 0.017s, episode steps: 1, steps per second: 58, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.558 [0.011, 1.000], loss: 0.000005, mean_absolute_error: 41.731297, mean_q: -0.000441
 4154/5000: episode: 4153, duration: 0.014s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.724 [0.153, 1.000], loss: 0.000002, mean_absolute_error: 41.741272, mean_q: -0.000527
 4155/5000: episode: 4154, duration: 0.014s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001

 4194/5000: episode: 4193, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.749 [0.197, 1.000], loss: 0.000003, mean_absolute_error: 41.734665, mean_q: -0.000499
 4195/5000: episode: 4194, duration: 0.016s, episode steps: 1, steps per second: 64, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.613 [0.037, 1.000], loss: 0.000005, mean_absolute_error: 41.732933, mean_q: -0.000504
 4196/5000: episode: 4195, duration: 0.016s, episode steps: 1, steps per second: 62, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.783 [0.282, 1.000], loss: 0.000003, mean_absolute_error: 41.738312, mean_q: -0.000539
 4197/5000: episode: 4196, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: -0.001 [-0.001

 4236/5000: episode: 4235, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.809 [0.389, 1.000], loss: 0.000004, mean_absolute_error: 41.732185, mean_q: -0.000921
 4237/5000: episode: 4236, duration: 0.015s, episode steps: 1, steps per second: 65, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.335 [0.000, 0.921], loss: 0.000002, mean_absolute_error: 41.741070, mean_q: -0.000607
 4238/5000: episode: 4237, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.801 [0.351, 1.000], loss: 0.000003, mean_absolute_error: 41.736389, mean_q: -0.000634
 4239/5000: episode: 4238, duration: 0.014s, episode steps: 1, steps per second: 72, episode reward: -0.001, mean reward: -0.001 [-0.001

 4278/5000: episode: 4277, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.704 [0.124, 1.000], loss: 0.000002, mean_absolute_error: 41.741287, mean_q: -0.000669
 4279/5000: episode: 4278, duration: 0.016s, episode steps: 1, steps per second: 61, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.329 [0.000, 0.915], loss: 0.000001, mean_absolute_error: 41.745106, mean_q: -0.000732
 4280/5000: episode: 4279, duration: 0.018s, episode steps: 1, steps per second: 55, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.592 [0.025, 1.000], loss: 0.000001, mean_absolute_error: 41.742023, mean_q: -0.000674
 4281/5000: episode: 4280, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001

 4320/5000: episode: 4319, duration: 0.015s, episode steps: 1, steps per second: 66, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.757 [0.216, 1.000], loss: 0.000002, mean_absolute_error: 41.739861, mean_q: -0.000703
 4321/5000: episode: 4320, duration: 0.017s, episode steps: 1, steps per second: 59, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.617 [0.040, 1.000], loss: 0.000004, mean_absolute_error: 41.733795, mean_q: -0.000647
 4322/5000: episode: 4321, duration: 0.015s, episode steps: 1, steps per second: 65, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.761 [0.223, 1.000], loss: 0.000001, mean_absolute_error: 41.741158, mean_q: -0.000686
 4323/5000: episode: 4322, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001

 4362/5000: episode: 4361, duration: 0.014s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.738 [0.177, 1.000], loss: 0.000002, mean_absolute_error: 41.737930, mean_q: -0.000713
 4363/5000: episode: 4362, duration: 0.016s, episode steps: 1, steps per second: 63, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.817 [0.469, 1.000], loss: 0.000002, mean_absolute_error: 41.741943, mean_q: -0.000741
 4364/5000: episode: 4363, duration: 0.015s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.182 [0.000, 0.493], loss: 0.000028, mean_absolute_error: 41.726910, mean_q: -0.000452
 4365/5000: episode: 4364, duration: 0.014s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001

 4404/5000: episode: 4403, duration: 0.015s, episode steps: 1, steps per second: 66, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.761 [0.224, 1.000], loss: 0.000001, mean_absolute_error: 41.742504, mean_q: -0.000769
 4405/5000: episode: 4404, duration: 0.017s, episode steps: 1, steps per second: 59, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.219 [0.000, 0.725], loss: 0.000003, mean_absolute_error: 41.736927, mean_q: -0.000775
 4406/5000: episode: 4405, duration: 0.023s, episode steps: 1, steps per second: 43, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.247 [0.000, 0.795], loss: 0.000003, mean_absolute_error: 41.739407, mean_q: -0.000766
 4407/5000: episode: 4406, duration: 0.016s, episode steps: 1, steps per second: 63, episode reward: -0.001, mean reward: -0.001 [-0.001

 4445/5000: episode: 4444, duration: 0.015s, episode steps: 1, steps per second: 65, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.810 [0.398, 1.000], loss: 0.000005, mean_absolute_error: 41.736221, mean_q: -0.000757
 4446/5000: episode: 4445, duration: 0.017s, episode steps: 1, steps per second: 58, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.249 [0.000, 0.797], loss: 0.000001, mean_absolute_error: 41.741699, mean_q: -0.000794
 4447/5000: episode: 4446, duration: 0.015s, episode steps: 1, steps per second: 65, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.250 [0.000, 0.799], loss: 0.000002, mean_absolute_error: 41.737663, mean_q: -0.000781
 4448/5000: episode: 4447, duration: 0.014s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001

 4487/5000: episode: 4486, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.758 [0.217, 1.000], loss: 0.000001, mean_absolute_error: 41.741783, mean_q: -0.000870
 4488/5000: episode: 4487, duration: 0.016s, episode steps: 1, steps per second: 61, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.199 [0.000, 0.651], loss: 0.000002, mean_absolute_error: 41.740311, mean_q: -0.000808
 4489/5000: episode: 4488, duration: 0.015s, episode steps: 1, steps per second: 65, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.808 [0.386, 1.000], loss: 0.000005, mean_absolute_error: 41.730942, mean_q: -0.000703
 4490/5000: episode: 4489, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001

 4529/5000: episode: 4528, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.587 [0.022, 1.000], loss: 0.000005, mean_absolute_error: 41.739288, mean_q: -0.000764
 4530/5000: episode: 4529, duration: 0.015s, episode steps: 1, steps per second: 65, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.182 [0.000, 0.521], loss: 0.000003, mean_absolute_error: 41.738625, mean_q: -0.000809
 4531/5000: episode: 4530, duration: 0.017s, episode steps: 1, steps per second: 59, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.801 [0.351, 1.000], loss: 0.000003, mean_absolute_error: 41.738945, mean_q: -0.000801
 4532/5000: episode: 4531, duration: 0.014s, episode steps: 1, steps per second: 72, episode reward: -0.001, mean reward: -0.001 [-0.001

 4571/5000: episode: 4570, duration: 0.019s, episode steps: 1, steps per second: 52, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.405 [0.000, 0.973], loss: 0.000001, mean_absolute_error: 41.739498, mean_q: -0.000787
 4572/5000: episode: 4571, duration: 0.015s, episode steps: 1, steps per second: 65, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.255 [0.000, 0.809], loss: 0.000006, mean_absolute_error: 41.734867, mean_q: -0.000733
 4573/5000: episode: 4572, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.411 [0.000, 0.977], loss: 0.000003, mean_absolute_error: 41.733639, mean_q: -0.000711
 4574/5000: episode: 4573, duration: 0.014s, episode steps: 1, steps per second: 72, episode reward: -0.001, mean reward: -0.001 [-0.001

 4613/5000: episode: 4612, duration: 0.016s, episode steps: 1, steps per second: 64, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.286 [0.000, 0.862], loss: 0.000003, mean_absolute_error: 41.732861, mean_q: -0.000812
 4614/5000: episode: 4613, duration: 0.017s, episode steps: 1, steps per second: 59, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.335 [0.000, 0.921], loss: 0.000003, mean_absolute_error: 41.737179, mean_q: -0.000760
 4615/5000: episode: 4614, duration: 0.017s, episode steps: 1, steps per second: 59, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.183 [0.000, 0.537], loss: 0.000011, mean_absolute_error: 41.731674, mean_q: -0.000562
 4616/5000: episode: 4615, duration: 0.015s, episode steps: 1, steps per second: 66, episode reward: -0.001, mean reward: -0.001 [-0.001

 4655/5000: episode: 4654, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.195 [0.000, 0.633], loss: 0.000002, mean_absolute_error: 41.740578, mean_q: -0.000871
 4656/5000: episode: 4655, duration: 0.017s, episode steps: 1, steps per second: 58, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.433 [0.000, 0.986], loss: 0.000006, mean_absolute_error: 41.734451, mean_q: -0.000936
 4657/5000: episode: 4656, duration: 0.015s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.188 [0.000, 0.590], loss: 0.000003, mean_absolute_error: 41.737534, mean_q: -0.000789
 4658/5000: episode: 4657, duration: 0.015s, episode steps: 1, steps per second: 66, episode reward: -0.001, mean reward: -0.001 [-0.001

 4697/5000: episode: 4696, duration: 0.015s, episode steps: 1, steps per second: 66, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.191 [0.000, 0.609], loss: 0.000002, mean_absolute_error: 41.741043, mean_q: -0.000897
 4698/5000: episode: 4697, duration: 0.017s, episode steps: 1, steps per second: 57, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.716 [0.141, 1.000], loss: 0.000003, mean_absolute_error: 41.737751, mean_q: -0.000868
 4699/5000: episode: 4698, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.207 [0.000, 0.682], loss: 0.000002, mean_absolute_error: 41.737343, mean_q: -0.000861
 4700/5000: episode: 4699, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: -0.001 [-0.001

 4739/5000: episode: 4738, duration: 0.016s, episode steps: 1, steps per second: 64, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.576 [0.017, 1.000], loss: 0.000003, mean_absolute_error: 41.733665, mean_q: -0.000873
 4740/5000: episode: 4739, duration: 0.017s, episode steps: 1, steps per second: 60, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.666 [0.080, 1.000], loss: 0.000002, mean_absolute_error: 41.739155, mean_q: -0.000835
 4741/5000: episode: 4740, duration: 0.015s, episode steps: 1, steps per second: 66, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.233 [0.000, 0.762], loss: 0.000001, mean_absolute_error: 41.743019, mean_q: -0.000872
 4742/5000: episode: 4741, duration: 0.015s, episode steps: 1, steps per second: 68, episode reward: -0.001, mean reward: -0.001 [-0.001

 4781/5000: episode: 4780, duration: 0.014s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.532 [0.004, 1.000], loss: 0.000005, mean_absolute_error: 41.730492, mean_q: -0.000772
 4782/5000: episode: 4781, duration: 0.017s, episode steps: 1, steps per second: 58, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.812 [0.413, 1.000], loss: 0.000012, mean_absolute_error: 41.730499, mean_q: -0.000833
 4783/5000: episode: 4782, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.615 [0.038, 1.000], loss: 0.000002, mean_absolute_error: 41.741135, mean_q: -0.000918
 4784/5000: episode: 4783, duration: 0.014s, episode steps: 1, steps per second: 71, episode reward: -0.001, mean reward: -0.001 [-0.001

 4823/5000: episode: 4822, duration: 0.014s, episode steps: 1, steps per second: 74, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.218 [0.000, 0.722], loss: 0.000002, mean_absolute_error: 41.741936, mean_q: -0.000883
 4824/5000: episode: 4823, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.459 [0.000, 0.994], loss: 0.000005, mean_absolute_error: 41.736801, mean_q: -0.000872
 4825/5000: episode: 4824, duration: 0.015s, episode steps: 1, steps per second: 69, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.791 [0.308, 1.000], loss: 0.000002, mean_absolute_error: 41.735985, mean_q: -0.000851
 4826/5000: episode: 4825, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: -0.001, mean reward: -0.001 [-0.001

 4861/5000: episode: 4860, duration: 0.020s, episode steps: 1, steps per second: 50, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.804 [0.362, 1.000], loss: 0.000001, mean_absolute_error: 41.740776, mean_q: -0.000894
 4862/5000: episode: 4861, duration: 0.018s, episode steps: 1, steps per second: 55, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.185 [0.000, 0.555], loss: 0.000005, mean_absolute_error: 41.733192, mean_q: -0.000843
 4863/5000: episode: 4862, duration: 0.017s, episode steps: 1, steps per second: 59, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.187 [0.000, 0.581], loss: 0.000005, mean_absolute_error: 41.735046, mean_q: -0.000819
 4864/5000: episode: 4863, duration: 0.017s, episode steps: 1, steps per second: 57, episode reward: -0.001, mean reward: -0.001 [-0.001

 4899/5000: episode: 4898, duration: 0.015s, episode steps: 1, steps per second: 66, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.501 [0.000, 1.000], loss: 0.000002, mean_absolute_error: 41.738293, mean_q: -0.000864
 4900/5000: episode: 4899, duration: 0.017s, episode steps: 1, steps per second: 60, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.742 [0.184, 1.000], loss: 0.000003, mean_absolute_error: 41.737694, mean_q: -0.000852
 4901/5000: episode: 4900, duration: 0.015s, episode steps: 1, steps per second: 65, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.702 [0.121, 1.000], loss: 0.000003, mean_absolute_error: 41.738205, mean_q: -0.000848
 4902/5000: episode: 4901, duration: 0.014s, episode steps: 1, steps per second: 73, episode reward: -0.001, mean reward: -0.001 [-0.001

 4940/5000: episode: 4939, duration: 0.019s, episode steps: 1, steps per second: 52, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.238 [0.000, 0.775], loss: 0.000002, mean_absolute_error: 41.742748, mean_q: -0.000954
 4941/5000: episode: 4940, duration: 0.020s, episode steps: 1, steps per second: 51, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.352 [0.000, 0.937], loss: 0.000002, mean_absolute_error: 41.742893, mean_q: -0.000905
 4942/5000: episode: 4941, duration: 0.016s, episode steps: 1, steps per second: 63, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.311 [0.000, 0.894], loss: 0.000001, mean_absolute_error: 41.745308, mean_q: -0.000948
 4943/5000: episode: 4942, duration: 0.017s, episode steps: 1, steps per second: 59, episode reward: -0.001, mean reward: -0.001 [-0.001

 4976/5000: episode: 4975, duration: 0.019s, episode steps: 1, steps per second: 52, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.484 [0.000, 0.999], loss: 0.000003, mean_absolute_error: 41.735519, mean_q: -0.000849
 4977/5000: episode: 4976, duration: 0.019s, episode steps: 1, steps per second: 52, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.586 [0.022, 1.000], loss: 0.000008, mean_absolute_error: 41.734226, mean_q: -0.000958
 4978/5000: episode: 4977, duration: 0.016s, episode steps: 1, steps per second: 61, episode reward: -0.001, mean reward: -0.001 [-0.001, -0.001], mean action: 0.000 [0.000, 0.000], mean observation: 0.384 [0.000, 0.961], loss: 0.000007, mean_absolute_error: 41.729748, mean_q: -0.000873
 4979/5000: episode: 4978, duration: 0.015s, episode steps: 1, steps per second: 67, episode reward: -0.001, mean reward: -0.001 [-0.001

In [17]:
# Training has finished: persist the agent's final weights to disk.
# overwrite=True is passed so a weights file left by an earlier run does not block the save.
weights_file = 'duel_dqn_{}_weights.h5f'.format('stock_bot')
dqn.save_weights(weights_file, overwrite=True)

In [18]:
# Run the trained agent on the environment for five evaluation episodes, with rendering off.
# The call is kept as the cell's last expression, so its return value is echoed after the logs.
# NOTE(review): the recorded output for this cell shows every episode ending after a single
# step with reward -0.001 -- worth confirming the environment's termination condition.
dqn.test(
    env,
    visualize=False,
    nb_episodes=5,
)

Testing for 5 episodes ...
Episode 1: reward: -0.001, steps: 1
Episode 2: reward: -0.001, steps: 1
Episode 3: reward: -0.001, steps: 1
Episode 4: reward: -0.001, steps: 1
Episode 5: reward: -0.001, steps: 1


<keras.callbacks.History at 0x7fc38c8f7278>