In [1]:
import gym
import numpy as np


In [2]:
%matplotlib inline
from matplotlib import animation
import matplotlib.pyplot as plt
from IPython.display import display, HTML

def display_frames_as_gif(frames):
    """Show a sequence of image frames as an inline JavaScript animation.

    Args:
        frames: Non-empty, indexable sequence of image arrays that
            ``plt.imshow`` can draw. The first frame's ``shape[0]`` (height)
            and ``shape[1]`` (width) determine the figure size.

    Returns:
        None. The animation is rendered through ``IPython.display.display``.
    """
    height, width = frames[0].shape[0], frames[0].shape[1]
    # Hold an explicit handle on the figure so it can be closed below without
    # reaching into the animation object's private ``_fig`` attribute.
    fig = plt.figure(figsize=(width / 72.0, height / 72.0), dpi=144)
    patch = plt.imshow(frames[0])
    plt.axis('off')

    def animate(i):
        # Swap the pixel data of the single image artist for frame i.
        patch.set_data(frames[i])
        return (patch,)

    anim = animation.FuncAnimation(fig, animate, frames=len(frames), interval=50)
    # Close the static figure so the notebook shows only the animation.
    plt.close(fig)
    display(HTML(anim.to_jshtml()))

from gym.wrappers import AtariPreprocessing
# NOTE(review): this only assigns an attribute on the imported `gym` module
# object; nothing in this notebook reads it, and the env.step() calls below
# still unpack a 4-tuple. Confirm whether it has any effect in the installed
# gym version.
gym.new_step_api=True
# Pong environment instance that the training cell below passes to train_model().
env = gym.make('Pong-v0')

# --- network dimensions ---
H = 400      # width of the hidden layer
D = 80 * 80  # length of the flattened 80x80 input grid

# --- training hyperparameters ---
batch_size = 10       # number of episodes accumulated before each parameter update
learning_rate = 1e-4
gamma = 0.99          # reward discount factor
decay_rate = 0.99     # RMSProp decay for the leaky sum of squared gradients

# Two-layer policy network with "Xavier" initialization (weights scaled by
# 1/sqrt(fan_in)). W1 is drawn before W2. No random seed is set here, so every
# run starts from different weights.
model = {
    'W1': np.random.randn(H, D) / np.sqrt(D),
    'W2': np.random.randn(H) / np.sqrt(H),
}

# Zero-initialised accumulators with the same shapes as the weights:
# grad_buffer sums gradients over a batch, rmsprop_cache is the RMSProp memory.
grad_buffer = {name: np.zeros_like(weights) for name, weights in model.items()}
rmsprop_cache = {name: np.zeros_like(weights) for name, weights in model.items()}

def sigmoid(x):
  """Logistic "squashing" function: maps x (scalar or array) into [0, 1]."""
  denominator = 1.0 + np.exp(-x)
  return 1.0 / denominator

def prepro(I):
  """Turn a raw frame into a flat binary float vector without modifying it.

  Rows 35..194 are kept, every second row and column is taken, and only
  channel 0 is used. Pixels equal to 144 or 109 (the two background values)
  or already 0 become 0.0; every other pixel (paddles, ball) becomes 1.0.

  Args:
    I: array-like frame indexed as [row, column, channel]. For a
      210x160x3 frame the result has 80 * 80 = 6400 entries.

  Returns:
    1-D float array of 0.0 / 1.0 values in row-major order.
  """
  # Slicing an ndarray yields a view of the caller's frame, so the mask is
  # built with comparisons only; assigning into the view would overwrite the
  # observation that was passed in.
  pixels = np.asarray(I)[35:195:2, ::2, 0]
  foreground = (pixels != 0) & (pixels != 144) & (pixels != 109)
  return foreground.astype(float).ravel()

def discount_rewards(r, discount=None):
  """Compute discounted returns backwards through time.

  The running sum is reset whenever a non-zero reward is met, because in Pong
  a non-zero reward marks a game boundary.

  Args:
    r: array-like of per-step rewards; either 1-D or a column of shape (T, 1).
    discount: discount factor applied per step. Defaults to the module-level
      `gamma` when omitted.

  Returns:
    Floating-point array with the same shape as `r` holding the discounted
    return for each step.
  """
  if discount is None:
    discount = gamma
  r = np.asarray(r)
  # zeros_like() on integer rewards would allocate an integer buffer and
  # truncate every discounted value on assignment, so force a float buffer
  # unless the input is already floating point.
  out_dtype = r.dtype if np.issubdtype(r.dtype, np.floating) else float
  discounted_r = np.zeros_like(r, dtype=out_dtype)
  running_add = 0
  for t in reversed(range(0, r.size)):
    if r[t] != 0:
      running_add = 0 # reset the sum, since this was a game boundary (pong specific!)
    running_add = running_add * discount + r[t]
    discounted_r[t] = running_add
  return discounted_r

def policy_forward(x):
  """Forward pass of the two-layer policy using the module-level `model`.

  Args:
    x: input vector multiplied by model['W1'].

  Returns:
    (p, h): p is the probability of taking action 2, h is the hidden state
    after the ReLU.
  """
  pre_activation = np.dot(model['W1'], x)
  # ReLU nonlinearity: negative activations are replaced by zero.
  hidden = np.where(pre_activation < 0, 0.0, pre_activation)
  logit = np.dot(model['W2'], hidden)
  return sigmoid(logit), hidden

def policy_backward(epx, eph, epdlogp):
  """Backward pass over one episode, using the module-level `model`.

  Args:
    epx: stacked network inputs, one row per step.
    eph: stacked hidden states, one row per step (as returned by the forward pass).
    epdlogp: stacked per-step gradient signal on the output logit.

  Returns:
    dict with the gradients under the keys 'W1' and 'W2'.
  """
  w2_grad = np.dot(eph.T, epdlogp).ravel()
  hidden_grad = np.outer(epdlogp, model['W2'])
  # Backprop through the ReLU: no gradient flows where the unit was inactive.
  hidden_grad = np.where(eph <= 0, 0.0, hidden_grad)
  w1_grad = np.dot(hidden_grad.T, epx)
  return {'W1': w1_grad, 'W2': w2_grad}

def model_step(model, observation, prev_x):
  """Choose the greedy action for one frame with the given policy weights.

  Args:
    model: dict holding the weight arrays under 'W1' and 'W2'.
    observation: raw frame, handed to prepro().
    prev_x: preprocessed frame from the previous step, or None on the first
      step of an episode.

  Returns:
    (action, cur_x): action is 2 when the policy probability is >= 0.5 and 3
    otherwise; cur_x is the preprocessed current frame, to be passed back in
    as prev_x on the next call.
  """
  cur_x = prepro(observation)
  # The network sees the difference between consecutive frames; the first
  # step has no predecessor, so it gets an all-zero input of the same size.
  x = cur_x - prev_x if prev_x is not None else np.zeros_like(cur_x)

  # Forward pass with the weights that were passed in. This is the same
  # computation as policy_forward(), which always reads the module-level
  # `model` and would therefore ignore this function's `model` argument.
  hidden = np.dot(model['W1'], x)
  hidden[hidden < 0] = 0 # ReLU nonlinearity
  aprob = sigmoid(np.dot(model['W2'], hidden))

  # Deterministic (greedy) choice: nothing is sampled at play time.
  action = 2 if aprob >= 0.5 else 3

  return action, cur_x

def play_game(env, model, max_steps=1000):
  """Play one episode greedily, then show the rendered frames as an animation.

  Args:
    env: environment providing reset(), render(mode='rgb_array'), step()
      returning a 4-tuple (observation, reward, done, info), and close().
    model: policy weights forwarded to model_step().
    max_steps: maximum number of environment steps to play (default 1000).

  Returns:
    None. Prints a one-line summary and displays the animation.

  Side effects:
    The environment is closed before returning, also when a step raises.
  """
  observation = env.reset()

  frames = []
  cumulated_reward = 0

  prev_x = None # used in computing the difference frame

  try:
    for t in range(max_steps):
      frames.append(env.render(mode = 'rgb_array'))
      action, prev_x = model_step(model, observation, prev_x)
      observation, reward, done, info = env.step(action)
      cumulated_reward += reward
      if done:
        print("Episode finished after {} timesteps, accumulated reward = {}".format(t+1, cumulated_reward))
        break
    else:
      # Reached only when the step budget ran out without `done`, so the two
      # summary messages are mutually exclusive.
      print("Episode finished without success, accumulated reward = {}".format(cumulated_reward))
  finally:
    env.close()

  # Nothing was rendered when max_steps is 0; the display helper needs at
  # least one frame.
  if frames:
    display_frames_as_gif(frames)

def train_model(env, model, total_episodes = 100):
  """Train the policy with policy gradients and RMSProp for a number of episodes.

  NOTE: the forward and backward passes go through policy_forward() and
  policy_backward(), which read the module-level `model`, and the gradient
  accumulators are the module-level `grad_buffer` / `rmsprop_cache`. The
  `model` argument is what gets updated, so it is expected to be that same
  module-level dict. Gradients of an unfinished batch stay in `grad_buffer`
  when the function returns.

  Args:
    env: environment providing reset() and step() returning a 4-tuple
      (observation, reward, done, info).
    model: dict of weight arrays, updated in place.
    total_episodes: number of episodes to play before returning.

  Returns:
    List of (episode_number, reward_sum, running_reward) tuples, one per
    finished episode. Empty when total_episodes is not positive.
  """
  hist = []
  if total_episodes <= 0:
    # Nothing to train; return before touching the environment. The stop test
    # at the bottom of the loop only runs after a finished episode.
    return hist

  observation = env.reset()

  prev_x = None # used in computing the difference frame
  xs,hs,dlogps,drs = [],[],[],[]
  running_reward = None
  reward_sum = 0
  episode_number = 0

  while True:

    cur_x = prepro(observation)
    x = cur_x - prev_x if prev_x is not None else np.zeros(D)
    prev_x = cur_x

    # forward the policy network and sample an action from the returned probability
    aprob, h = policy_forward(x)
    action = 2 if np.random.uniform() < aprob else 3 # roll the dice!

    # record various intermediates (needed later for backprop)
    xs.append(x) # observation
    hs.append(h) # hidden state
    y = 1 if action == 2 else 0 # a "fake label"
    dlogps.append(y - aprob) # grad that encourages the action that was taken to be taken (see http://cs231n.github.io/neural-networks-2/#losses if confused)

    # step the environment and get new measurements
    observation, reward, done, info = env.step(action)
    reward_sum += reward

    drs.append(reward) # record reward (has to be done after we call step() to get reward for previous action)

    if not done:
      continue

    # ---- an episode finished ----
    episode_number += 1

    # stack together all inputs, hidden states, action gradients, and rewards for this episode
    epx = np.vstack(xs)
    eph = np.vstack(hs)
    epdlogp = np.vstack(dlogps)
    # Force float rewards: discount_rewards() allocates its output like its
    # input, so integer rewards would truncate the discounted values.
    epr = np.vstack(drs).astype(float)
    xs,hs,dlogps,drs = [],[],[],[] # reset array memory

    # compute the discounted reward backwards through time
    discounted_epr = discount_rewards(epr)
    # standardize the rewards to be unit normal (helps control the gradient estimator variance)
    discounted_epr -= np.mean(discounted_epr)
    reward_std = np.std(discounted_epr)
    if reward_std > 0:
      discounted_epr /= reward_std
    # else: every centred return is zero; dividing by the zero std would put
    # NaN into the gradients and from there into the weights.

    epdlogp *= discounted_epr # modulate the gradient with advantage (PG magic happens right here.)
    grad = policy_backward(epx, eph, epdlogp)
    for k in model: grad_buffer[k] += grad[k] # accumulate grad over batch

    # perform rmsprop parameter update every batch_size episodes
    if episode_number % batch_size == 0:
      for k,v in model.items():
        g = grad_buffer[k] # gradient
        rmsprop_cache[k] = decay_rate * rmsprop_cache[k] + (1 - decay_rate) * g**2
        # "+=" because the accumulated gradient points towards higher reward
        model[k] += learning_rate * g / (np.sqrt(rmsprop_cache[k]) + 1e-5)
        grad_buffer[k] = np.zeros_like(v) # reset batch gradient buffer

    # boring book-keeping
    running_reward = reward_sum if running_reward is None else running_reward * 0.99 + reward_sum * 0.01
    hist.append((episode_number, reward_sum, running_reward))
    print ('episode %f, reward total was %f. running mean: %f' % (episode_number, reward_sum, running_reward))
    reward_sum = 0
    observation = env.reset() # reset env
    prev_x = None
    if episode_number >= total_episodes:
      return hist

   
    

  logger.warn(
  deprecation(
  deprecation(


In [3]:
# Train for 7000 episodes and time the whole call. hist1 receives the list of
# (episode_number, reward_sum, running_reward) tuples returned by train_model().
%time hist1 = train_model(env, model, total_episodes=7000)

  logger.deprecation(


episode 1.000000, reward total was -21.000000. running mean: -21.000000
episode 2.000000, reward total was -21.000000. running mean: -21.000000
episode 3.000000, reward total was -19.000000. running mean: -20.980000
episode 4.000000, reward total was -21.000000. running mean: -20.980200
episode 5.000000, reward total was -20.000000. running mean: -20.970398
episode 6.000000, reward total was -20.000000. running mean: -20.960694
episode 7.000000, reward total was -21.000000. running mean: -20.961087
episode 8.000000, reward total was -20.000000. running mean: -20.951476
episode 9.000000, reward total was -21.000000. running mean: -20.951961
episode 10.000000, reward total was -20.000000. running mean: -20.942442
episode 11.000000, reward total was -21.000000. running mean: -20.943017
episode 12.000000, reward total was -21.000000. running mean: -20.943587
episode 13.000000, reward total was -18.000000. running mean: -20.914151
episode 14.000000, reward total was -21.000000. running mean

episode 114.000000, reward total was -21.000000. running mean: -20.710363
episode 115.000000, reward total was -21.000000. running mean: -20.713260
episode 116.000000, reward total was -21.000000. running mean: -20.716127
episode 117.000000, reward total was -21.000000. running mean: -20.718966
episode 118.000000, reward total was -21.000000. running mean: -20.721776
episode 119.000000, reward total was -21.000000. running mean: -20.724559
episode 120.000000, reward total was -20.000000. running mean: -20.717313
episode 121.000000, reward total was -21.000000. running mean: -20.720140
episode 122.000000, reward total was -21.000000. running mean: -20.722938
episode 123.000000, reward total was -20.000000. running mean: -20.715709
episode 124.000000, reward total was -16.000000. running mean: -20.668552
episode 125.000000, reward total was -21.000000. running mean: -20.671866
episode 126.000000, reward total was -19.000000. running mean: -20.655148
episode 127.000000, reward total was -

episode 225.000000, reward total was -20.000000. running mean: -20.376668
episode 226.000000, reward total was -18.000000. running mean: -20.352901
episode 227.000000, reward total was -20.000000. running mean: -20.349372
episode 228.000000, reward total was -19.000000. running mean: -20.335878
episode 229.000000, reward total was -21.000000. running mean: -20.342519
episode 230.000000, reward total was -21.000000. running mean: -20.349094
episode 231.000000, reward total was -20.000000. running mean: -20.345603
episode 232.000000, reward total was -21.000000. running mean: -20.352147
episode 233.000000, reward total was -19.000000. running mean: -20.338626
episode 234.000000, reward total was -21.000000. running mean: -20.345240
episode 235.000000, reward total was -21.000000. running mean: -20.351787
episode 236.000000, reward total was -20.000000. running mean: -20.348269
episode 237.000000, reward total was -21.000000. running mean: -20.354787
episode 238.000000, reward total was -

episode 336.000000, reward total was -21.000000. running mean: -20.315967
episode 337.000000, reward total was -21.000000. running mean: -20.322807
episode 338.000000, reward total was -21.000000. running mean: -20.329579
episode 339.000000, reward total was -19.000000. running mean: -20.316283
episode 340.000000, reward total was -21.000000. running mean: -20.323120
episode 341.000000, reward total was -21.000000. running mean: -20.329889
episode 342.000000, reward total was -21.000000. running mean: -20.336590
episode 343.000000, reward total was -20.000000. running mean: -20.333224
episode 344.000000, reward total was -20.000000. running mean: -20.329892
episode 345.000000, reward total was -19.000000. running mean: -20.316593
episode 346.000000, reward total was -19.000000. running mean: -20.303427
episode 347.000000, reward total was -21.000000. running mean: -20.310393
episode 348.000000, reward total was -20.000000. running mean: -20.307289
episode 349.000000, reward total was -

episode 447.000000, reward total was -21.000000. running mean: -20.345795
episode 448.000000, reward total was -17.000000. running mean: -20.312337
episode 449.000000, reward total was -20.000000. running mean: -20.309214
episode 450.000000, reward total was -21.000000. running mean: -20.316122
episode 451.000000, reward total was -19.000000. running mean: -20.302960
episode 452.000000, reward total was -20.000000. running mean: -20.299931
episode 453.000000, reward total was -21.000000. running mean: -20.306932
episode 454.000000, reward total was -19.000000. running mean: -20.293862
episode 455.000000, reward total was -21.000000. running mean: -20.300924
episode 456.000000, reward total was -20.000000. running mean: -20.297914
episode 457.000000, reward total was -21.000000. running mean: -20.304935
episode 458.000000, reward total was -21.000000. running mean: -20.311886
episode 459.000000, reward total was -19.000000. running mean: -20.298767
episode 460.000000, reward total was -

episode 558.000000, reward total was -20.000000. running mean: -20.293574
episode 559.000000, reward total was -21.000000. running mean: -20.300638
episode 560.000000, reward total was -20.000000. running mean: -20.297632
episode 561.000000, reward total was -21.000000. running mean: -20.304656
episode 562.000000, reward total was -21.000000. running mean: -20.311609
episode 563.000000, reward total was -19.000000. running mean: -20.298493
episode 564.000000, reward total was -21.000000. running mean: -20.305508
episode 565.000000, reward total was -21.000000. running mean: -20.312453
episode 566.000000, reward total was -21.000000. running mean: -20.319329
episode 567.000000, reward total was -20.000000. running mean: -20.316135
episode 568.000000, reward total was -21.000000. running mean: -20.322974
episode 569.000000, reward total was -19.000000. running mean: -20.309744
episode 570.000000, reward total was -21.000000. running mean: -20.316647
episode 571.000000, reward total was -

episode 669.000000, reward total was -21.000000. running mean: -20.271277
episode 670.000000, reward total was -20.000000. running mean: -20.268564
episode 671.000000, reward total was -21.000000. running mean: -20.275878
episode 672.000000, reward total was -21.000000. running mean: -20.283119
episode 673.000000, reward total was -21.000000. running mean: -20.290288
episode 674.000000, reward total was -21.000000. running mean: -20.297385
episode 675.000000, reward total was -21.000000. running mean: -20.304411
episode 676.000000, reward total was -21.000000. running mean: -20.311367
episode 677.000000, reward total was -20.000000. running mean: -20.308254
episode 678.000000, reward total was -21.000000. running mean: -20.315171
episode 679.000000, reward total was -20.000000. running mean: -20.312019
episode 680.000000, reward total was -21.000000. running mean: -20.318899
episode 681.000000, reward total was -18.000000. running mean: -20.295710
episode 682.000000, reward total was -

episode 780.000000, reward total was -21.000000. running mean: -20.185675
episode 781.000000, reward total was -21.000000. running mean: -20.193818
episode 782.000000, reward total was -21.000000. running mean: -20.201880
episode 783.000000, reward total was -21.000000. running mean: -20.209861
episode 784.000000, reward total was -21.000000. running mean: -20.217762
episode 785.000000, reward total was -21.000000. running mean: -20.225585
episode 786.000000, reward total was -21.000000. running mean: -20.233329
episode 787.000000, reward total was -21.000000. running mean: -20.240996
episode 788.000000, reward total was -21.000000. running mean: -20.248586
episode 789.000000, reward total was -20.000000. running mean: -20.246100
episode 790.000000, reward total was -21.000000. running mean: -20.253639
episode 791.000000, reward total was -21.000000. running mean: -20.261102
episode 792.000000, reward total was -18.000000. running mean: -20.238491
episode 793.000000, reward total was -

episode 891.000000, reward total was -21.000000. running mean: -20.249882
episode 892.000000, reward total was -20.000000. running mean: -20.247383
episode 893.000000, reward total was -18.000000. running mean: -20.224909
episode 894.000000, reward total was -20.000000. running mean: -20.222660
episode 895.000000, reward total was -20.000000. running mean: -20.220434
episode 896.000000, reward total was -21.000000. running mean: -20.228229
episode 897.000000, reward total was -20.000000. running mean: -20.225947
episode 898.000000, reward total was -20.000000. running mean: -20.223688
episode 899.000000, reward total was -21.000000. running mean: -20.231451
episode 900.000000, reward total was -20.000000. running mean: -20.229136
episode 901.000000, reward total was -20.000000. running mean: -20.226845
episode 902.000000, reward total was -20.000000. running mean: -20.224576
episode 903.000000, reward total was -20.000000. running mean: -20.222331
episode 904.000000, reward total was -

episode 1002.000000, reward total was -21.000000. running mean: -20.124768
episode 1003.000000, reward total was -21.000000. running mean: -20.133520
episode 1004.000000, reward total was -21.000000. running mean: -20.142185
episode 1005.000000, reward total was -19.000000. running mean: -20.130763
episode 1006.000000, reward total was -21.000000. running mean: -20.139455
episode 1007.000000, reward total was -20.000000. running mean: -20.138061
episode 1008.000000, reward total was -21.000000. running mean: -20.146680
episode 1009.000000, reward total was -20.000000. running mean: -20.145213
episode 1010.000000, reward total was -21.000000. running mean: -20.153761
episode 1011.000000, reward total was -21.000000. running mean: -20.162223
episode 1012.000000, reward total was -21.000000. running mean: -20.170601
episode 1013.000000, reward total was -21.000000. running mean: -20.178895
episode 1014.000000, reward total was -21.000000. running mean: -20.187106
episode 1015.000000, rewa

episode 1112.000000, reward total was -19.000000. running mean: -20.229001
episode 1113.000000, reward total was -20.000000. running mean: -20.226711
episode 1114.000000, reward total was -20.000000. running mean: -20.224444
episode 1115.000000, reward total was -20.000000. running mean: -20.222200
episode 1116.000000, reward total was -21.000000. running mean: -20.229978
episode 1117.000000, reward total was -19.000000. running mean: -20.217678
episode 1118.000000, reward total was -18.000000. running mean: -20.195501
episode 1119.000000, reward total was -21.000000. running mean: -20.203546
episode 1120.000000, reward total was -21.000000. running mean: -20.211511
episode 1121.000000, reward total was -17.000000. running mean: -20.179396
episode 1122.000000, reward total was -21.000000. running mean: -20.187602
episode 1123.000000, reward total was -20.000000. running mean: -20.185726
episode 1124.000000, reward total was -19.000000. running mean: -20.173868
episode 1125.000000, rewa

episode 1222.000000, reward total was -20.000000. running mean: -20.207511
episode 1223.000000, reward total was -19.000000. running mean: -20.195436
episode 1224.000000, reward total was -21.000000. running mean: -20.203482
episode 1225.000000, reward total was -19.000000. running mean: -20.191447
episode 1226.000000, reward total was -21.000000. running mean: -20.199532
episode 1227.000000, reward total was -19.000000. running mean: -20.187537
episode 1228.000000, reward total was -21.000000. running mean: -20.195662
episode 1229.000000, reward total was -21.000000. running mean: -20.203705
episode 1230.000000, reward total was -20.000000. running mean: -20.201668
episode 1231.000000, reward total was -19.000000. running mean: -20.189651
episode 1232.000000, reward total was -20.000000. running mean: -20.187755
episode 1233.000000, reward total was -21.000000. running mean: -20.195877
episode 1234.000000, reward total was -21.000000. running mean: -20.203918
episode 1235.000000, rewa

episode 1332.000000, reward total was -21.000000. running mean: -20.236377
episode 1333.000000, reward total was -16.000000. running mean: -20.194014
episode 1334.000000, reward total was -20.000000. running mean: -20.192073
episode 1335.000000, reward total was -21.000000. running mean: -20.200153
episode 1336.000000, reward total was -20.000000. running mean: -20.198151
episode 1337.000000, reward total was -21.000000. running mean: -20.206170
episode 1338.000000, reward total was -20.000000. running mean: -20.204108
episode 1339.000000, reward total was -20.000000. running mean: -20.202067
episode 1340.000000, reward total was -21.000000. running mean: -20.210046
episode 1341.000000, reward total was -20.000000. running mean: -20.207946
episode 1342.000000, reward total was -19.000000. running mean: -20.195866
episode 1343.000000, reward total was -21.000000. running mean: -20.203908
episode 1344.000000, reward total was -21.000000. running mean: -20.211869
episode 1345.000000, rewa

episode 1442.000000, reward total was -20.000000. running mean: -20.145525
episode 1443.000000, reward total was -21.000000. running mean: -20.154069
episode 1444.000000, reward total was -21.000000. running mean: -20.162529
episode 1445.000000, reward total was -21.000000. running mean: -20.170903
episode 1446.000000, reward total was -19.000000. running mean: -20.159194
episode 1447.000000, reward total was -19.000000. running mean: -20.147602
episode 1448.000000, reward total was -21.000000. running mean: -20.156126
episode 1449.000000, reward total was -20.000000. running mean: -20.154565
episode 1450.000000, reward total was -20.000000. running mean: -20.153019
episode 1451.000000, reward total was -19.000000. running mean: -20.141489
episode 1452.000000, reward total was -21.000000. running mean: -20.150074
episode 1453.000000, reward total was -20.000000. running mean: -20.148574
episode 1454.000000, reward total was -19.000000. running mean: -20.137088
episode 1455.000000, rewa

episode 1552.000000, reward total was -20.000000. running mean: -20.093072
episode 1553.000000, reward total was -19.000000. running mean: -20.082141
episode 1554.000000, reward total was -21.000000. running mean: -20.091320
episode 1555.000000, reward total was -21.000000. running mean: -20.100407
episode 1556.000000, reward total was -21.000000. running mean: -20.109403
episode 1557.000000, reward total was -21.000000. running mean: -20.118309
episode 1558.000000, reward total was -20.000000. running mean: -20.117125
episode 1559.000000, reward total was -20.000000. running mean: -20.115954
episode 1560.000000, reward total was -20.000000. running mean: -20.114795
episode 1561.000000, reward total was -20.000000. running mean: -20.113647
episode 1562.000000, reward total was -18.000000. running mean: -20.092510
episode 1563.000000, reward total was -21.000000. running mean: -20.101585
episode 1564.000000, reward total was -21.000000. running mean: -20.110569
episode 1565.000000, rewa

episode 1662.000000, reward total was -19.000000. running mean: -20.050998
episode 1663.000000, reward total was -21.000000. running mean: -20.060488
episode 1664.000000, reward total was -21.000000. running mean: -20.069883
episode 1665.000000, reward total was -21.000000. running mean: -20.079184
episode 1666.000000, reward total was -21.000000. running mean: -20.088392
episode 1667.000000, reward total was -21.000000. running mean: -20.097508
episode 1668.000000, reward total was -20.000000. running mean: -20.096533
episode 1669.000000, reward total was -21.000000. running mean: -20.105568
episode 1670.000000, reward total was -19.000000. running mean: -20.094512
episode 1671.000000, reward total was -19.000000. running mean: -20.083567
episode 1672.000000, reward total was -18.000000. running mean: -20.062731
episode 1673.000000, reward total was -21.000000. running mean: -20.072104
episode 1674.000000, reward total was -20.000000. running mean: -20.071383
episode 1675.000000, rewa

episode 1772.000000, reward total was -21.000000. running mean: -20.072165
episode 1773.000000, reward total was -21.000000. running mean: -20.081443
episode 1774.000000, reward total was -20.000000. running mean: -20.080629
episode 1775.000000, reward total was -19.000000. running mean: -20.069822
episode 1776.000000, reward total was -21.000000. running mean: -20.079124
episode 1777.000000, reward total was -21.000000. running mean: -20.088333
episode 1778.000000, reward total was -19.000000. running mean: -20.077450
episode 1779.000000, reward total was -21.000000. running mean: -20.086675
episode 1780.000000, reward total was -17.000000. running mean: -20.055808
episode 1781.000000, reward total was -20.000000. running mean: -20.055250
episode 1782.000000, reward total was -17.000000. running mean: -20.024698
episode 1783.000000, reward total was -21.000000. running mean: -20.034451
episode 1784.000000, reward total was -21.000000. running mean: -20.044106
episode 1785.000000, rewa

episode 1882.000000, reward total was -19.000000. running mean: -19.866356
episode 1883.000000, reward total was -19.000000. running mean: -19.857692
episode 1884.000000, reward total was -20.000000. running mean: -19.859115
episode 1885.000000, reward total was -21.000000. running mean: -19.870524
episode 1886.000000, reward total was -20.000000. running mean: -19.871819
episode 1887.000000, reward total was -19.000000. running mean: -19.863101
episode 1888.000000, reward total was -19.000000. running mean: -19.854470
episode 1889.000000, reward total was -20.000000. running mean: -19.855925
episode 1890.000000, reward total was -21.000000. running mean: -19.867366
episode 1891.000000, reward total was -20.000000. running mean: -19.868692
episode 1892.000000, reward total was -19.000000. running mean: -19.860005
episode 1893.000000, reward total was -21.000000. running mean: -19.871405
episode 1894.000000, reward total was -20.000000. running mean: -19.872691
episode 1895.000000, rewa

episode 1992.000000, reward total was -19.000000. running mean: -19.879321
episode 1993.000000, reward total was -21.000000. running mean: -19.890528
episode 1994.000000, reward total was -21.000000. running mean: -19.901623
episode 1995.000000, reward total was -18.000000. running mean: -19.882606
episode 1996.000000, reward total was -20.000000. running mean: -19.883780
episode 1997.000000, reward total was -20.000000. running mean: -19.884942
episode 1998.000000, reward total was -18.000000. running mean: -19.866093
episode 1999.000000, reward total was -21.000000. running mean: -19.877432
episode 2000.000000, reward total was -21.000000. running mean: -19.888658
episode 2001.000000, reward total was -19.000000. running mean: -19.879771
episode 2002.000000, reward total was -21.000000. running mean: -19.890974
episode 2003.000000, reward total was -20.000000. running mean: -19.892064
episode 2004.000000, reward total was -21.000000. running mean: -19.903143
episode 2005.000000, rewa

episode 2102.000000, reward total was -19.000000. running mean: -19.870126
episode 2103.000000, reward total was -21.000000. running mean: -19.881425
episode 2104.000000, reward total was -21.000000. running mean: -19.892610
episode 2105.000000, reward total was -19.000000. running mean: -19.883684
episode 2106.000000, reward total was -20.000000. running mean: -19.884848
episode 2107.000000, reward total was -19.000000. running mean: -19.875999
episode 2108.000000, reward total was -20.000000. running mean: -19.877239
episode 2109.000000, reward total was -20.000000. running mean: -19.878467
episode 2110.000000, reward total was -21.000000. running mean: -19.889682
episode 2111.000000, reward total was -19.000000. running mean: -19.880785
episode 2112.000000, reward total was -20.000000. running mean: -19.881977
episode 2113.000000, reward total was -20.000000. running mean: -19.883158
episode 2114.000000, reward total was -18.000000. running mean: -19.864326
episode 2115.000000, rewa

episode 2212.000000, reward total was -21.000000. running mean: -19.963772
episode 2213.000000, reward total was -19.000000. running mean: -19.954135
episode 2214.000000, reward total was -20.000000. running mean: -19.954593
episode 2215.000000, reward total was -16.000000. running mean: -19.915047
episode 2216.000000, reward total was -21.000000. running mean: -19.925897
episode 2217.000000, reward total was -20.000000. running mean: -19.926638
episode 2218.000000, reward total was -20.000000. running mean: -19.927372
episode 2219.000000, reward total was -21.000000. running mean: -19.938098
episode 2220.000000, reward total was -18.000000. running mean: -19.918717
episode 2221.000000, reward total was -20.000000. running mean: -19.919530
episode 2222.000000, reward total was -20.000000. running mean: -19.920334
episode 2223.000000, reward total was -17.000000. running mean: -19.891131
episode 2224.000000, reward total was -18.000000. running mean: -19.872220
episode 2225.000000, rewa

episode 2322.000000, reward total was -20.000000. running mean: -19.788800
episode 2323.000000, reward total was -18.000000. running mean: -19.770912
episode 2324.000000, reward total was -21.000000. running mean: -19.783203
episode 2325.000000, reward total was -21.000000. running mean: -19.795371
episode 2326.000000, reward total was -19.000000. running mean: -19.787417
episode 2327.000000, reward total was -21.000000. running mean: -19.799543
episode 2328.000000, reward total was -19.000000. running mean: -19.791548
episode 2329.000000, reward total was -20.000000. running mean: -19.793632
episode 2330.000000, reward total was -20.000000. running mean: -19.795696
episode 2331.000000, reward total was -17.000000. running mean: -19.767739
episode 2332.000000, reward total was -21.000000. running mean: -19.780062
episode 2333.000000, reward total was -20.000000. running mean: -19.782261
episode 2334.000000, reward total was -18.000000. running mean: -19.764438
episode 2335.000000, rewa

episode 2432.000000, reward total was -21.000000. running mean: -19.626819
episode 2433.000000, reward total was -20.000000. running mean: -19.630551
episode 2434.000000, reward total was -17.000000. running mean: -19.604245
episode 2435.000000, reward total was -21.000000. running mean: -19.618203
episode 2436.000000, reward total was -19.000000. running mean: -19.612021
episode 2437.000000, reward total was -18.000000. running mean: -19.595901
episode 2438.000000, reward total was -21.000000. running mean: -19.609942
episode 2439.000000, reward total was -20.000000. running mean: -19.613842
episode 2440.000000, reward total was -21.000000. running mean: -19.627704
episode 2441.000000, reward total was -19.000000. running mean: -19.621427
episode 2442.000000, reward total was -21.000000. running mean: -19.635212
episode 2443.000000, reward total was -19.000000. running mean: -19.628860
episode 2444.000000, reward total was -20.000000. running mean: -19.632572
episode 2445.000000, rewa

episode 2542.000000, reward total was -18.000000. running mean: -19.777380
episode 2543.000000, reward total was -21.000000. running mean: -19.789607
episode 2544.000000, reward total was -20.000000. running mean: -19.791711
episode 2545.000000, reward total was -19.000000. running mean: -19.783793
episode 2546.000000, reward total was -20.000000. running mean: -19.785955
episode 2547.000000, reward total was -21.000000. running mean: -19.798096
episode 2548.000000, reward total was -19.000000. running mean: -19.790115
episode 2549.000000, reward total was -18.000000. running mean: -19.772214
episode 2550.000000, reward total was -19.000000. running mean: -19.764492
episode 2551.000000, reward total was -19.000000. running mean: -19.756847
episode 2552.000000, reward total was -21.000000. running mean: -19.769278
episode 2553.000000, reward total was -21.000000. running mean: -19.781586
episode 2554.000000, reward total was -18.000000. running mean: -19.763770
episode 2555.000000, rewa

episode 2652.000000, reward total was -20.000000. running mean: -19.658120
episode 2653.000000, reward total was -21.000000. running mean: -19.671539
episode 2654.000000, reward total was -20.000000. running mean: -19.674824
episode 2655.000000, reward total was -20.000000. running mean: -19.678076
episode 2656.000000, reward total was -19.000000. running mean: -19.671295
episode 2657.000000, reward total was -21.000000. running mean: -19.684582
episode 2658.000000, reward total was -17.000000. running mean: -19.657736
episode 2659.000000, reward total was -18.000000. running mean: -19.641159
episode 2660.000000, reward total was -20.000000. running mean: -19.644747
episode 2661.000000, reward total was -19.000000. running mean: -19.638300
episode 2662.000000, reward total was -21.000000. running mean: -19.651917
episode 2663.000000, reward total was -20.000000. running mean: -19.655397
episode 2664.000000, reward total was -19.000000. running mean: -19.648843
episode 2665.000000, rewa

episode 2762.000000, reward total was -17.000000. running mean: -19.687805
episode 2763.000000, reward total was -20.000000. running mean: -19.690927
episode 2764.000000, reward total was -20.000000. running mean: -19.694018
episode 2765.000000, reward total was -20.000000. running mean: -19.697077
episode 2766.000000, reward total was -20.000000. running mean: -19.700107
episode 2767.000000, reward total was -19.000000. running mean: -19.693106
episode 2768.000000, reward total was -18.000000. running mean: -19.676175
episode 2769.000000, reward total was -21.000000. running mean: -19.689413
episode 2770.000000, reward total was -20.000000. running mean: -19.692519
episode 2771.000000, reward total was -21.000000. running mean: -19.705594
episode 2772.000000, reward total was -20.000000. running mean: -19.708538
episode 2773.000000, reward total was -21.000000. running mean: -19.721452
episode 2774.000000, reward total was -20.000000. running mean: -19.724238
episode 2775.000000, rewa

episode 2872.000000, reward total was -20.000000. running mean: -19.673181
episode 2873.000000, reward total was -21.000000. running mean: -19.686449
episode 2874.000000, reward total was -20.000000. running mean: -19.689585
episode 2875.000000, reward total was -18.000000. running mean: -19.672689
episode 2876.000000, reward total was -21.000000. running mean: -19.685962
episode 2877.000000, reward total was -21.000000. running mean: -19.699103
episode 2878.000000, reward total was -21.000000. running mean: -19.712112
episode 2879.000000, reward total was -17.000000. running mean: -19.684990
episode 2880.000000, reward total was -19.000000. running mean: -19.678141
episode 2881.000000, reward total was -20.000000. running mean: -19.681359
episode 2882.000000, reward total was -20.000000. running mean: -19.684546
episode 2883.000000, reward total was -19.000000. running mean: -19.677700
episode 2884.000000, reward total was -18.000000. running mean: -19.660923
episode 2885.000000, rewa

episode 2982.000000, reward total was -21.000000. running mean: -19.865418
episode 2983.000000, reward total was -19.000000. running mean: -19.856764
episode 2984.000000, reward total was -20.000000. running mean: -19.858196
episode 2985.000000, reward total was -18.000000. running mean: -19.839614
episode 2986.000000, reward total was -20.000000. running mean: -19.841218
episode 2987.000000, reward total was -19.000000. running mean: -19.832806
episode 2988.000000, reward total was -19.000000. running mean: -19.824478
episode 2989.000000, reward total was -18.000000. running mean: -19.806233
episode 2990.000000, reward total was -20.000000. running mean: -19.808171
episode 2991.000000, reward total was -21.000000. running mean: -19.820089
episode 2992.000000, reward total was -20.000000. running mean: -19.821888
episode 2993.000000, reward total was -21.000000. running mean: -19.833669
episode 2994.000000, reward total was -19.000000. running mean: -19.825333
episode 2995.000000, rewa

episode 3092.000000, reward total was -19.000000. running mean: -19.528432
episode 3093.000000, reward total was -20.000000. running mean: -19.533147
episode 3094.000000, reward total was -19.000000. running mean: -19.527816
episode 3095.000000, reward total was -20.000000. running mean: -19.532538
episode 3096.000000, reward total was -20.000000. running mean: -19.537212
episode 3097.000000, reward total was -21.000000. running mean: -19.551840
episode 3098.000000, reward total was -20.000000. running mean: -19.556322
episode 3099.000000, reward total was -17.000000. running mean: -19.530759
episode 3100.000000, reward total was -21.000000. running mean: -19.545451
episode 3101.000000, reward total was -21.000000. running mean: -19.559996
episode 3102.000000, reward total was -20.000000. running mean: -19.564396
episode 3103.000000, reward total was -19.000000. running mean: -19.558752
episode 3104.000000, reward total was -21.000000. running mean: -19.573165
episode 3105.000000, rewa

episode 3202.000000, reward total was -21.000000. running mean: -19.605569
episode 3203.000000, reward total was -21.000000. running mean: -19.619513
episode 3204.000000, reward total was -19.000000. running mean: -19.613318
episode 3205.000000, reward total was -19.000000. running mean: -19.607185
episode 3206.000000, reward total was -19.000000. running mean: -19.601113
episode 3207.000000, reward total was -21.000000. running mean: -19.615102
episode 3208.000000, reward total was -19.000000. running mean: -19.608951
episode 3209.000000, reward total was -19.000000. running mean: -19.602861
episode 3210.000000, reward total was -18.000000. running mean: -19.586833
episode 3211.000000, reward total was -21.000000. running mean: -19.600964
episode 3212.000000, reward total was -20.000000. running mean: -19.604955
episode 3213.000000, reward total was -18.000000. running mean: -19.588905
episode 3214.000000, reward total was -20.000000. running mean: -19.593016
episode 3215.000000, rewa

episode 3312.000000, reward total was -20.000000. running mean: -19.485119
episode 3313.000000, reward total was -18.000000. running mean: -19.470268
episode 3314.000000, reward total was -18.000000. running mean: -19.455566
episode 3315.000000, reward total was -20.000000. running mean: -19.461010
episode 3316.000000, reward total was -21.000000. running mean: -19.476400
episode 3317.000000, reward total was -21.000000. running mean: -19.491636
episode 3318.000000, reward total was -18.000000. running mean: -19.476719
episode 3319.000000, reward total was -16.000000. running mean: -19.441952
episode 3320.000000, reward total was -20.000000. running mean: -19.447533
episode 3321.000000, reward total was -19.000000. running mean: -19.443057
episode 3322.000000, reward total was -19.000000. running mean: -19.438627
episode 3323.000000, reward total was -17.000000. running mean: -19.414241
episode 3324.000000, reward total was -19.000000. running mean: -19.410098
episode 3325.000000, rewa

episode 3422.000000, reward total was -21.000000. running mean: -19.465518
episode 3423.000000, reward total was -19.000000. running mean: -19.460863
episode 3424.000000, reward total was -21.000000. running mean: -19.476255
episode 3425.000000, reward total was -19.000000. running mean: -19.471492
episode 3426.000000, reward total was -19.000000. running mean: -19.466777
episode 3427.000000, reward total was -20.000000. running mean: -19.472109
episode 3428.000000, reward total was -19.000000. running mean: -19.467388
episode 3429.000000, reward total was -21.000000. running mean: -19.482714
episode 3430.000000, reward total was -20.000000. running mean: -19.487887
episode 3431.000000, reward total was -19.000000. running mean: -19.483008
episode 3432.000000, reward total was -19.000000. running mean: -19.478178
episode 3433.000000, reward total was -20.000000. running mean: -19.483396
episode 3434.000000, reward total was -20.000000. running mean: -19.488562
episode 3435.000000, rewa

episode 3532.000000, reward total was -17.000000. running mean: -19.377485
episode 3533.000000, reward total was -20.000000. running mean: -19.383711
episode 3534.000000, reward total was -19.000000. running mean: -19.379873
episode 3535.000000, reward total was -21.000000. running mean: -19.396075
episode 3536.000000, reward total was -20.000000. running mean: -19.402114
episode 3537.000000, reward total was -20.000000. running mean: -19.408093
episode 3538.000000, reward total was -19.000000. running mean: -19.404012
episode 3539.000000, reward total was -19.000000. running mean: -19.399972
episode 3540.000000, reward total was -19.000000. running mean: -19.395972
episode 3541.000000, reward total was -21.000000. running mean: -19.412012
episode 3542.000000, reward total was -20.000000. running mean: -19.417892
episode 3543.000000, reward total was -19.000000. running mean: -19.413713
episode 3544.000000, reward total was -19.000000. running mean: -19.409576
episode 3545.000000, rewa

episode 3642.000000, reward total was -20.000000. running mean: -19.511121
episode 3643.000000, reward total was -19.000000. running mean: -19.506009
episode 3644.000000, reward total was -20.000000. running mean: -19.510949
episode 3645.000000, reward total was -18.000000. running mean: -19.495840
episode 3646.000000, reward total was -20.000000. running mean: -19.500881
episode 3647.000000, reward total was -20.000000. running mean: -19.505873
episode 3648.000000, reward total was -21.000000. running mean: -19.520814
episode 3649.000000, reward total was -17.000000. running mean: -19.495606
episode 3650.000000, reward total was -20.000000. running mean: -19.500650
episode 3651.000000, reward total was -20.000000. running mean: -19.505643
episode 3652.000000, reward total was -21.000000. running mean: -19.520587
episode 3653.000000, reward total was -20.000000. running mean: -19.525381
episode 3654.000000, reward total was -18.000000. running mean: -19.510127
episode 3655.000000, rewa

episode 3752.000000, reward total was -17.000000. running mean: -19.365843
episode 3753.000000, reward total was -19.000000. running mean: -19.362185
episode 3754.000000, reward total was -18.000000. running mean: -19.348563
episode 3755.000000, reward total was -19.000000. running mean: -19.345077
episode 3756.000000, reward total was -18.000000. running mean: -19.331627
episode 3757.000000, reward total was -20.000000. running mean: -19.338310
episode 3758.000000, reward total was -20.000000. running mean: -19.344927
episode 3759.000000, reward total was -20.000000. running mean: -19.351478
episode 3760.000000, reward total was -20.000000. running mean: -19.357963
episode 3761.000000, reward total was -20.000000. running mean: -19.364383
episode 3762.000000, reward total was -19.000000. running mean: -19.360740
episode 3763.000000, reward total was -21.000000. running mean: -19.377132
episode 3764.000000, reward total was -21.000000. running mean: -19.393361
episode 3765.000000, rewa

episode 3862.000000, reward total was -21.000000. running mean: -19.455578
episode 3863.000000, reward total was -18.000000. running mean: -19.441022
episode 3864.000000, reward total was -20.000000. running mean: -19.446612
episode 3865.000000, reward total was -19.000000. running mean: -19.442146
episode 3866.000000, reward total was -20.000000. running mean: -19.447724
episode 3867.000000, reward total was -20.000000. running mean: -19.453247
episode 3868.000000, reward total was -20.000000. running mean: -19.458715
episode 3869.000000, reward total was -16.000000. running mean: -19.424128
episode 3870.000000, reward total was -20.000000. running mean: -19.429886
episode 3871.000000, reward total was -18.000000. running mean: -19.415587
episode 3872.000000, reward total was -19.000000. running mean: -19.411432
episode 3873.000000, reward total was -21.000000. running mean: -19.427317
episode 3874.000000, reward total was -18.000000. running mean: -19.413044
episode 3875.000000, rewa

episode 3972.000000, reward total was -18.000000. running mean: -19.203600
episode 3973.000000, reward total was -19.000000. running mean: -19.201564
episode 3974.000000, reward total was -21.000000. running mean: -19.219549
episode 3975.000000, reward total was -20.000000. running mean: -19.227353
episode 3976.000000, reward total was -20.000000. running mean: -19.235080
episode 3977.000000, reward total was -20.000000. running mean: -19.242729
episode 3978.000000, reward total was -16.000000. running mean: -19.210302
episode 3979.000000, reward total was -19.000000. running mean: -19.208199
episode 3980.000000, reward total was -18.000000. running mean: -19.196117
episode 3981.000000, reward total was -21.000000. running mean: -19.214155
episode 3982.000000, reward total was -19.000000. running mean: -19.212014
episode 3983.000000, reward total was -21.000000. running mean: -19.229894
episode 3984.000000, reward total was -19.000000. running mean: -19.227595
episode 3985.000000, rewa

episode 4082.000000, reward total was -17.000000. running mean: -19.165283
episode 4083.000000, reward total was -21.000000. running mean: -19.183630
episode 4084.000000, reward total was -19.000000. running mean: -19.181794
episode 4085.000000, reward total was -21.000000. running mean: -19.199976
episode 4086.000000, reward total was -18.000000. running mean: -19.187976
episode 4087.000000, reward total was -15.000000. running mean: -19.146096
episode 4088.000000, reward total was -18.000000. running mean: -19.134635
episode 4089.000000, reward total was -20.000000. running mean: -19.143289
episode 4090.000000, reward total was -19.000000. running mean: -19.141856
episode 4091.000000, reward total was -18.000000. running mean: -19.130437
episode 4092.000000, reward total was -21.000000. running mean: -19.149133
episode 4093.000000, reward total was -18.000000. running mean: -19.137642
episode 4094.000000, reward total was -21.000000. running mean: -19.156265
episode 4095.000000, rewa

episode 4192.000000, reward total was -17.000000. running mean: -19.102262
episode 4193.000000, reward total was -20.000000. running mean: -19.111240
episode 4194.000000, reward total was -19.000000. running mean: -19.110127
episode 4195.000000, reward total was -18.000000. running mean: -19.099026
episode 4196.000000, reward total was -20.000000. running mean: -19.108036
episode 4197.000000, reward total was -20.000000. running mean: -19.116955
episode 4198.000000, reward total was -19.000000. running mean: -19.115786
episode 4199.000000, reward total was -20.000000. running mean: -19.124628
episode 4200.000000, reward total was -15.000000. running mean: -19.083382
episode 4201.000000, reward total was -21.000000. running mean: -19.102548
episode 4202.000000, reward total was -19.000000. running mean: -19.101522
episode 4203.000000, reward total was -20.000000. running mean: -19.110507
episode 4204.000000, reward total was -21.000000. running mean: -19.129402
episode 4205.000000, rewa

episode 4302.000000, reward total was -17.000000. running mean: -19.126213
episode 4303.000000, reward total was -20.000000. running mean: -19.134950
episode 4304.000000, reward total was -21.000000. running mean: -19.153601
episode 4305.000000, reward total was -19.000000. running mean: -19.152065
episode 4306.000000, reward total was -18.000000. running mean: -19.140544
episode 4307.000000, reward total was -19.000000. running mean: -19.139139
episode 4308.000000, reward total was -20.000000. running mean: -19.147747
episode 4309.000000, reward total was -14.000000. running mean: -19.096270
episode 4310.000000, reward total was -20.000000. running mean: -19.105307
episode 4311.000000, reward total was -19.000000. running mean: -19.104254
episode 4312.000000, reward total was -19.000000. running mean: -19.103212
episode 4313.000000, reward total was -15.000000. running mean: -19.062180
episode 4314.000000, reward total was -20.000000. running mean: -19.071558
episode 4315.000000, rewa

episode 4412.000000, reward total was -19.000000. running mean: -18.955327
episode 4413.000000, reward total was -21.000000. running mean: -18.975774
episode 4414.000000, reward total was -17.000000. running mean: -18.956016
episode 4415.000000, reward total was -19.000000. running mean: -18.956456
episode 4416.000000, reward total was -21.000000. running mean: -18.976891
episode 4417.000000, reward total was -20.000000. running mean: -18.987122
episode 4418.000000, reward total was -20.000000. running mean: -18.997251
episode 4419.000000, reward total was -18.000000. running mean: -18.987279
episode 4420.000000, reward total was -19.000000. running mean: -18.987406
episode 4421.000000, reward total was -21.000000. running mean: -19.007532
episode 4422.000000, reward total was -16.000000. running mean: -18.977457
episode 4423.000000, reward total was -19.000000. running mean: -18.977682
episode 4424.000000, reward total was -18.000000. running mean: -18.967905
episode 4425.000000, rewa

episode 4522.000000, reward total was -14.000000. running mean: -19.012187
episode 4523.000000, reward total was -18.000000. running mean: -19.002065
episode 4524.000000, reward total was -19.000000. running mean: -19.002044
episode 4525.000000, reward total was -20.000000. running mean: -19.012024
episode 4526.000000, reward total was -18.000000. running mean: -19.001904
episode 4527.000000, reward total was -21.000000. running mean: -19.021885
episode 4528.000000, reward total was -19.000000. running mean: -19.021666
episode 4529.000000, reward total was -19.000000. running mean: -19.021449
episode 4530.000000, reward total was -20.000000. running mean: -19.031235
episode 4531.000000, reward total was -20.000000. running mean: -19.040922
episode 4532.000000, reward total was -18.000000. running mean: -19.030513
episode 4533.000000, reward total was -19.000000. running mean: -19.030208
episode 4534.000000, reward total was -18.000000. running mean: -19.019906
episode 4535.000000, rewa

episode 4632.000000, reward total was -20.000000. running mean: -19.103258
episode 4633.000000, reward total was -21.000000. running mean: -19.122225
episode 4634.000000, reward total was -14.000000. running mean: -19.071003
episode 4635.000000, reward total was -19.000000. running mean: -19.070293
episode 4636.000000, reward total was -20.000000. running mean: -19.079590
episode 4637.000000, reward total was -16.000000. running mean: -19.048794
episode 4638.000000, reward total was -19.000000. running mean: -19.048306
episode 4639.000000, reward total was -21.000000. running mean: -19.067823
episode 4640.000000, reward total was -19.000000. running mean: -19.067145
episode 4641.000000, reward total was -19.000000. running mean: -19.066473
episode 4642.000000, reward total was -18.000000. running mean: -19.055809
episode 4643.000000, reward total was -15.000000. running mean: -19.015251
episode 4644.000000, reward total was -17.000000. running mean: -18.995098
episode 4645.000000, rewa

episode 4742.000000, reward total was -21.000000. running mean: -18.858362
episode 4743.000000, reward total was -18.000000. running mean: -18.849778
episode 4744.000000, reward total was -20.000000. running mean: -18.861280
episode 4745.000000, reward total was -19.000000. running mean: -18.862667
episode 4746.000000, reward total was -17.000000. running mean: -18.844041
episode 4747.000000, reward total was -16.000000. running mean: -18.815600
episode 4748.000000, reward total was -16.000000. running mean: -18.787444
episode 4749.000000, reward total was -21.000000. running mean: -18.809570
episode 4750.000000, reward total was -17.000000. running mean: -18.791474
episode 4751.000000, reward total was -20.000000. running mean: -18.803559
episode 4752.000000, reward total was -18.000000. running mean: -18.795524
episode 4753.000000, reward total was -19.000000. running mean: -18.797569
episode 4754.000000, reward total was -18.000000. running mean: -18.789593
episode 4755.000000, rewa

episode 4852.000000, reward total was -18.000000. running mean: -18.709906
episode 4853.000000, reward total was -15.000000. running mean: -18.672807
episode 4854.000000, reward total was -16.000000. running mean: -18.646079
episode 4855.000000, reward total was -16.000000. running mean: -18.619618
episode 4856.000000, reward total was -20.000000. running mean: -18.633422
episode 4857.000000, reward total was -19.000000. running mean: -18.637088
episode 4858.000000, reward total was -21.000000. running mean: -18.660717
episode 4859.000000, reward total was -16.000000. running mean: -18.634109
episode 4860.000000, reward total was -17.000000. running mean: -18.617768
episode 4861.000000, reward total was -18.000000. running mean: -18.611591
episode 4862.000000, reward total was -20.000000. running mean: -18.625475
episode 4863.000000, reward total was -16.000000. running mean: -18.599220
episode 4864.000000, reward total was -15.000000. running mean: -18.563228
episode 4865.000000, rewa

episode 4962.000000, reward total was -19.000000. running mean: -18.703512
episode 4963.000000, reward total was -18.000000. running mean: -18.696477
episode 4964.000000, reward total was -17.000000. running mean: -18.679512
episode 4965.000000, reward total was -21.000000. running mean: -18.702717
episode 4966.000000, reward total was -19.000000. running mean: -18.705690
episode 4967.000000, reward total was -17.000000. running mean: -18.688633
episode 4968.000000, reward total was -20.000000. running mean: -18.701746
episode 4969.000000, reward total was -17.000000. running mean: -18.684729
episode 4970.000000, reward total was -16.000000. running mean: -18.657882
episode 4971.000000, reward total was -19.000000. running mean: -18.661303
episode 4972.000000, reward total was -19.000000. running mean: -18.664690
episode 4973.000000, reward total was -18.000000. running mean: -18.658043
episode 4974.000000, reward total was -20.000000. running mean: -18.671463
episode 4975.000000, rewa

episode 5072.000000, reward total was -18.000000. running mean: -18.760584
episode 5073.000000, reward total was -18.000000. running mean: -18.752978
episode 5074.000000, reward total was -20.000000. running mean: -18.765448
episode 5075.000000, reward total was -20.000000. running mean: -18.777794
episode 5076.000000, reward total was -19.000000. running mean: -18.780016
episode 5077.000000, reward total was -19.000000. running mean: -18.782216
episode 5078.000000, reward total was -19.000000. running mean: -18.784394
episode 5079.000000, reward total was -16.000000. running mean: -18.756550
episode 5080.000000, reward total was -21.000000. running mean: -18.778984
episode 5081.000000, reward total was -17.000000. running mean: -18.761194
episode 5082.000000, reward total was -20.000000. running mean: -18.773582
episode 5083.000000, reward total was -19.000000. running mean: -18.775846
episode 5084.000000, reward total was -20.000000. running mean: -18.788088
episode 5085.000000, rewa

episode 5182.000000, reward total was -19.000000. running mean: -18.506610
episode 5183.000000, reward total was -17.000000. running mean: -18.491544
episode 5184.000000, reward total was -15.000000. running mean: -18.456628
episode 5185.000000, reward total was -18.000000. running mean: -18.452062
episode 5186.000000, reward total was -19.000000. running mean: -18.457541
episode 5187.000000, reward total was -16.000000. running mean: -18.432966
episode 5188.000000, reward total was -17.000000. running mean: -18.418636
episode 5189.000000, reward total was -15.000000. running mean: -18.384450
episode 5190.000000, reward total was -19.000000. running mean: -18.390605
episode 5191.000000, reward total was -17.000000. running mean: -18.376699
episode 5192.000000, reward total was -18.000000. running mean: -18.372932
episode 5193.000000, reward total was -17.000000. running mean: -18.359203
episode 5194.000000, reward total was -21.000000. running mean: -18.385611
episode 5195.000000, rewa

episode 5292.000000, reward total was -14.000000. running mean: -18.676525
episode 5293.000000, reward total was -15.000000. running mean: -18.639759
episode 5294.000000, reward total was -18.000000. running mean: -18.633362
episode 5295.000000, reward total was -19.000000. running mean: -18.637028
episode 5296.000000, reward total was -19.000000. running mean: -18.640658
episode 5297.000000, reward total was -19.000000. running mean: -18.644251
episode 5298.000000, reward total was -16.000000. running mean: -18.617809
episode 5299.000000, reward total was -17.000000. running mean: -18.601631
episode 5300.000000, reward total was -19.000000. running mean: -18.605614
episode 5301.000000, reward total was -18.000000. running mean: -18.599558
episode 5302.000000, reward total was -18.000000. running mean: -18.593563
episode 5303.000000, reward total was -21.000000. running mean: -18.617627
episode 5304.000000, reward total was -19.000000. running mean: -18.621451
episode 5305.000000, rewa

episode 5402.000000, reward total was -20.000000. running mean: -18.432947
episode 5403.000000, reward total was -19.000000. running mean: -18.438617
episode 5404.000000, reward total was -18.000000. running mean: -18.434231
episode 5405.000000, reward total was -20.000000. running mean: -18.449889
episode 5406.000000, reward total was -17.000000. running mean: -18.435390
episode 5407.000000, reward total was -18.000000. running mean: -18.431036
episode 5408.000000, reward total was -21.000000. running mean: -18.456726
episode 5409.000000, reward total was -19.000000. running mean: -18.462159
episode 5410.000000, reward total was -20.000000. running mean: -18.477537
episode 5411.000000, reward total was -17.000000. running mean: -18.462762
episode 5412.000000, reward total was -17.000000. running mean: -18.448134
episode 5413.000000, reward total was -17.000000. running mean: -18.433653
episode 5414.000000, reward total was -19.000000. running mean: -18.439316
episode 5415.000000, rewa

episode 5512.000000, reward total was -17.000000. running mean: -18.329470
episode 5513.000000, reward total was -21.000000. running mean: -18.356175
episode 5514.000000, reward total was -16.000000. running mean: -18.332613
episode 5515.000000, reward total was -19.000000. running mean: -18.339287
episode 5516.000000, reward total was -20.000000. running mean: -18.355894
episode 5517.000000, reward total was -19.000000. running mean: -18.362335
episode 5518.000000, reward total was -18.000000. running mean: -18.358712
episode 5519.000000, reward total was -17.000000. running mean: -18.345125
episode 5520.000000, reward total was -16.000000. running mean: -18.321674
episode 5521.000000, reward total was -17.000000. running mean: -18.308457
episode 5522.000000, reward total was -19.000000. running mean: -18.315372
episode 5523.000000, reward total was -20.000000. running mean: -18.332219
episode 5524.000000, reward total was -17.000000. running mean: -18.318896
episode 5525.000000, rewa

episode 5622.000000, reward total was -20.000000. running mean: -18.567554
episode 5623.000000, reward total was -16.000000. running mean: -18.541879
episode 5624.000000, reward total was -19.000000. running mean: -18.546460
episode 5625.000000, reward total was -15.000000. running mean: -18.510995
episode 5626.000000, reward total was -21.000000. running mean: -18.535885
episode 5627.000000, reward total was -21.000000. running mean: -18.560526
episode 5628.000000, reward total was -19.000000. running mean: -18.564921
episode 5629.000000, reward total was -17.000000. running mean: -18.549272
episode 5630.000000, reward total was -19.000000. running mean: -18.553779
episode 5631.000000, reward total was -17.000000. running mean: -18.538241
episode 5632.000000, reward total was -18.000000. running mean: -18.532859
episode 5633.000000, reward total was -19.000000. running mean: -18.537530
episode 5634.000000, reward total was -15.000000. running mean: -18.502155
episode 5635.000000, rewa

episode 5732.000000, reward total was -17.000000. running mean: -18.507829
episode 5733.000000, reward total was -17.000000. running mean: -18.492751
episode 5734.000000, reward total was -21.000000. running mean: -18.517823
episode 5735.000000, reward total was -21.000000. running mean: -18.542645
episode 5736.000000, reward total was -18.000000. running mean: -18.537218
episode 5737.000000, reward total was -16.000000. running mean: -18.511846
episode 5738.000000, reward total was -20.000000. running mean: -18.526728
episode 5739.000000, reward total was -17.000000. running mean: -18.511460
episode 5740.000000, reward total was -18.000000. running mean: -18.506346
episode 5741.000000, reward total was -20.000000. running mean: -18.521282
episode 5742.000000, reward total was -19.000000. running mean: -18.526070
episode 5743.000000, reward total was -19.000000. running mean: -18.530809
episode 5744.000000, reward total was -17.000000. running mean: -18.515501
episode 5745.000000, rewa

episode 5842.000000, reward total was -20.000000. running mean: -18.393630
episode 5843.000000, reward total was -18.000000. running mean: -18.389694
episode 5844.000000, reward total was -16.000000. running mean: -18.365797
episode 5845.000000, reward total was -15.000000. running mean: -18.332139
episode 5846.000000, reward total was -21.000000. running mean: -18.358818
episode 5847.000000, reward total was -19.000000. running mean: -18.365230
episode 5848.000000, reward total was -18.000000. running mean: -18.361577
episode 5849.000000, reward total was -18.000000. running mean: -18.357961
episode 5850.000000, reward total was -14.000000. running mean: -18.314382
episode 5851.000000, reward total was -18.000000. running mean: -18.311238
episode 5852.000000, reward total was -17.000000. running mean: -18.298126
episode 5853.000000, reward total was -15.000000. running mean: -18.265144
episode 5854.000000, reward total was -19.000000. running mean: -18.272493
episode 5855.000000, rewa

episode 5952.000000, reward total was -19.000000. running mean: -18.199972
episode 5953.000000, reward total was -20.000000. running mean: -18.217973
episode 5954.000000, reward total was -19.000000. running mean: -18.225793
episode 5955.000000, reward total was -19.000000. running mean: -18.233535
episode 5956.000000, reward total was -17.000000. running mean: -18.221200
episode 5957.000000, reward total was -19.000000. running mean: -18.228988
episode 5958.000000, reward total was -19.000000. running mean: -18.236698
episode 5959.000000, reward total was -19.000000. running mean: -18.244331
episode 5960.000000, reward total was -16.000000. running mean: -18.221887
episode 5961.000000, reward total was -21.000000. running mean: -18.249669
episode 5962.000000, reward total was -18.000000. running mean: -18.247172
episode 5963.000000, reward total was -16.000000. running mean: -18.224700
episode 5964.000000, reward total was -19.000000. running mean: -18.232453
episode 5965.000000, rewa

episode 6062.000000, reward total was -19.000000. running mean: -18.098657
episode 6063.000000, reward total was -18.000000. running mean: -18.097671
episode 6064.000000, reward total was -19.000000. running mean: -18.106694
episode 6065.000000, reward total was -19.000000. running mean: -18.115627
episode 6066.000000, reward total was -20.000000. running mean: -18.134471
episode 6067.000000, reward total was -17.000000. running mean: -18.123126
episode 6068.000000, reward total was -17.000000. running mean: -18.111895
episode 6069.000000, reward total was -18.000000. running mean: -18.110776
episode 6070.000000, reward total was -19.000000. running mean: -18.119668
episode 6071.000000, reward total was -15.000000. running mean: -18.088471
episode 6072.000000, reward total was -17.000000. running mean: -18.077587
episode 6073.000000, reward total was -17.000000. running mean: -18.066811
episode 6074.000000, reward total was -15.000000. running mean: -18.036143
episode 6075.000000, rewa

episode 6172.000000, reward total was -17.000000. running mean: -17.824319
episode 6173.000000, reward total was -20.000000. running mean: -17.846076
episode 6174.000000, reward total was -21.000000. running mean: -17.877615
episode 6175.000000, reward total was -20.000000. running mean: -17.898839
episode 6176.000000, reward total was -17.000000. running mean: -17.889850
episode 6177.000000, reward total was -18.000000. running mean: -17.890952
episode 6178.000000, reward total was -19.000000. running mean: -17.902042
episode 6179.000000, reward total was -19.000000. running mean: -17.913022
episode 6180.000000, reward total was -18.000000. running mean: -17.913892
episode 6181.000000, reward total was -15.000000. running mean: -17.884753
episode 6182.000000, reward total was -19.000000. running mean: -17.895905
episode 6183.000000, reward total was -21.000000. running mean: -17.926946
episode 6184.000000, reward total was -21.000000. running mean: -17.957677
episode 6185.000000, rewa

episode 6282.000000, reward total was -19.000000. running mean: -17.915247
episode 6283.000000, reward total was -17.000000. running mean: -17.906095
episode 6284.000000, reward total was -19.000000. running mean: -17.917034
episode 6285.000000, reward total was -17.000000. running mean: -17.907863
episode 6286.000000, reward total was -20.000000. running mean: -17.928785
episode 6287.000000, reward total was -19.000000. running mean: -17.939497
episode 6288.000000, reward total was -13.000000. running mean: -17.890102
episode 6289.000000, reward total was -21.000000. running mean: -17.921201
episode 6290.000000, reward total was -20.000000. running mean: -17.941989
episode 6291.000000, reward total was -15.000000. running mean: -17.912569
episode 6292.000000, reward total was -18.000000. running mean: -17.913443
episode 6293.000000, reward total was -18.000000. running mean: -17.914309
episode 6294.000000, reward total was -19.000000. running mean: -17.925166
episode 6295.000000, rewa

episode 6392.000000, reward total was -19.000000. running mean: -18.005922
episode 6393.000000, reward total was -19.000000. running mean: -18.015863
episode 6394.000000, reward total was -16.000000. running mean: -17.995704
episode 6395.000000, reward total was -18.000000. running mean: -17.995747
episode 6396.000000, reward total was -19.000000. running mean: -18.005789
episode 6397.000000, reward total was -17.000000. running mean: -17.995732
episode 6398.000000, reward total was -15.000000. running mean: -17.965774
episode 6399.000000, reward total was -16.000000. running mean: -17.946117
episode 6400.000000, reward total was -17.000000. running mean: -17.936655
episode 6401.000000, reward total was -20.000000. running mean: -17.957289
episode 6402.000000, reward total was -19.000000. running mean: -17.967716
episode 6403.000000, reward total was -17.000000. running mean: -17.958039
episode 6404.000000, reward total was -21.000000. running mean: -17.988458
episode 6405.000000, rewa

episode 6502.000000, reward total was -19.000000. running mean: -17.769905
episode 6503.000000, reward total was -18.000000. running mean: -17.772206
episode 6504.000000, reward total was -18.000000. running mean: -17.774484
episode 6505.000000, reward total was -19.000000. running mean: -17.786739
episode 6506.000000, reward total was -17.000000. running mean: -17.778872
episode 6507.000000, reward total was -16.000000. running mean: -17.761083
episode 6508.000000, reward total was -20.000000. running mean: -17.783472
episode 6509.000000, reward total was -19.000000. running mean: -17.795637
episode 6510.000000, reward total was -12.000000. running mean: -17.737681
episode 6511.000000, reward total was -19.000000. running mean: -17.750304
episode 6512.000000, reward total was -19.000000. running mean: -17.762801
episode 6513.000000, reward total was -20.000000. running mean: -17.785173
episode 6514.000000, reward total was -19.000000. running mean: -17.797321
episode 6515.000000, rewa

episode 6612.000000, reward total was -14.000000. running mean: -17.616815
episode 6613.000000, reward total was -18.000000. running mean: -17.620647
episode 6614.000000, reward total was -19.000000. running mean: -17.634440
episode 6615.000000, reward total was -19.000000. running mean: -17.648096
episode 6616.000000, reward total was -13.000000. running mean: -17.601615
episode 6617.000000, reward total was -18.000000. running mean: -17.605599
episode 6618.000000, reward total was -20.000000. running mean: -17.629543
episode 6619.000000, reward total was -17.000000. running mean: -17.623247
episode 6620.000000, reward total was -19.000000. running mean: -17.637015
episode 6621.000000, reward total was -15.000000. running mean: -17.610645
episode 6622.000000, reward total was -19.000000. running mean: -17.624538
episode 6623.000000, reward total was -15.000000. running mean: -17.598293
episode 6624.000000, reward total was -19.000000. running mean: -17.612310
episode 6625.000000, rewa

episode 6722.000000, reward total was -18.000000. running mean: -17.497166
episode 6723.000000, reward total was -18.000000. running mean: -17.502194
episode 6724.000000, reward total was -17.000000. running mean: -17.497172
episode 6725.000000, reward total was -16.000000. running mean: -17.482200
episode 6726.000000, reward total was -17.000000. running mean: -17.477378
episode 6727.000000, reward total was -15.000000. running mean: -17.452605
episode 6728.000000, reward total was -18.000000. running mean: -17.458078
episode 6729.000000, reward total was -17.000000. running mean: -17.453498
episode 6730.000000, reward total was -17.000000. running mean: -17.448963
episode 6731.000000, reward total was -13.000000. running mean: -17.404473
episode 6732.000000, reward total was -16.000000. running mean: -17.390428
episode 6733.000000, reward total was -18.000000. running mean: -17.396524
episode 6734.000000, reward total was -18.000000. running mean: -17.402559
episode 6735.000000, rewa

episode 6832.000000, reward total was -20.000000. running mean: -17.402350
episode 6833.000000, reward total was -20.000000. running mean: -17.428327
episode 6834.000000, reward total was -16.000000. running mean: -17.414044
episode 6835.000000, reward total was -19.000000. running mean: -17.429903
episode 6836.000000, reward total was -19.000000. running mean: -17.445604
episode 6837.000000, reward total was -18.000000. running mean: -17.451148
episode 6838.000000, reward total was -19.000000. running mean: -17.466637
episode 6839.000000, reward total was -20.000000. running mean: -17.491970
episode 6840.000000, reward total was -18.000000. running mean: -17.497050
episode 6841.000000, reward total was -19.000000. running mean: -17.512080
episode 6842.000000, reward total was -18.000000. running mean: -17.516959
episode 6843.000000, reward total was -18.000000. running mean: -17.521790
episode 6844.000000, reward total was -17.000000. running mean: -17.516572
episode 6845.000000, rewa

episode 6942.000000, reward total was -17.000000. running mean: -17.642837
episode 6943.000000, reward total was -18.000000. running mean: -17.646408
episode 6944.000000, reward total was -19.000000. running mean: -17.659944
episode 6945.000000, reward total was -19.000000. running mean: -17.673345
episode 6946.000000, reward total was -14.000000. running mean: -17.636611
episode 6947.000000, reward total was -14.000000. running mean: -17.600245
episode 6948.000000, reward total was -17.000000. running mean: -17.594243
episode 6949.000000, reward total was -14.000000. running mean: -17.558300
episode 6950.000000, reward total was -19.000000. running mean: -17.572717
episode 6951.000000, reward total was -12.000000. running mean: -17.516990
episode 6952.000000, reward total was -18.000000. running mean: -17.521820
episode 6953.000000, reward total was -18.000000. running mean: -17.526602
episode 6954.000000, reward total was -21.000000. running mean: -17.561336
episode 6955.000000, rewa