In [1]:
import gym
import numpy as np

from gym.wrappers import AtariPreprocessing
# NOTE(review): this only sets an attribute on the imported gym module; confirm
# that the installed gym version actually reads it before relying on it.
gym.new_step_api = True
env = gym.make('Pong-v0')

# Network dimensions.
H = 200      # units in the hidden layer
D = 80 * 80  # length of the flattened 80x80 input vector

# Policy weights, each scaled by 1/sqrt(fan-in). W1 is drawn before W2, so the
# order in which the global numpy RNG is consumed is fixed.
model = {
    'W1': np.random.randn(H, D) / np.sqrt(D),
    'W2': np.random.randn(H) / np.sqrt(H),
}

# Optimisation hyperparameters.
batch_size = 10       # episodes accumulated between two parameter updates
learning_rate = 1e-4
gamma = 0.99          # reward discount factor
decay_rate = 0.99     # decay of the RMSProp running average of grad^2

# One zero array per weight matrix: summed batch gradients and RMSProp memory.
grad_buffer = {name: np.zeros_like(weights) for name, weights in model.items()}
rmsprop_cache = {name: np.zeros_like(weights) for name, weights in model.items()}

def sigmoid(x):
  """Logistic "squashing" function 1 / (1 + exp(-x)), mapping x into [0, 1].

  Works on scalars and numpy arrays (element-wise).
  """
  # log(1 + exp(-x)) is evaluated with logaddexp, which never forms exp() of a
  # large positive number; the naive formula overflows (RuntimeWarning) for
  # very negative x.
  return np.exp(-np.logaddexp(0.0, -x))

def prepro(I):
  """Turn a raw frame into a flat float vector of 0/1 values.

  I: array-like image indexed as [row, column, channel]. It is not modified.
  Returns a 1-D float array; for a 210x160x3 frame it has 80*80 = 6400 entries.
  """
  frame = np.asarray(I)
  # Crop rows 35..194, keep every second row/column of channel 0. The slice is
  # a view, so copy it: the masking below must not write into the caller's frame.
  frame = frame[35:195:2, ::2, 0].copy()
  frame[frame == 144] = 0  # erase background (background type 1)
  frame[frame == 109] = 0  # erase background (background type 2)
  frame[frame != 0] = 1    # everything else (paddles, ball) just set to 1
  return frame.astype(float).ravel()

def discount_rewards(r, discount=None):
  """Compute discounted returns, walking backwards through the rewards.

  r: array-like of per-step rewards, 1-D or a column of shape (N, 1).
  discount: discount factor; when None the module-level `gamma` is used.
  Returns a floating-point array with the same shape as r.
  """
  rewards = np.asarray(r)
  factor = gamma if discount is None else discount
  # Integer rewards would otherwise give an integer output buffer and silently
  # truncate every fractional return, so force a floating dtype.
  out_dtype = rewards.dtype if np.issubdtype(rewards.dtype, np.floating) else float
  discounted_r = np.zeros_like(rewards, dtype=out_dtype)
  running_add = 0
  for t in reversed(range(0, rewards.size)):
    if rewards[t] != 0: running_add = 0 # reset the sum, since this was a game boundary (pong specific!)
    running_add = running_add * factor + rewards[t]
    discounted_r[t] = running_add
  return discounted_r

def policy_forward(x):
  """Run the two-layer policy on input x using the module-level `model`.

  Returns (p, h): p is the sigmoid of the output score (the training loop
  picks action 2 with this probability) and h is the hidden activation after
  the ReLU.
  """
  w1, w2 = model['W1'], model['W2']
  pre_activation = np.dot(w1, x)
  # ReLU: negative entries are replaced by zero, the rest pass through as-is
  hidden = np.where(pre_activation < 0, 0, pre_activation)
  score = np.dot(w2, hidden)
  return sigmoid(score), hidden

def policy_backward(epx, eph, epdlogp):
  """Backward pass over a stack of steps.

  epx: stacked inputs, eph: stacked hidden activations (one row per step),
  epdlogp: stacked output-gradient terms. Reads W2 from the module-level
  `model`. Returns a dict with the gradients for 'W1' and 'W2'.
  """
  out_weights = model['W2']
  # gradient reaching the hidden layer: one row per step, one column per unit
  grad_hidden = np.outer(epdlogp, out_weights)
  # ReLU backprop: no gradient flows through units whose activation was <= 0
  grad_hidden[eph <= 0] = 0
  grad_w1 = np.dot(grad_hidden.T, epx)
  grad_w2 = np.dot(eph.T, epdlogp).ravel()
  return {'W1': grad_w1, 'W2': grad_w2}

def model_step(model, observation, prev_x):
  """Pick the greedy action for one observation.

  The `model` argument is not used in this body; policy_forward is called
  without it. prev_x is the previous preprocessed frame, or None on the first
  step. Returns (action, preprocessed current frame); the frame is meant to be
  passed back in as prev_x on the next call.
  """
  frame = prepro(observation)
  # the policy input is the frame difference; all zeros on the very first step
  if prev_x is None:
    net_input = np.zeros(D)
  else:
    net_input = frame - prev_x

  # deterministic choice (no sampling): action 2 when the probability is >= 0.5
  up_prob, _hidden = policy_forward(net_input)
  if up_prob >= 0.5:
    chosen = 2
  else:
    chosen = 3

  return chosen, frame

def play_game(env, model, max_steps = 1000):
  """Play one episode with the greedy policy, then show the recorded frames.

  env: environment providing reset(), render(mode='rgb_array'), step(action)
       returning (observation, reward, done, info), and close().
  model: forwarded to model_step.
  max_steps: cap on the number of environment steps (default 1000).

  Prints one summary line, closes env, and hands the collected frames to
  display_frames_as_gif.
  """
  observation = env.reset()

  frames = []
  cumulated_reward = 0

  prev_x = None # used in computing the difference frame

  try:
      for t in range(max_steps):
          frames.append(env.render(mode = 'rgb_array'))
          action, prev_x = model_step(model, observation, prev_x)
          observation, reward, done, info = env.step(action)
          cumulated_reward += reward
          if done:
              print("Episode finished after {} timesteps, accumulated reward = {}".format(t+1, cumulated_reward))
              break
      else:
          # reached only when the loop ran out of steps without `done`; without
          # the for/else this line was also printed after a finished episode
          print("Episode finished without success, accumulated reward = {}".format(cumulated_reward))
  finally:
      # close the environment even if render()/step() raised
      env.close()
  display_frames_as_gif(frames)

def train_model(env, model, total_episodes = 100):
  """Train the policy with policy gradients for `total_episodes` episodes.

  env: environment whose reset() returns an observation and whose step(action)
       returns (observation, reward, done, info).
  model: dict of weight arrays, updated in place. policy_forward reads the
         module-level `model`, so pass that same dict here.
  total_episodes: number of episodes to play; values <= 0 return immediately.

  Uses the module-level grad_buffer, rmsprop_cache, batch_size, learning_rate
  and decay_rate. Returns a list of (episode_number, reward_sum,
  running_reward) tuples, one per finished episode.
  """
  hist = []
  if total_episodes <= 0:
    # nothing to do; the exit test below only runs after an episode finishes
    return hist
  observation = env.reset()

  prev_x = None # used in computing the difference frame
  xs,hs,dlogps,drs = [],[],[],[]
  running_reward = None
  reward_sum = 0
  episode_number = 0

  while True:

    cur_x = prepro(observation)
    x = cur_x - prev_x if prev_x is not None else np.zeros(D)
    prev_x = cur_x

    # forward the policy network and sample an action from the returned probability
    aprob, h = policy_forward(x)
    action = 2 if np.random.uniform() < aprob else 3 # roll the dice!

    # record various intermediates (needed later for backprop)
    xs.append(x) # observation
    hs.append(h) # hidden state
    y = 1 if action == 2 else 0 # a "fake label"
    dlogps.append(y - aprob) # grad that encourages the action that was taken to be taken (see http://cs231n.github.io/neural-networks-2/#losses if confused)

    # step the environment and get new measurements
    observation, reward, done, info = env.step(action)
    reward_sum += reward

    drs.append(reward) # record reward (has to be done after we call step() to get reward for previous action)

    if not done:
      continue

    # ---- an episode finished ----
    episode_number += 1

    # stack together all inputs, hidden states, action gradients, and rewards for this episode
    epx = np.vstack(xs)
    eph = np.vstack(hs)
    epdlogp = np.vstack(dlogps)
    epr = np.vstack(drs)
    xs,hs,dlogps,drs = [],[],[],[] # reset array memory

    # compute the discounted reward backwards through time
    discounted_epr = discount_rewards(epr)
    # standardize the rewards to be unit normal (helps control the gradient estimator variance)
    discounted_epr -= np.mean(discounted_epr)
    spread = np.std(discounted_epr)
    if spread > 0:
      discounted_epr /= spread
    # else: every return was identical, so the centered values are all zero;
    # dividing by zero here would turn the gradients and then the weights into NaN

    epdlogp *= discounted_epr # modulate the gradient with advantage (PG magic happens right here.)
    grad = policy_backward(epx, eph, epdlogp)
    for k in model: grad_buffer[k] += grad[k] # accumulate grad over batch

    # perform rmsprop parameter update every batch_size episodes
    if episode_number % batch_size == 0:
      for k,v in model.items():
        g = grad_buffer[k] # gradient
        rmsprop_cache[k] = decay_rate * rmsprop_cache[k] + (1 - decay_rate) * g**2
        model[k] += learning_rate * g / (np.sqrt(rmsprop_cache[k]) + 1e-5)
        grad_buffer[k] = np.zeros_like(v) # reset batch gradient buffer

    # boring book-keeping: running_reward is an exponential average (0.99 / 0.01)
    running_reward = reward_sum if running_reward is None else running_reward * 0.99 + reward_sum * 0.01
    hist.append((episode_number, reward_sum, running_reward))
    # the episode counter is an integer, so format it with %d rather than %f
    print ('episode %d, reward total was %f. running mean: %f' % (episode_number, reward_sum, running_reward))
    reward_sum = 0
    observation = env.reset() # reset env
    prev_x = None
    if episode_number >= total_episodes:
      return hist

   
    

  logger.warn(
  deprecation(
  deprecation(


In [2]:
# Train for 7000 episodes; %time reports how long the call took, and hist1 keeps the history list returned by train_model.
%time hist1 = train_model(env, model, total_episodes=7000)

  logger.deprecation(


episode 1.000000, reward total was -21.000000. running mean: -21.000000
episode 2.000000, reward total was -21.000000. running mean: -21.000000
episode 3.000000, reward total was -20.000000. running mean: -20.990000
episode 4.000000, reward total was -21.000000. running mean: -20.990100
episode 5.000000, reward total was -21.000000. running mean: -20.990199
episode 6.000000, reward total was -20.000000. running mean: -20.980297
episode 7.000000, reward total was -21.000000. running mean: -20.980494
episode 8.000000, reward total was -20.000000. running mean: -20.970689
episode 9.000000, reward total was -21.000000. running mean: -20.970982
episode 10.000000, reward total was -20.000000. running mean: -20.961272
episode 11.000000, reward total was -21.000000. running mean: -20.961660
episode 12.000000, reward total was -19.000000. running mean: -20.942043
episode 13.000000, reward total was -19.000000. running mean: -20.922623
episode 14.000000, reward total was -21.000000. running mean

episode 114.000000, reward total was -21.000000. running mean: -20.539297
episode 115.000000, reward total was -21.000000. running mean: -20.543904
episode 116.000000, reward total was -20.000000. running mean: -20.538465
episode 117.000000, reward total was -21.000000. running mean: -20.543081
episode 118.000000, reward total was -21.000000. running mean: -20.547650
episode 119.000000, reward total was -20.000000. running mean: -20.542173
episode 120.000000, reward total was -21.000000. running mean: -20.546752
episode 121.000000, reward total was -19.000000. running mean: -20.531284
episode 122.000000, reward total was -19.000000. running mean: -20.515971
episode 123.000000, reward total was -21.000000. running mean: -20.520812
episode 124.000000, reward total was -20.000000. running mean: -20.515603
episode 125.000000, reward total was -20.000000. running mean: -20.510447
episode 126.000000, reward total was -20.000000. running mean: -20.505343
episode 127.000000, reward total was -

episode 225.000000, reward total was -20.000000. running mean: -20.440082
episode 226.000000, reward total was -20.000000. running mean: -20.435681
episode 227.000000, reward total was -21.000000. running mean: -20.441324
episode 228.000000, reward total was -21.000000. running mean: -20.446911
episode 229.000000, reward total was -19.000000. running mean: -20.432442
episode 230.000000, reward total was -21.000000. running mean: -20.438117
episode 231.000000, reward total was -21.000000. running mean: -20.443736
episode 232.000000, reward total was -20.000000. running mean: -20.439299
episode 233.000000, reward total was -21.000000. running mean: -20.444906
episode 234.000000, reward total was -20.000000. running mean: -20.440457
episode 235.000000, reward total was -21.000000. running mean: -20.446052
episode 236.000000, reward total was -21.000000. running mean: -20.451592
episode 237.000000, reward total was -21.000000. running mean: -20.457076
episode 238.000000, reward total was -

episode 336.000000, reward total was -21.000000. running mean: -20.423884
episode 337.000000, reward total was -18.000000. running mean: -20.399645
episode 338.000000, reward total was -21.000000. running mean: -20.405648
episode 339.000000, reward total was -21.000000. running mean: -20.411592
episode 340.000000, reward total was -20.000000. running mean: -20.407476
episode 341.000000, reward total was -21.000000. running mean: -20.413401
episode 342.000000, reward total was -21.000000. running mean: -20.419267
episode 343.000000, reward total was -20.000000. running mean: -20.415075
episode 344.000000, reward total was -21.000000. running mean: -20.420924
episode 345.000000, reward total was -21.000000. running mean: -20.426715
episode 346.000000, reward total was -20.000000. running mean: -20.422447
episode 347.000000, reward total was -21.000000. running mean: -20.428223
episode 348.000000, reward total was -21.000000. running mean: -20.433941
episode 349.000000, reward total was -

episode 447.000000, reward total was -21.000000. running mean: -20.420088
episode 448.000000, reward total was -20.000000. running mean: -20.415887
episode 449.000000, reward total was -21.000000. running mean: -20.421728
episode 450.000000, reward total was -19.000000. running mean: -20.407511
episode 451.000000, reward total was -21.000000. running mean: -20.413435
episode 452.000000, reward total was -20.000000. running mean: -20.409301
episode 453.000000, reward total was -21.000000. running mean: -20.415208
episode 454.000000, reward total was -21.000000. running mean: -20.421056
episode 455.000000, reward total was -20.000000. running mean: -20.416845
episode 456.000000, reward total was -21.000000. running mean: -20.422677
episode 457.000000, reward total was -20.000000. running mean: -20.418450
episode 458.000000, reward total was -20.000000. running mean: -20.414266
episode 459.000000, reward total was -19.000000. running mean: -20.400123
episode 460.000000, reward total was -

episode 558.000000, reward total was -20.000000. running mean: -20.345784
episode 559.000000, reward total was -19.000000. running mean: -20.332326
episode 560.000000, reward total was -21.000000. running mean: -20.339002
episode 561.000000, reward total was -20.000000. running mean: -20.335612
episode 562.000000, reward total was -20.000000. running mean: -20.332256
episode 563.000000, reward total was -21.000000. running mean: -20.338934
episode 564.000000, reward total was -21.000000. running mean: -20.345544
episode 565.000000, reward total was -21.000000. running mean: -20.352089
episode 566.000000, reward total was -20.000000. running mean: -20.348568
episode 567.000000, reward total was -21.000000. running mean: -20.355082
episode 568.000000, reward total was -20.000000. running mean: -20.351532
episode 569.000000, reward total was -21.000000. running mean: -20.358016
episode 570.000000, reward total was -21.000000. running mean: -20.364436
episode 571.000000, reward total was -

episode 669.000000, reward total was -21.000000. running mean: -20.406959
episode 670.000000, reward total was -20.000000. running mean: -20.402889
episode 671.000000, reward total was -21.000000. running mean: -20.408860
episode 672.000000, reward total was -19.000000. running mean: -20.394772
episode 673.000000, reward total was -20.000000. running mean: -20.390824
episode 674.000000, reward total was -21.000000. running mean: -20.396916
episode 675.000000, reward total was -20.000000. running mean: -20.392947
episode 676.000000, reward total was -18.000000. running mean: -20.369017
episode 677.000000, reward total was -21.000000. running mean: -20.375327
episode 678.000000, reward total was -21.000000. running mean: -20.381574
episode 679.000000, reward total was -21.000000. running mean: -20.387758
episode 680.000000, reward total was -21.000000. running mean: -20.393880
episode 681.000000, reward total was -21.000000. running mean: -20.399942
episode 682.000000, reward total was -

episode 780.000000, reward total was -20.000000. running mean: -20.461392
episode 781.000000, reward total was -21.000000. running mean: -20.466778
episode 782.000000, reward total was -19.000000. running mean: -20.452110
episode 783.000000, reward total was -20.000000. running mean: -20.447589
episode 784.000000, reward total was -21.000000. running mean: -20.453113
episode 785.000000, reward total was -21.000000. running mean: -20.458582
episode 786.000000, reward total was -21.000000. running mean: -20.463997
episode 787.000000, reward total was -21.000000. running mean: -20.469357
episode 788.000000, reward total was -19.000000. running mean: -20.454663
episode 789.000000, reward total was -20.000000. running mean: -20.450116
episode 790.000000, reward total was -21.000000. running mean: -20.455615
episode 791.000000, reward total was -20.000000. running mean: -20.451059
episode 792.000000, reward total was -20.000000. running mean: -20.446548
episode 793.000000, reward total was -

episode 891.000000, reward total was -20.000000. running mean: -20.400525
episode 892.000000, reward total was -21.000000. running mean: -20.406520
episode 893.000000, reward total was -20.000000. running mean: -20.402455
episode 894.000000, reward total was -21.000000. running mean: -20.408430
episode 895.000000, reward total was -21.000000. running mean: -20.414346
episode 896.000000, reward total was -21.000000. running mean: -20.420202
episode 897.000000, reward total was -20.000000. running mean: -20.416000
episode 898.000000, reward total was -21.000000. running mean: -20.421840
episode 899.000000, reward total was -21.000000. running mean: -20.427622
episode 900.000000, reward total was -20.000000. running mean: -20.423346
episode 901.000000, reward total was -21.000000. running mean: -20.429112
episode 902.000000, reward total was -19.000000. running mean: -20.414821
episode 903.000000, reward total was -20.000000. running mean: -20.410673
episode 904.000000, reward total was -

episode 1002.000000, reward total was -21.000000. running mean: -20.259333
episode 1003.000000, reward total was -19.000000. running mean: -20.246740
episode 1004.000000, reward total was -19.000000. running mean: -20.234272
episode 1005.000000, reward total was -21.000000. running mean: -20.241930
episode 1006.000000, reward total was -20.000000. running mean: -20.239510
episode 1007.000000, reward total was -20.000000. running mean: -20.237115
episode 1008.000000, reward total was -19.000000. running mean: -20.224744
episode 1009.000000, reward total was -21.000000. running mean: -20.232497
episode 1010.000000, reward total was -21.000000. running mean: -20.240172
episode 1011.000000, reward total was -21.000000. running mean: -20.247770
episode 1012.000000, reward total was -21.000000. running mean: -20.255292
episode 1013.000000, reward total was -21.000000. running mean: -20.262739
episode 1014.000000, reward total was -21.000000. running mean: -20.270112
episode 1015.000000, rewa

episode 1112.000000, reward total was -21.000000. running mean: -20.289400
episode 1113.000000, reward total was -19.000000. running mean: -20.276506
episode 1114.000000, reward total was -20.000000. running mean: -20.273740
episode 1115.000000, reward total was -21.000000. running mean: -20.281003
episode 1116.000000, reward total was -21.000000. running mean: -20.288193
episode 1117.000000, reward total was -21.000000. running mean: -20.295311
episode 1118.000000, reward total was -19.000000. running mean: -20.282358
episode 1119.000000, reward total was -19.000000. running mean: -20.269534
episode 1120.000000, reward total was -19.000000. running mean: -20.256839
episode 1121.000000, reward total was -21.000000. running mean: -20.264271
episode 1122.000000, reward total was -18.000000. running mean: -20.241628
episode 1123.000000, reward total was -18.000000. running mean: -20.219212
episode 1124.000000, reward total was -20.000000. running mean: -20.217020
episode 1125.000000, rewa

episode 1222.000000, reward total was -21.000000. running mean: -20.171131
episode 1223.000000, reward total was -19.000000. running mean: -20.159420
episode 1224.000000, reward total was -21.000000. running mean: -20.167826
episode 1225.000000, reward total was -21.000000. running mean: -20.176148
episode 1226.000000, reward total was -21.000000. running mean: -20.184386
episode 1227.000000, reward total was -19.000000. running mean: -20.172542
episode 1228.000000, reward total was -20.000000. running mean: -20.170817
episode 1229.000000, reward total was -20.000000. running mean: -20.169109
episode 1230.000000, reward total was -19.000000. running mean: -20.157418
episode 1231.000000, reward total was -20.000000. running mean: -20.155843
episode 1232.000000, reward total was -21.000000. running mean: -20.164285
episode 1233.000000, reward total was -20.000000. running mean: -20.162642
episode 1234.000000, reward total was -21.000000. running mean: -20.171016
episode 1235.000000, rewa

episode 1332.000000, reward total was -21.000000. running mean: -20.180573
episode 1333.000000, reward total was -20.000000. running mean: -20.178767
episode 1334.000000, reward total was -18.000000. running mean: -20.156980
episode 1335.000000, reward total was -19.000000. running mean: -20.145410
episode 1336.000000, reward total was -20.000000. running mean: -20.143956
episode 1337.000000, reward total was -20.000000. running mean: -20.142516
episode 1338.000000, reward total was -21.000000. running mean: -20.151091
episode 1339.000000, reward total was -21.000000. running mean: -20.159580
episode 1340.000000, reward total was -21.000000. running mean: -20.167984
episode 1341.000000, reward total was -19.000000. running mean: -20.156304
episode 1342.000000, reward total was -20.000000. running mean: -20.154741
episode 1343.000000, reward total was -21.000000. running mean: -20.163194
episode 1344.000000, reward total was -19.000000. running mean: -20.151562
episode 1345.000000, rewa

episode 1442.000000, reward total was -21.000000. running mean: -20.160585
episode 1443.000000, reward total was -19.000000. running mean: -20.148979
episode 1444.000000, reward total was -21.000000. running mean: -20.157490
episode 1445.000000, reward total was -21.000000. running mean: -20.165915
episode 1446.000000, reward total was -21.000000. running mean: -20.174256
episode 1447.000000, reward total was -19.000000. running mean: -20.162513
episode 1448.000000, reward total was -18.000000. running mean: -20.140888
episode 1449.000000, reward total was -21.000000. running mean: -20.149479
episode 1450.000000, reward total was -20.000000. running mean: -20.147984
episode 1451.000000, reward total was -19.000000. running mean: -20.136504
episode 1452.000000, reward total was -21.000000. running mean: -20.145139
episode 1453.000000, reward total was -20.000000. running mean: -20.143688
episode 1454.000000, reward total was -21.000000. running mean: -20.152251
episode 1455.000000, rewa

episode 1552.000000, reward total was -20.000000. running mean: -20.212221
episode 1553.000000, reward total was -18.000000. running mean: -20.190098
episode 1554.000000, reward total was -20.000000. running mean: -20.188197
episode 1555.000000, reward total was -21.000000. running mean: -20.196315
episode 1556.000000, reward total was -20.000000. running mean: -20.194352
episode 1557.000000, reward total was -21.000000. running mean: -20.202409
episode 1558.000000, reward total was -20.000000. running mean: -20.200385
episode 1559.000000, reward total was -21.000000. running mean: -20.208381
episode 1560.000000, reward total was -18.000000. running mean: -20.186297
episode 1561.000000, reward total was -21.000000. running mean: -20.194434
episode 1562.000000, reward total was -20.000000. running mean: -20.192490
episode 1563.000000, reward total was -19.000000. running mean: -20.180565
episode 1564.000000, reward total was -19.000000. running mean: -20.168759
episode 1565.000000, rewa

episode 1662.000000, reward total was -21.000000. running mean: -20.151611
episode 1663.000000, reward total was -21.000000. running mean: -20.160095
episode 1664.000000, reward total was -21.000000. running mean: -20.168494
episode 1665.000000, reward total was -21.000000. running mean: -20.176809
episode 1666.000000, reward total was -16.000000. running mean: -20.135041
episode 1667.000000, reward total was -20.000000. running mean: -20.133691
episode 1668.000000, reward total was -19.000000. running mean: -20.122354
episode 1669.000000, reward total was -20.000000. running mean: -20.121130
episode 1670.000000, reward total was -20.000000. running mean: -20.119919
episode 1671.000000, reward total was -21.000000. running mean: -20.128720
episode 1672.000000, reward total was -21.000000. running mean: -20.137433
episode 1673.000000, reward total was -21.000000. running mean: -20.146058
episode 1674.000000, reward total was -21.000000. running mean: -20.154598
episode 1675.000000, rewa

episode 1772.000000, reward total was -19.000000. running mean: -20.253716
episode 1773.000000, reward total was -20.000000. running mean: -20.251179
episode 1774.000000, reward total was -19.000000. running mean: -20.238667
episode 1775.000000, reward total was -21.000000. running mean: -20.246280
episode 1776.000000, reward total was -21.000000. running mean: -20.253817
episode 1777.000000, reward total was -20.000000. running mean: -20.251279
episode 1778.000000, reward total was -20.000000. running mean: -20.248766
episode 1779.000000, reward total was -21.000000. running mean: -20.256279
episode 1780.000000, reward total was -21.000000. running mean: -20.263716
episode 1781.000000, reward total was -19.000000. running mean: -20.251079
episode 1782.000000, reward total was -19.000000. running mean: -20.238568
episode 1783.000000, reward total was -21.000000. running mean: -20.246182
episode 1784.000000, reward total was -21.000000. running mean: -20.253720
episode 1785.000000, rewa

episode 1882.000000, reward total was -21.000000. running mean: -20.139073
episode 1883.000000, reward total was -20.000000. running mean: -20.137682
episode 1884.000000, reward total was -21.000000. running mean: -20.146305
episode 1885.000000, reward total was -19.000000. running mean: -20.134842
episode 1886.000000, reward total was -20.000000. running mean: -20.133494
episode 1887.000000, reward total was -21.000000. running mean: -20.142159
episode 1888.000000, reward total was -20.000000. running mean: -20.140737
episode 1889.000000, reward total was -21.000000. running mean: -20.149330
episode 1890.000000, reward total was -21.000000. running mean: -20.157836
episode 1891.000000, reward total was -20.000000. running mean: -20.156258
episode 1892.000000, reward total was -19.000000. running mean: -20.144695
episode 1893.000000, reward total was -20.000000. running mean: -20.143249
episode 1894.000000, reward total was -19.000000. running mean: -20.131816
episode 1895.000000, rewa

episode 1992.000000, reward total was -21.000000. running mean: -20.140922
episode 1993.000000, reward total was -19.000000. running mean: -20.129513
episode 1994.000000, reward total was -20.000000. running mean: -20.128218
episode 1995.000000, reward total was -21.000000. running mean: -20.136935
episode 1996.000000, reward total was -20.000000. running mean: -20.135566
episode 1997.000000, reward total was -21.000000. running mean: -20.144210
episode 1998.000000, reward total was -20.000000. running mean: -20.142768
episode 1999.000000, reward total was -19.000000. running mean: -20.131341
episode 2000.000000, reward total was -21.000000. running mean: -20.140027
episode 2001.000000, reward total was -21.000000. running mean: -20.148627
episode 2002.000000, reward total was -21.000000. running mean: -20.157141
episode 2003.000000, reward total was -21.000000. running mean: -20.165569
episode 2004.000000, reward total was -21.000000. running mean: -20.173914
episode 2005.000000, rewa

episode 2102.000000, reward total was -20.000000. running mean: -20.104474
episode 2103.000000, reward total was -18.000000. running mean: -20.083429
episode 2104.000000, reward total was -21.000000. running mean: -20.092595
episode 2105.000000, reward total was -20.000000. running mean: -20.091669
episode 2106.000000, reward total was -21.000000. running mean: -20.100752
episode 2107.000000, reward total was -21.000000. running mean: -20.109745
episode 2108.000000, reward total was -17.000000. running mean: -20.078648
episode 2109.000000, reward total was -20.000000. running mean: -20.077861
episode 2110.000000, reward total was -21.000000. running mean: -20.087082
episode 2111.000000, reward total was -20.000000. running mean: -20.086212
episode 2112.000000, reward total was -20.000000. running mean: -20.085349
episode 2113.000000, reward total was -21.000000. running mean: -20.094496
episode 2114.000000, reward total was -19.000000. running mean: -20.083551
episode 2115.000000, rewa

episode 2212.000000, reward total was -21.000000. running mean: -20.188948
episode 2213.000000, reward total was -21.000000. running mean: -20.197058
episode 2214.000000, reward total was -21.000000. running mean: -20.205088
episode 2215.000000, reward total was -21.000000. running mean: -20.213037
episode 2216.000000, reward total was -20.000000. running mean: -20.210906
episode 2217.000000, reward total was -20.000000. running mean: -20.208797
episode 2218.000000, reward total was -19.000000. running mean: -20.196709
episode 2219.000000, reward total was -20.000000. running mean: -20.194742
episode 2220.000000, reward total was -20.000000. running mean: -20.192795
episode 2221.000000, reward total was -19.000000. running mean: -20.180867
episode 2222.000000, reward total was -21.000000. running mean: -20.189058
episode 2223.000000, reward total was -21.000000. running mean: -20.197168
episode 2224.000000, reward total was -20.000000. running mean: -20.195196
episode 2225.000000, rewa

episode 2322.000000, reward total was -20.000000. running mean: -20.079744
episode 2323.000000, reward total was -21.000000. running mean: -20.088946
episode 2324.000000, reward total was -20.000000. running mean: -20.088057
episode 2325.000000, reward total was -21.000000. running mean: -20.097176
episode 2326.000000, reward total was -21.000000. running mean: -20.106204
episode 2327.000000, reward total was -20.000000. running mean: -20.105142
episode 2328.000000, reward total was -20.000000. running mean: -20.104091
episode 2329.000000, reward total was -21.000000. running mean: -20.113050
episode 2330.000000, reward total was -21.000000. running mean: -20.121920
episode 2331.000000, reward total was -20.000000. running mean: -20.120700
episode 2332.000000, reward total was -19.000000. running mean: -20.109493
episode 2333.000000, reward total was -18.000000. running mean: -20.088398
episode 2334.000000, reward total was -19.000000. running mean: -20.077514
episode 2335.000000, rewa

episode 2432.000000, reward total was -20.000000. running mean: -20.172746
episode 2433.000000, reward total was -19.000000. running mean: -20.161018
episode 2434.000000, reward total was -19.000000. running mean: -20.149408
episode 2435.000000, reward total was -19.000000. running mean: -20.137914
episode 2436.000000, reward total was -18.000000. running mean: -20.116535
episode 2437.000000, reward total was -21.000000. running mean: -20.125370
episode 2438.000000, reward total was -21.000000. running mean: -20.134116
episode 2439.000000, reward total was -20.000000. running mean: -20.132775
episode 2440.000000, reward total was -18.000000. running mean: -20.111447
episode 2441.000000, reward total was -17.000000. running mean: -20.080332
episode 2442.000000, reward total was -19.000000. running mean: -20.069529
episode 2443.000000, reward total was -21.000000. running mean: -20.078834
episode 2444.000000, reward total was -21.000000. running mean: -20.088046
episode 2445.000000, rewa

episode 2542.000000, reward total was -21.000000. running mean: -20.090047
episode 2543.000000, reward total was -21.000000. running mean: -20.099147
episode 2544.000000, reward total was -20.000000. running mean: -20.098155
episode 2545.000000, reward total was -20.000000. running mean: -20.097174
episode 2546.000000, reward total was -20.000000. running mean: -20.096202
episode 2547.000000, reward total was -20.000000. running mean: -20.095240
episode 2548.000000, reward total was -21.000000. running mean: -20.104287
episode 2549.000000, reward total was -21.000000. running mean: -20.113245
episode 2550.000000, reward total was -20.000000. running mean: -20.112112
episode 2551.000000, reward total was -18.000000. running mean: -20.090991
episode 2552.000000, reward total was -19.000000. running mean: -20.080081
episode 2553.000000, reward total was -21.000000. running mean: -20.089280
episode 2554.000000, reward total was -20.000000. running mean: -20.088387
episode 2555.000000, rewa

episode 2652.000000, reward total was -20.000000. running mean: -20.067161
episode 2653.000000, reward total was -20.000000. running mean: -20.066489
episode 2654.000000, reward total was -18.000000. running mean: -20.045824
episode 2655.000000, reward total was -20.000000. running mean: -20.045366
episode 2656.000000, reward total was -21.000000. running mean: -20.054912
episode 2657.000000, reward total was -21.000000. running mean: -20.064363
episode 2658.000000, reward total was -21.000000. running mean: -20.073720
episode 2659.000000, reward total was -19.000000. running mean: -20.062982
episode 2660.000000, reward total was -19.000000. running mean: -20.052353
episode 2661.000000, reward total was -21.000000. running mean: -20.061829
episode 2662.000000, reward total was -18.000000. running mean: -20.041211
episode 2663.000000, reward total was -21.000000. running mean: -20.050799
episode 2664.000000, reward total was -21.000000. running mean: -20.060291
episode 2665.000000, rewa

episode 2762.000000, reward total was -21.000000. running mean: -20.108465
episode 2763.000000, reward total was -21.000000. running mean: -20.117380
episode 2764.000000, reward total was -20.000000. running mean: -20.116206
episode 2765.000000, reward total was -21.000000. running mean: -20.125044
episode 2766.000000, reward total was -21.000000. running mean: -20.133794
episode 2767.000000, reward total was -20.000000. running mean: -20.132456
episode 2768.000000, reward total was -20.000000. running mean: -20.131131
episode 2769.000000, reward total was -20.000000. running mean: -20.129820
episode 2770.000000, reward total was -19.000000. running mean: -20.118522
episode 2771.000000, reward total was -21.000000. running mean: -20.127337
episode 2772.000000, reward total was -20.000000. running mean: -20.126063
episode 2773.000000, reward total was -20.000000. running mean: -20.124803
episode 2774.000000, reward total was -21.000000. running mean: -20.133555
episode 2775.000000, rewa

episode 2872.000000, reward total was -19.000000. running mean: -20.101764
episode 2873.000000, reward total was -21.000000. running mean: -20.110747
episode 2874.000000, reward total was -20.000000. running mean: -20.109639
episode 2875.000000, reward total was -20.000000. running mean: -20.108543
episode 2876.000000, reward total was -20.000000. running mean: -20.107457
episode 2877.000000, reward total was -21.000000. running mean: -20.116383
episode 2878.000000, reward total was -20.000000. running mean: -20.115219
episode 2879.000000, reward total was -18.000000. running mean: -20.094067
episode 2880.000000, reward total was -18.000000. running mean: -20.073126
episode 2881.000000, reward total was -21.000000. running mean: -20.082395
episode 2882.000000, reward total was -21.000000. running mean: -20.091571
episode 2883.000000, reward total was -20.000000. running mean: -20.090655
episode 2884.000000, reward total was -21.000000. running mean: -20.099749
episode 2885.000000, rewa

episode 2982.000000, reward total was -19.000000. running mean: -20.063854
episode 2983.000000, reward total was -20.000000. running mean: -20.063215
episode 2984.000000, reward total was -20.000000. running mean: -20.062583
episode 2985.000000, reward total was -20.000000. running mean: -20.061957
episode 2986.000000, reward total was -21.000000. running mean: -20.071338
episode 2987.000000, reward total was -20.000000. running mean: -20.070625
episode 2988.000000, reward total was -20.000000. running mean: -20.069918
episode 2989.000000, reward total was -19.000000. running mean: -20.059219
episode 2990.000000, reward total was -18.000000. running mean: -20.038627
episode 2991.000000, reward total was -20.000000. running mean: -20.038241
episode 2992.000000, reward total was -18.000000. running mean: -20.017858
episode 2993.000000, reward total was -21.000000. running mean: -20.027680
episode 2994.000000, reward total was -20.000000. running mean: -20.027403
episode 2995.000000, rewa

episode 3092.000000, reward total was -20.000000. running mean: -19.995297
episode 3093.000000, reward total was -19.000000. running mean: -19.985344
episode 3094.000000, reward total was -19.000000. running mean: -19.975490
episode 3095.000000, reward total was -20.000000. running mean: -19.975735
episode 3096.000000, reward total was -21.000000. running mean: -19.985978
episode 3097.000000, reward total was -21.000000. running mean: -19.996118
episode 3098.000000, reward total was -20.000000. running mean: -19.996157
episode 3099.000000, reward total was -19.000000. running mean: -19.986195
episode 3100.000000, reward total was -20.000000. running mean: -19.986334
episode 3101.000000, reward total was -19.000000. running mean: -19.976470
episode 3102.000000, reward total was -21.000000. running mean: -19.986705
episode 3103.000000, reward total was -17.000000. running mean: -19.956838
episode 3104.000000, reward total was -21.000000. running mean: -19.967270
episode 3105.000000, rewa

episode 3202.000000, reward total was -21.000000. running mean: -20.096438
episode 3203.000000, reward total was -20.000000. running mean: -20.095473
episode 3204.000000, reward total was -21.000000. running mean: -20.104519
episode 3205.000000, reward total was -21.000000. running mean: -20.113473
episode 3206.000000, reward total was -21.000000. running mean: -20.122339
episode 3207.000000, reward total was -18.000000. running mean: -20.101115
episode 3208.000000, reward total was -21.000000. running mean: -20.110104
episode 3209.000000, reward total was -20.000000. running mean: -20.109003
episode 3210.000000, reward total was -21.000000. running mean: -20.117913
episode 3211.000000, reward total was -19.000000. running mean: -20.106734
episode 3212.000000, reward total was -20.000000. running mean: -20.105667
episode 3213.000000, reward total was -21.000000. running mean: -20.114610
episode 3214.000000, reward total was -18.000000. running mean: -20.093464
episode 3215.000000, rewa

episode 3312.000000, reward total was -21.000000. running mean: -20.175831
episode 3313.000000, reward total was -20.000000. running mean: -20.174073
episode 3314.000000, reward total was -21.000000. running mean: -20.182332
episode 3315.000000, reward total was -20.000000. running mean: -20.180509
episode 3316.000000, reward total was -21.000000. running mean: -20.188704
episode 3317.000000, reward total was -18.000000. running mean: -20.166817
episode 3318.000000, reward total was -21.000000. running mean: -20.175148
episode 3319.000000, reward total was -19.000000. running mean: -20.163397
episode 3320.000000, reward total was -20.000000. running mean: -20.161763
episode 3321.000000, reward total was -19.000000. running mean: -20.150145
episode 3322.000000, reward total was -19.000000. running mean: -20.138644
episode 3323.000000, reward total was -21.000000. running mean: -20.147258
episode 3324.000000, reward total was -20.000000. running mean: -20.145785
episode 3325.000000, rewa

episode 3422.000000, reward total was -19.000000. running mean: -19.972201
episode 3423.000000, reward total was -19.000000. running mean: -19.962479
episode 3424.000000, reward total was -20.000000. running mean: -19.962854
episode 3425.000000, reward total was -19.000000. running mean: -19.953225
episode 3426.000000, reward total was -21.000000. running mean: -19.963693
episode 3427.000000, reward total was -21.000000. running mean: -19.974056
episode 3428.000000, reward total was -21.000000. running mean: -19.984316
episode 3429.000000, reward total was -20.000000. running mean: -19.984472
episode 3430.000000, reward total was -20.000000. running mean: -19.984628
episode 3431.000000, reward total was -21.000000. running mean: -19.994781
episode 3432.000000, reward total was -18.000000. running mean: -19.974834
episode 3433.000000, reward total was -20.000000. running mean: -19.975085
episode 3434.000000, reward total was -20.000000. running mean: -19.975334
episode 3435.000000, rewa

episode 3532.000000, reward total was -21.000000. running mean: -20.071801
episode 3533.000000, reward total was -21.000000. running mean: -20.081083
episode 3534.000000, reward total was -21.000000. running mean: -20.090272
episode 3535.000000, reward total was -20.000000. running mean: -20.089369
episode 3536.000000, reward total was -20.000000. running mean: -20.088475
episode 3537.000000, reward total was -21.000000. running mean: -20.097591
episode 3538.000000, reward total was -20.000000. running mean: -20.096615
episode 3539.000000, reward total was -19.000000. running mean: -20.085649
episode 3540.000000, reward total was -21.000000. running mean: -20.094792
episode 3541.000000, reward total was -20.000000. running mean: -20.093844
episode 3542.000000, reward total was -20.000000. running mean: -20.092906
episode 3543.000000, reward total was -19.000000. running mean: -20.081977
episode 3544.000000, reward total was -20.000000. running mean: -20.081157
episode 3545.000000, rewa

episode 3642.000000, reward total was -20.000000. running mean: -19.991967
episode 3643.000000, reward total was -21.000000. running mean: -20.002047
episode 3644.000000, reward total was -18.000000. running mean: -19.982026
episode 3645.000000, reward total was -19.000000. running mean: -19.972206
episode 3646.000000, reward total was -19.000000. running mean: -19.962484
episode 3647.000000, reward total was -19.000000. running mean: -19.952859
episode 3648.000000, reward total was -21.000000. running mean: -19.963331
episode 3649.000000, reward total was -21.000000. running mean: -19.973697
episode 3650.000000, reward total was -20.000000. running mean: -19.973960
episode 3651.000000, reward total was -21.000000. running mean: -19.984221
episode 3652.000000, reward total was -21.000000. running mean: -19.994379
episode 3653.000000, reward total was -18.000000. running mean: -19.974435
episode 3654.000000, reward total was -20.000000. running mean: -19.974690
episode 3655.000000, rewa

episode 3752.000000, reward total was -21.000000. running mean: -20.073036
episode 3753.000000, reward total was -21.000000. running mean: -20.082305
episode 3754.000000, reward total was -18.000000. running mean: -20.061482
episode 3755.000000, reward total was -20.000000. running mean: -20.060867
episode 3756.000000, reward total was -21.000000. running mean: -20.070259
episode 3757.000000, reward total was -21.000000. running mean: -20.079556
episode 3758.000000, reward total was -21.000000. running mean: -20.088761
episode 3759.000000, reward total was -21.000000. running mean: -20.097873
episode 3760.000000, reward total was -18.000000. running mean: -20.076894
episode 3761.000000, reward total was -20.000000. running mean: -20.076125
episode 3762.000000, reward total was -19.000000. running mean: -20.065364
episode 3763.000000, reward total was -20.000000. running mean: -20.064710
episode 3764.000000, reward total was -18.000000. running mean: -20.044063
episode 3765.000000, rewa

episode 3862.000000, reward total was -21.000000. running mean: -20.015088
episode 3863.000000, reward total was -21.000000. running mean: -20.024937
episode 3864.000000, reward total was -19.000000. running mean: -20.014687
episode 3865.000000, reward total was -19.000000. running mean: -20.004540
episode 3866.000000, reward total was -20.000000. running mean: -20.004495
episode 3867.000000, reward total was -21.000000. running mean: -20.014450
episode 3868.000000, reward total was -21.000000. running mean: -20.024306
episode 3869.000000, reward total was -19.000000. running mean: -20.014063
episode 3870.000000, reward total was -21.000000. running mean: -20.023922
episode 3871.000000, reward total was -21.000000. running mean: -20.033683
episode 3872.000000, reward total was -21.000000. running mean: -20.043346
episode 3873.000000, reward total was -20.000000. running mean: -20.042912
episode 3874.000000, reward total was -21.000000. running mean: -20.052483
episode 3875.000000, rewa

episode 3972.000000, reward total was -21.000000. running mean: -19.958439
episode 3973.000000, reward total was -19.000000. running mean: -19.948855
episode 3974.000000, reward total was -20.000000. running mean: -19.949366
episode 3975.000000, reward total was -21.000000. running mean: -19.959873
episode 3976.000000, reward total was -21.000000. running mean: -19.970274
episode 3977.000000, reward total was -21.000000. running mean: -19.980571
episode 3978.000000, reward total was -21.000000. running mean: -19.990766
episode 3979.000000, reward total was -19.000000. running mean: -19.980858
episode 3980.000000, reward total was -21.000000. running mean: -19.991049
episode 3981.000000, reward total was -18.000000. running mean: -19.971139
episode 3982.000000, reward total was -18.000000. running mean: -19.951427
episode 3983.000000, reward total was -18.000000. running mean: -19.931913
episode 3984.000000, reward total was -21.000000. running mean: -19.942594
episode 3985.000000, rewa

episode 4082.000000, reward total was -21.000000. running mean: -19.991862
episode 4083.000000, reward total was -21.000000. running mean: -20.001943
episode 4084.000000, reward total was -20.000000. running mean: -20.001924
episode 4085.000000, reward total was -19.000000. running mean: -19.991904
episode 4086.000000, reward total was -20.000000. running mean: -19.991985
episode 4087.000000, reward total was -20.000000. running mean: -19.992065
episode 4088.000000, reward total was -21.000000. running mean: -20.002145
episode 4089.000000, reward total was -21.000000. running mean: -20.012123
episode 4090.000000, reward total was -21.000000. running mean: -20.022002
episode 4091.000000, reward total was -18.000000. running mean: -20.001782
episode 4092.000000, reward total was -20.000000. running mean: -20.001764
episode 4093.000000, reward total was -20.000000. running mean: -20.001747
episode 4094.000000, reward total was -21.000000. running mean: -20.011729
episode 4095.000000, rewa

episode 4192.000000, reward total was -20.000000. running mean: -19.916444
episode 4193.000000, reward total was -21.000000. running mean: -19.927279
episode 4194.000000, reward total was -18.000000. running mean: -19.908006
episode 4195.000000, reward total was -21.000000. running mean: -19.918926
episode 4196.000000, reward total was -17.000000. running mean: -19.889737
episode 4197.000000, reward total was -21.000000. running mean: -19.900840
episode 4198.000000, reward total was -19.000000. running mean: -19.891831
episode 4199.000000, reward total was -20.000000. running mean: -19.892913
episode 4200.000000, reward total was -20.000000. running mean: -19.893984
episode 4201.000000, reward total was -19.000000. running mean: -19.885044
episode 4202.000000, reward total was -21.000000. running mean: -19.896194
episode 4203.000000, reward total was -21.000000. running mean: -19.907232
episode 4204.000000, reward total was -16.000000. running mean: -19.868159
episode 4205.000000, rewa

episode 4302.000000, reward total was -14.000000. running mean: -19.774418
episode 4303.000000, reward total was -21.000000. running mean: -19.786674
episode 4304.000000, reward total was -21.000000. running mean: -19.798807
episode 4305.000000, reward total was -21.000000. running mean: -19.810819
episode 4306.000000, reward total was -20.000000. running mean: -19.812711
episode 4307.000000, reward total was -19.000000. running mean: -19.804584
episode 4308.000000, reward total was -18.000000. running mean: -19.786538
episode 4309.000000, reward total was -20.000000. running mean: -19.788672
episode 4310.000000, reward total was -21.000000. running mean: -19.800786
episode 4311.000000, reward total was -19.000000. running mean: -19.792778
episode 4312.000000, reward total was -19.000000. running mean: -19.784850
episode 4313.000000, reward total was -21.000000. running mean: -19.797002
episode 4314.000000, reward total was -17.000000. running mean: -19.769032
episode 4315.000000, rewa

episode 4412.000000, reward total was -21.000000. running mean: -19.686285
episode 4413.000000, reward total was -19.000000. running mean: -19.679422
episode 4414.000000, reward total was -21.000000. running mean: -19.692628
episode 4415.000000, reward total was -21.000000. running mean: -19.705701
episode 4416.000000, reward total was -21.000000. running mean: -19.718644
episode 4417.000000, reward total was -21.000000. running mean: -19.731458
episode 4418.000000, reward total was -21.000000. running mean: -19.744143
episode 4419.000000, reward total was -20.000000. running mean: -19.746702
episode 4420.000000, reward total was -20.000000. running mean: -19.749235
episode 4421.000000, reward total was -19.000000. running mean: -19.741743
episode 4422.000000, reward total was -20.000000. running mean: -19.744325
episode 4423.000000, reward total was -21.000000. running mean: -19.756882
episode 4424.000000, reward total was -21.000000. running mean: -19.769313
episode 4425.000000, rewa

episode 4522.000000, reward total was -21.000000. running mean: -19.631941
episode 4523.000000, reward total was -20.000000. running mean: -19.635622
episode 4524.000000, reward total was -18.000000. running mean: -19.619265
episode 4525.000000, reward total was -20.000000. running mean: -19.623073
episode 4526.000000, reward total was -17.000000. running mean: -19.596842
episode 4527.000000, reward total was -20.000000. running mean: -19.600874
episode 4528.000000, reward total was -20.000000. running mean: -19.604865
episode 4529.000000, reward total was -19.000000. running mean: -19.598816
episode 4530.000000, reward total was -19.000000. running mean: -19.592828
episode 4531.000000, reward total was -21.000000. running mean: -19.606900
episode 4532.000000, reward total was -20.000000. running mean: -19.610831
episode 4533.000000, reward total was -20.000000. running mean: -19.614722
episode 4534.000000, reward total was -18.000000. running mean: -19.598575
episode 4535.000000, rewa

episode 4632.000000, reward total was -21.000000. running mean: -19.820688
episode 4633.000000, reward total was -18.000000. running mean: -19.802481
episode 4634.000000, reward total was -20.000000. running mean: -19.804457
episode 4635.000000, reward total was -20.000000. running mean: -19.806412
episode 4636.000000, reward total was -21.000000. running mean: -19.818348
episode 4637.000000, reward total was -21.000000. running mean: -19.830164
episode 4638.000000, reward total was -19.000000. running mean: -19.821863
episode 4639.000000, reward total was -19.000000. running mean: -19.813644
episode 4640.000000, reward total was -17.000000. running mean: -19.785508
episode 4641.000000, reward total was -20.000000. running mean: -19.787653
episode 4642.000000, reward total was -20.000000. running mean: -19.789776
episode 4643.000000, reward total was -21.000000. running mean: -19.801878
episode 4644.000000, reward total was -20.000000. running mean: -19.803860
episode 4645.000000, rewa

episode 4742.000000, reward total was -15.000000. running mean: -19.783002
episode 4743.000000, reward total was -20.000000. running mean: -19.785172
episode 4744.000000, reward total was -18.000000. running mean: -19.767321
episode 4745.000000, reward total was -21.000000. running mean: -19.779647
episode 4746.000000, reward total was -21.000000. running mean: -19.791851
episode 4747.000000, reward total was -20.000000. running mean: -19.793932
episode 4748.000000, reward total was -21.000000. running mean: -19.805993
episode 4749.000000, reward total was -20.000000. running mean: -19.807933
episode 4750.000000, reward total was -19.000000. running mean: -19.799854
episode 4751.000000, reward total was -20.000000. running mean: -19.801855
episode 4752.000000, reward total was -19.000000. running mean: -19.793837
episode 4753.000000, reward total was -20.000000. running mean: -19.795898
episode 4754.000000, reward total was -21.000000. running mean: -19.807939
episode 4755.000000, rewa

episode 4852.000000, reward total was -17.000000. running mean: -19.780304
episode 4853.000000, reward total was -21.000000. running mean: -19.792501
episode 4854.000000, reward total was -19.000000. running mean: -19.784576
episode 4855.000000, reward total was -19.000000. running mean: -19.776730
episode 4856.000000, reward total was -19.000000. running mean: -19.768963
episode 4857.000000, reward total was -19.000000. running mean: -19.761274
episode 4858.000000, reward total was -19.000000. running mean: -19.753661
episode 4859.000000, reward total was -21.000000. running mean: -19.766124
episode 4860.000000, reward total was -21.000000. running mean: -19.778463
episode 4861.000000, reward total was -18.000000. running mean: -19.760678
episode 4862.000000, reward total was -19.000000. running mean: -19.753072
episode 4863.000000, reward total was -21.000000. running mean: -19.765541
episode 4864.000000, reward total was -21.000000. running mean: -19.777885
episode 4865.000000, rewa

episode 4962.000000, reward total was -20.000000. running mean: -19.756056
episode 4963.000000, reward total was -21.000000. running mean: -19.768495
episode 4964.000000, reward total was -21.000000. running mean: -19.780810
episode 4965.000000, reward total was -20.000000. running mean: -19.783002
episode 4966.000000, reward total was -18.000000. running mean: -19.765172
episode 4967.000000, reward total was -18.000000. running mean: -19.747520
episode 4968.000000, reward total was -21.000000. running mean: -19.760045
episode 4969.000000, reward total was -21.000000. running mean: -19.772445
episode 4970.000000, reward total was -19.000000. running mean: -19.764720
episode 4971.000000, reward total was -18.000000. running mean: -19.747073
episode 4972.000000, reward total was -21.000000. running mean: -19.759602
episode 4973.000000, reward total was -20.000000. running mean: -19.762006
episode 4974.000000, reward total was -19.000000. running mean: -19.754386
episode 4975.000000, rewa

episode 5072.000000, reward total was -19.000000. running mean: -19.755367
episode 5073.000000, reward total was -21.000000. running mean: -19.767814
episode 5074.000000, reward total was -21.000000. running mean: -19.780135
episode 5075.000000, reward total was -21.000000. running mean: -19.792334
episode 5076.000000, reward total was -20.000000. running mean: -19.794411
episode 5077.000000, reward total was -20.000000. running mean: -19.796467
episode 5078.000000, reward total was -20.000000. running mean: -19.798502
episode 5079.000000, reward total was -18.000000. running mean: -19.780517
episode 5080.000000, reward total was -20.000000. running mean: -19.782712
episode 5081.000000, reward total was -20.000000. running mean: -19.784885
episode 5082.000000, reward total was -17.000000. running mean: -19.757036
episode 5083.000000, reward total was -19.000000. running mean: -19.749465
episode 5084.000000, reward total was -19.000000. running mean: -19.741971
episode 5085.000000, rewa

episode 5182.000000, reward total was -21.000000. running mean: -19.536475
episode 5183.000000, reward total was -20.000000. running mean: -19.541110
episode 5184.000000, reward total was -17.000000. running mean: -19.515699
episode 5185.000000, reward total was -20.000000. running mean: -19.520542
episode 5186.000000, reward total was -20.000000. running mean: -19.525337
episode 5187.000000, reward total was -19.000000. running mean: -19.520083
episode 5188.000000, reward total was -16.000000. running mean: -19.484883
episode 5189.000000, reward total was -20.000000. running mean: -19.490034
episode 5190.000000, reward total was -19.000000. running mean: -19.485133
episode 5191.000000, reward total was -20.000000. running mean: -19.490282
episode 5192.000000, reward total was -21.000000. running mean: -19.505379
episode 5193.000000, reward total was -19.000000. running mean: -19.500326
episode 5194.000000, reward total was -21.000000. running mean: -19.515322
episode 5195.000000, rewa

episode 5292.000000, reward total was -19.000000. running mean: -19.426255
episode 5293.000000, reward total was -18.000000. running mean: -19.411993
episode 5294.000000, reward total was -20.000000. running mean: -19.417873
episode 5295.000000, reward total was -19.000000. running mean: -19.413694
episode 5296.000000, reward total was -19.000000. running mean: -19.409557
episode 5297.000000, reward total was -21.000000. running mean: -19.425462
episode 5298.000000, reward total was -21.000000. running mean: -19.441207
episode 5299.000000, reward total was -21.000000. running mean: -19.456795
episode 5300.000000, reward total was -21.000000. running mean: -19.472227
episode 5301.000000, reward total was -20.000000. running mean: -19.477505
episode 5302.000000, reward total was -21.000000. running mean: -19.492730
episode 5303.000000, reward total was -21.000000. running mean: -19.507802
episode 5304.000000, reward total was -18.000000. running mean: -19.492724
episode 5305.000000, rewa

episode 5402.000000, reward total was -21.000000. running mean: -19.503231
episode 5403.000000, reward total was -20.000000. running mean: -19.508199
episode 5404.000000, reward total was -20.000000. running mean: -19.513117
episode 5405.000000, reward total was -21.000000. running mean: -19.527986
episode 5406.000000, reward total was -19.000000. running mean: -19.522706
episode 5407.000000, reward total was -20.000000. running mean: -19.527479
episode 5408.000000, reward total was -18.000000. running mean: -19.512204
episode 5409.000000, reward total was -21.000000. running mean: -19.527082
episode 5410.000000, reward total was -19.000000. running mean: -19.521811
episode 5411.000000, reward total was -19.000000. running mean: -19.516593
episode 5412.000000, reward total was -19.000000. running mean: -19.511427
episode 5413.000000, reward total was -20.000000. running mean: -19.516313
episode 5414.000000, reward total was -20.000000. running mean: -19.521150
episode 5415.000000, rewa

episode 5512.000000, reward total was -20.000000. running mean: -19.557326
episode 5513.000000, reward total was -20.000000. running mean: -19.561753
episode 5514.000000, reward total was -20.000000. running mean: -19.566135
episode 5515.000000, reward total was -20.000000. running mean: -19.570474
episode 5516.000000, reward total was -20.000000. running mean: -19.574769
episode 5517.000000, reward total was -20.000000. running mean: -19.579021
episode 5518.000000, reward total was -21.000000. running mean: -19.593231
episode 5519.000000, reward total was -19.000000. running mean: -19.587299
episode 5520.000000, reward total was -19.000000. running mean: -19.581426
episode 5521.000000, reward total was -21.000000. running mean: -19.595612
episode 5522.000000, reward total was -17.000000. running mean: -19.569656
episode 5523.000000, reward total was -19.000000. running mean: -19.563959
episode 5524.000000, reward total was -19.000000. running mean: -19.558319
episode 5525.000000, rewa

episode 5622.000000, reward total was -21.000000. running mean: -19.648220
episode 5623.000000, reward total was -18.000000. running mean: -19.631738
episode 5624.000000, reward total was -20.000000. running mean: -19.635421
episode 5625.000000, reward total was -19.000000. running mean: -19.629067
episode 5626.000000, reward total was -18.000000. running mean: -19.612776
episode 5627.000000, reward total was -18.000000. running mean: -19.596648
episode 5628.000000, reward total was -19.000000. running mean: -19.590682
episode 5629.000000, reward total was -21.000000. running mean: -19.604775
episode 5630.000000, reward total was -19.000000. running mean: -19.598727
episode 5631.000000, reward total was -21.000000. running mean: -19.612740
episode 5632.000000, reward total was -21.000000. running mean: -19.626612
episode 5633.000000, reward total was -19.000000. running mean: -19.620346
episode 5634.000000, reward total was -20.000000. running mean: -19.624143
episode 5635.000000, rewa

episode 5732.000000, reward total was -20.000000. running mean: -19.492186
episode 5733.000000, reward total was -21.000000. running mean: -19.507265
episode 5734.000000, reward total was -19.000000. running mean: -19.502192
episode 5735.000000, reward total was -19.000000. running mean: -19.497170
episode 5736.000000, reward total was -19.000000. running mean: -19.492198
episode 5737.000000, reward total was -20.000000. running mean: -19.497276
episode 5738.000000, reward total was -20.000000. running mean: -19.502304
episode 5739.000000, reward total was -20.000000. running mean: -19.507281
episode 5740.000000, reward total was -19.000000. running mean: -19.502208
episode 5741.000000, reward total was -20.000000. running mean: -19.507186
episode 5742.000000, reward total was -18.000000. running mean: -19.492114
episode 5743.000000, reward total was -20.000000. running mean: -19.497193
episode 5744.000000, reward total was -21.000000. running mean: -19.512221
episode 5745.000000, rewa

episode 5842.000000, reward total was -19.000000. running mean: -19.455493
episode 5843.000000, reward total was -20.000000. running mean: -19.460938
episode 5844.000000, reward total was -19.000000. running mean: -19.456329
episode 5845.000000, reward total was -19.000000. running mean: -19.451766
episode 5846.000000, reward total was -19.000000. running mean: -19.447248
episode 5847.000000, reward total was -21.000000. running mean: -19.462776
episode 5848.000000, reward total was -19.000000. running mean: -19.458148
episode 5849.000000, reward total was -16.000000. running mean: -19.423566
episode 5850.000000, reward total was -20.000000. running mean: -19.429331
episode 5851.000000, reward total was -18.000000. running mean: -19.415037
episode 5852.000000, reward total was -20.000000. running mean: -19.420887
episode 5853.000000, reward total was -21.000000. running mean: -19.436678
episode 5854.000000, reward total was -19.000000. running mean: -19.432311
episode 5855.000000, rewa

episode 5952.000000, reward total was -21.000000. running mean: -19.483705
episode 5953.000000, reward total was -21.000000. running mean: -19.498868
episode 5954.000000, reward total was -21.000000. running mean: -19.513879
episode 5955.000000, reward total was -19.000000. running mean: -19.508740
episode 5956.000000, reward total was -20.000000. running mean: -19.513653
episode 5957.000000, reward total was -21.000000. running mean: -19.528516
episode 5958.000000, reward total was -19.000000. running mean: -19.523231
episode 5959.000000, reward total was -18.000000. running mean: -19.507999
episode 5960.000000, reward total was -19.000000. running mean: -19.502919
episode 5961.000000, reward total was -20.000000. running mean: -19.507890
episode 5962.000000, reward total was -21.000000. running mean: -19.522811
episode 5963.000000, reward total was -17.000000. running mean: -19.497583
episode 5964.000000, reward total was -15.000000. running mean: -19.452607
episode 5965.000000, rewa

episode 6062.000000, reward total was -20.000000. running mean: -19.438675
episode 6063.000000, reward total was -20.000000. running mean: -19.444289
episode 6064.000000, reward total was -19.000000. running mean: -19.439846
episode 6065.000000, reward total was -19.000000. running mean: -19.435447
episode 6066.000000, reward total was -20.000000. running mean: -19.441093
episode 6067.000000, reward total was -21.000000. running mean: -19.456682
episode 6068.000000, reward total was -19.000000. running mean: -19.452115
episode 6069.000000, reward total was -15.000000. running mean: -19.407594
episode 6070.000000, reward total was -19.000000. running mean: -19.403518
episode 6071.000000, reward total was -19.000000. running mean: -19.399483
episode 6072.000000, reward total was -19.000000. running mean: -19.395488
episode 6073.000000, reward total was -21.000000. running mean: -19.411533
episode 6074.000000, reward total was -17.000000. running mean: -19.387418
episode 6075.000000, rewa

episode 6172.000000, reward total was -20.000000. running mean: -19.325972
episode 6173.000000, reward total was -19.000000. running mean: -19.322712
episode 6174.000000, reward total was -21.000000. running mean: -19.339485
episode 6175.000000, reward total was -21.000000. running mean: -19.356090
episode 6176.000000, reward total was -18.000000. running mean: -19.342529
episode 6177.000000, reward total was -19.000000. running mean: -19.339104
episode 6178.000000, reward total was -19.000000. running mean: -19.335713
episode 6179.000000, reward total was -21.000000. running mean: -19.352356
episode 6180.000000, reward total was -18.000000. running mean: -19.338832
episode 6181.000000, reward total was -19.000000. running mean: -19.335444
episode 6182.000000, reward total was -21.000000. running mean: -19.352089
episode 6183.000000, reward total was -19.000000. running mean: -19.348568
episode 6184.000000, reward total was -20.000000. running mean: -19.355083
episode 6185.000000, rewa

episode 6282.000000, reward total was -19.000000. running mean: -19.262209
episode 6283.000000, reward total was -20.000000. running mean: -19.269587
episode 6284.000000, reward total was -19.000000. running mean: -19.266891
episode 6285.000000, reward total was -20.000000. running mean: -19.274223
episode 6286.000000, reward total was -17.000000. running mean: -19.251480
episode 6287.000000, reward total was -20.000000. running mean: -19.258965
episode 6288.000000, reward total was -19.000000. running mean: -19.256376
episode 6289.000000, reward total was -20.000000. running mean: -19.263812
episode 6290.000000, reward total was -19.000000. running mean: -19.261174
episode 6291.000000, reward total was -18.000000. running mean: -19.248562
episode 6292.000000, reward total was -19.000000. running mean: -19.246077
episode 6293.000000, reward total was -19.000000. running mean: -19.243616
episode 6294.000000, reward total was -15.000000. running mean: -19.201180
episode 6295.000000, rewa

episode 6392.000000, reward total was -21.000000. running mean: -19.268957
episode 6393.000000, reward total was -18.000000. running mean: -19.256268
episode 6394.000000, reward total was -19.000000. running mean: -19.253705
episode 6395.000000, reward total was -20.000000. running mean: -19.261168
episode 6396.000000, reward total was -21.000000. running mean: -19.278556
episode 6397.000000, reward total was -19.000000. running mean: -19.275771
episode 6398.000000, reward total was -17.000000. running mean: -19.253013
episode 6399.000000, reward total was -20.000000. running mean: -19.260483
episode 6400.000000, reward total was -18.000000. running mean: -19.247878
episode 6401.000000, reward total was -19.000000. running mean: -19.245399
episode 6402.000000, reward total was -20.000000. running mean: -19.252945
episode 6403.000000, reward total was -19.000000. running mean: -19.250416
episode 6404.000000, reward total was -19.000000. running mean: -19.247912
episode 6405.000000, rewa

episode 6502.000000, reward total was -19.000000. running mean: -19.239672
episode 6503.000000, reward total was -21.000000. running mean: -19.257276
episode 6504.000000, reward total was -20.000000. running mean: -19.264703
episode 6505.000000, reward total was -19.000000. running mean: -19.262056
episode 6506.000000, reward total was -20.000000. running mean: -19.269435
episode 6507.000000, reward total was -20.000000. running mean: -19.276741
episode 6508.000000, reward total was -19.000000. running mean: -19.273974
episode 6509.000000, reward total was -20.000000. running mean: -19.281234
episode 6510.000000, reward total was -18.000000. running mean: -19.268422
episode 6511.000000, reward total was -17.000000. running mean: -19.245737
episode 6512.000000, reward total was -19.000000. running mean: -19.243280
episode 6513.000000, reward total was -21.000000. running mean: -19.260847
episode 6514.000000, reward total was -21.000000. running mean: -19.278239
episode 6515.000000, rewa

episode 6612.000000, reward total was -21.000000. running mean: -19.299384
episode 6613.000000, reward total was -20.000000. running mean: -19.306390
episode 6614.000000, reward total was -19.000000. running mean: -19.303326
episode 6615.000000, reward total was -21.000000. running mean: -19.320293
episode 6616.000000, reward total was -21.000000. running mean: -19.337090
episode 6617.000000, reward total was -18.000000. running mean: -19.323719
episode 6618.000000, reward total was -19.000000. running mean: -19.320482
episode 6619.000000, reward total was -15.000000. running mean: -19.277277
episode 6620.000000, reward total was -18.000000. running mean: -19.264504
episode 6621.000000, reward total was -17.000000. running mean: -19.241859
episode 6622.000000, reward total was -21.000000. running mean: -19.259441
episode 6623.000000, reward total was -20.000000. running mean: -19.266846
episode 6624.000000, reward total was -20.000000. running mean: -19.274178
episode 6625.000000, rewa

episode 6722.000000, reward total was -15.000000. running mean: -19.151979
episode 6723.000000, reward total was -17.000000. running mean: -19.130459
episode 6724.000000, reward total was -20.000000. running mean: -19.139155
episode 6725.000000, reward total was -19.000000. running mean: -19.137763
episode 6726.000000, reward total was -20.000000. running mean: -19.146386
episode 6727.000000, reward total was -21.000000. running mean: -19.164922
episode 6728.000000, reward total was -20.000000. running mean: -19.173273
episode 6729.000000, reward total was -20.000000. running mean: -19.181540
episode 6730.000000, reward total was -20.000000. running mean: -19.189724
episode 6731.000000, reward total was -20.000000. running mean: -19.197827
episode 6732.000000, reward total was -18.000000. running mean: -19.185849
episode 6733.000000, reward total was -19.000000. running mean: -19.183990
episode 6734.000000, reward total was -18.000000. running mean: -19.172151
episode 6735.000000, rewa

episode 6832.000000, reward total was -19.000000. running mean: -19.145382
episode 6833.000000, reward total was -21.000000. running mean: -19.163929
episode 6834.000000, reward total was -17.000000. running mean: -19.142289
episode 6835.000000, reward total was -20.000000. running mean: -19.150866
episode 6836.000000, reward total was -19.000000. running mean: -19.149358
episode 6837.000000, reward total was -18.000000. running mean: -19.137864
episode 6838.000000, reward total was -21.000000. running mean: -19.156485
episode 6839.000000, reward total was -20.000000. running mean: -19.164921
episode 6840.000000, reward total was -21.000000. running mean: -19.183271
episode 6841.000000, reward total was -21.000000. running mean: -19.201439
episode 6842.000000, reward total was -20.000000. running mean: -19.209424
episode 6843.000000, reward total was -15.000000. running mean: -19.167330
episode 6844.000000, reward total was -18.000000. running mean: -19.155657
episode 6845.000000, rewa

episode 6942.000000, reward total was -21.000000. running mean: -19.139394
episode 6943.000000, reward total was -19.000000. running mean: -19.138000
episode 6944.000000, reward total was -17.000000. running mean: -19.116620
episode 6945.000000, reward total was -18.000000. running mean: -19.105454
episode 6946.000000, reward total was -20.000000. running mean: -19.114400
episode 6947.000000, reward total was -19.000000. running mean: -19.113256
episode 6948.000000, reward total was -21.000000. running mean: -19.132123
episode 6949.000000, reward total was -19.000000. running mean: -19.130802
episode 6950.000000, reward total was -19.000000. running mean: -19.129494
episode 6951.000000, reward total was -19.000000. running mean: -19.128199
episode 6952.000000, reward total was -19.000000. running mean: -19.126917
episode 6953.000000, reward total was -19.000000. running mean: -19.125648
episode 6954.000000, reward total was -18.000000. running mean: -19.114391
episode 6955.000000, rewa