In [1]:
import gym
import numpy as np

from gym.wrappers import AtariPreprocessing
# NOTE(review): this only assigns an attribute called `new_step_api` on the
# imported gym module. Nothing in this notebook reads it, and the loops below
# unpack env.step() into four values. Presumably it was meant to opt into gym's
# newer step API -- confirm whether it has any effect in the installed version.
gym.new_step_api=True
# Pong environment instance; it is passed to train_model in the next cell.
env = gym.make('Pong-v0')

# --- network dimensions -------------------------------------------------
H = 1600     # width of the hidden layer
D = 80 * 80  # length of one flattened 80x80 input frame

# Policy weights: Gaussian draws scaled by 1/sqrt(fan-in) ("Xavier"-style).
# W1 is drawn before W2, so the order of random draws is unchanged.
model = {
  'W1': np.random.randn(H, D) / np.sqrt(D),
  'W2': np.random.randn(H) / np.sqrt(H),
}

# --- hyperparameters ----------------------------------------------------
batch_size = 10       # episodes accumulated between parameter updates
learning_rate = 1e-4
gamma = 0.99          # reward discount factor
decay_rate = 0.99     # decay of the RMSProp running average of grad^2

# Per-parameter accumulators with the same shapes as the weights:
# grad_buffer sums gradients over a batch, rmsprop_cache is the RMSProp memory.
grad_buffer = dict((name, np.zeros_like(weights)) for name, weights in model.items())
rmsprop_cache = dict((name, np.zeros_like(weights)) for name, weights in model.items())

def sigmoid(x):
  """Logistic "squashing" function 1 / (1 + exp(-x)), mapping reals into [0, 1].

  The value is computed from exp(-|x|), which never exceeds 1, so inputs of
  large magnitude no longer overflow inside np.exp (the naive form emits a
  RuntimeWarning for very negative x).

  Args:
    x: scalar or array-like of real numbers.

  Returns:
    A NumPy scalar for scalar input, otherwise an array with the shape of x.
  """
  x = np.asarray(x)
  decay = np.exp(-np.abs(x))  # always within [0, 1]; cannot overflow
  # x >= 0: 1 / (1 + e^-x)      x < 0: e^x / (1 + e^x)   (algebraically equal)
  squashed = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
  return squashed[()]  # unwrap 0-d results to a scalar; n-d arrays pass through

def prepro(I):
  """Turn a raw frame into a flat binary float vector without modifying it.

  The frame is cropped to rows 35..194, every second row and column is kept,
  and only channel 0 is used. Pixels equal to 0, 144 or 109 (the two background
  values erased by the original code) become 0.0; every other pixel (paddles,
  ball) becomes 1.0.

  The earlier version wrote the mask into slices of `I`, which are views, so
  the caller's observation array was overwritten. This version only reads it.

  Args:
    I: array-like indexed as [row, column, channel]. A 210x160 frame yields
      80 * 80 = 6400 values.

  Returns:
    1-D float array of zeros and ones.
  """
  frame = np.asarray(I)
  # crop rows 35..194 and downsample by 2 in both directions, channel 0 only
  field = frame[35:195:2, ::2, 0]
  # foreground = anything that is neither already 0 nor one of the backgrounds
  foreground = (field != 0) & (field != 144) & (field != 109)
  return foreground.astype(float).ravel()

def discount_rewards(r, discount=None):
  """Compute discounted returns, restarting the sum at every non-zero reward.

  Walking backwards through `r`, the running return is reset whenever a
  non-zero reward is met (in Pong that marks a game boundary), then updated as
  running * discount + r[t].

  The result is always floating point. Previously it inherited the dtype of
  `r`, so integer rewards had their discounted values truncated.

  Args:
    r: reward array indexed along its first axis `r.size` times, i.e. expected
      to be 1-D or a (T, 1) column as built in train_model.
    discount: discount factor; when None the module-level `gamma` is used.

  Returns:
    Float array shaped like `r` holding the discounted returns.
  """
  if discount is None:
    discount = gamma  # module-level hyperparameter
  r = np.asarray(r)
  # keep an existing float dtype, promote everything else to float
  out_dtype = r.dtype if np.issubdtype(r.dtype, np.floating) else float
  discounted_r = np.zeros_like(r, dtype=out_dtype)
  running_add = 0
  for t in reversed(range(r.size)):
    if r[t] != 0: running_add = 0 # reset the sum, since this was a game boundary (pong specific!)
    running_add = running_add * discount + r[t]
    discounted_r[t] = running_add
  return discounted_r

def policy_forward(x):
  """Forward pass of the two-layer policy using the module-level `model`.

  Args:
    x: input vector multiplied by model['W1'].

  Returns:
    (p, h): p is the probability of taking action 2, h is the hidden-layer
    activation after the ReLU.
  """
  pre_activation = np.dot(model['W1'], x)
  # ReLU: negative pre-activations are clamped to zero
  hidden = np.where(pre_activation < 0, 0.0, pre_activation)
  logit = np.dot(model['W2'], hidden)
  return sigmoid(logit), hidden

def policy_backward(epx, eph, epdlogp):
  """Backward pass: gradients of the policy weights for one episode.

  As assembled in train_model, each argument stacks one row per time step:
  epx holds the inputs, eph the hidden activations and epdlogp the
  (advantage-weighted) output gradients as a column. model['W2'] is read from
  the module-level `model`.

  Returns:
    dict with keys 'W1' and 'W2' holding the respective gradients.
  """
  grad_w2 = eph.T.dot(epdlogp).ravel()
  # push the output gradient back through W2 ...
  grad_hidden = np.outer(epdlogp, model['W2'])
  # ... and through the ReLU: units that were inactive pass no gradient
  grad_hidden = np.where(eph <= 0, 0, grad_hidden)
  grad_w1 = grad_hidden.T.dot(epx)
  return {'W1': grad_w1, 'W2': grad_w2}

def model_step(model, observation, prev_x):
  """Pick the greedy action for one frame using the weights in `model`.

  The forward pass is evaluated with the `model` argument. Previously this
  function called policy_forward, which reads the module-level `model`, so the
  argument was silently ignored.

  Args:
    model: dict with weight arrays under 'W1' and 'W2'.
    observation: raw frame, passed to prepro.
    prev_x: preprocessed previous frame, or None on the first step.

  Returns:
    (action, cur_x): action is 2 when the policy's probability for action 2 is
    at least 0.5, otherwise 3; cur_x is the preprocessed current frame, to be
    passed back as prev_x on the next call.
  """
  cur_x = prepro(observation)
  # the network input is the difference frame; the first step has no
  # predecessor, so it gets an all-zero input
  x = cur_x - prev_x if prev_x is not None else np.zeros(D)

  # forward pass with the weights that were passed in
  h = np.dot(model['W1'], x)
  h[h < 0] = 0 # ReLU nonlinearity
  aprob = sigmoid(np.dot(model['W2'], h))

  # deterministic (greedy) choice -- no sampling here, unlike train_model
  action = 2 if aprob >= 0.5 else 3

  return action, cur_x

def play_game(env, model):
  """Play one episode (at most 1000 steps) greedily and show it as a GIF.

  Each step renders a frame, asks model_step for an action and advances the
  environment. Afterwards the environment is closed and the collected frames
  are handed to display_frames_as_gif.

  The "finished without success" message is now printed only when the step
  budget runs out. Before, it was also printed after an episode that had
  already reported its normal finish.

  Args:
    env: environment providing reset(), render(mode=...), step() and close().
    model: policy weights forwarded to model_step.
  """
  observation = env.reset()

  frames = []
  cumulated_reward = 0

  prev_x = None # used in computing the difference frame

  for t in range(1000):
    frames.append(env.render(mode = 'rgb_array'))
    action, prev_x = model_step(model, observation, prev_x)
    observation, reward, done, info = env.step(action)
    cumulated_reward += reward
    if done:
      print("Episode finished after {} timesteps, accumulated reward = {}".format(t+1, cumulated_reward))
      break
  else:
    # reached only when the loop was not left through `break`
    print("Episode finished without success, accumulated reward = {}".format(cumulated_reward))
  env.close()
  # NOTE(review): display_frames_as_gif is not defined in the visible part of
  # this notebook -- confirm it is defined before this function is called.
  display_frames_as_gif(frames)

def train_model(env, model, total_episodes = 100):
  """Train the policy with policy gradients and batched RMSProp updates.

  Actions are sampled from the policy, and per-step inputs, hidden states,
  output gradients and rewards are recorded. At the end of each episode the
  gradient is weighted by the standardised discounted returns and accumulated
  in `grad_buffer`. Every `batch_size` episodes an RMSProp step is applied
  to `model` in place.

  Note: policy_forward / policy_backward read the module-level `model`, and
  `grad_buffer` / `rmsprop_cache` are module-level too, so `model` should be
  that same module-level dict.

  Changes from the earlier version:
    * Returns are only divided by their standard deviation when it is
      positive; a constant-return episode no longer produces NaN gradients.
    * The loop stops once `episode_number >= total_episodes`, so zero,
      negative or non-integer values terminate instead of looping forever.
    * The episode counter is printed as an integer (%d instead of %f).
    * Rewards are converted to float before discounting, so integer rewards
      cannot break the in-place normalisation.

  Args:
    env: environment providing reset() and a four-value step().
    model: dict of weight arrays updated in place.
    total_episodes: number of episodes to train for.

  Returns:
    List of (episode_number, reward_sum, running_reward) tuples, one per
    finished episode.
  """
  hist = []
  observation = env.reset()

  prev_x = None # used in computing the difference frame
  xs,hs,dlogps,drs = [],[],[],[]
  running_reward = None
  reward_sum = 0
  episode_number = 0

  while episode_number < total_episodes:

    cur_x = prepro(observation)
    x = cur_x - prev_x if prev_x is not None else np.zeros(D)
    prev_x = cur_x

    # forward the policy network and sample an action from the returned probability
    aprob, h = policy_forward(x)
    action = 2 if np.random.uniform() < aprob else 3 # roll the dice!

    # record various intermediates (needed later for backprop)
    xs.append(x) # observation
    hs.append(h) # hidden state
    y = 1 if action == 2 else 0 # a "fake label"
    dlogps.append(y - aprob) # grad that encourages the action that was taken to be taken (see http://cs231n.github.io/neural-networks-2/#losses if confused)

    # step the environment and get new measurements
    observation, reward, done, info = env.step(action)
    reward_sum += reward

    drs.append(reward) # record reward (has to be done after we call step() to get reward for previous action)

    if done: # an episode finished
      episode_number += 1

      # stack together all inputs, hidden states, action gradients, and rewards for this episode
      epx = np.vstack(xs)
      eph = np.vstack(hs)
      epdlogp = np.vstack(dlogps)
      epr = np.vstack(drs).astype(float) # float so the in-place normalisation below is valid
      xs,hs,dlogps,drs = [],[],[],[] # reset array memory

      # compute the discounted reward backwards through time
      discounted_epr = discount_rewards(epr)
      # standardize the rewards to be unit normal (helps control the gradient estimator variance)
      discounted_epr -= np.mean(discounted_epr)
      spread = np.std(discounted_epr)
      if spread > 0: # all-equal returns are already centred at zero; dividing would give NaN
        discounted_epr /= spread

      epdlogp *= discounted_epr # modulate the gradient with advantage (PG magic happens right here.)
      grad = policy_backward(epx, eph, epdlogp)
      for k in model: grad_buffer[k] += grad[k] # accumulate grad over batch

      # perform rmsprop parameter update every batch_size episodes
      if episode_number % batch_size == 0:
        for k,v in model.items():
          g = grad_buffer[k] # gradient
          rmsprop_cache[k] = decay_rate * rmsprop_cache[k] + (1 - decay_rate) * g**2
          model[k] += learning_rate * g / (np.sqrt(rmsprop_cache[k]) + 1e-5)
          grad_buffer[k] = np.zeros_like(v) # reset batch gradient buffer

      # boring book-keeping
      running_reward = reward_sum if running_reward is None else running_reward * 0.99 + reward_sum * 0.01
      hist.append((episode_number, reward_sum, running_reward))
      print ('episode %d, reward total was %f. running mean: %f' % (episode_number, reward_sum, running_reward))
      reward_sum = 0
      observation = env.reset() # reset env
      prev_x = None

  return hist

   
    

  logger.warn(
  deprecation(
  deprecation(


In [2]:
# Train for 7000 episodes and time the run; hist1 receives the list returned by train_model.
%time hist1 = train_model(env, model, total_episodes=7000)

  logger.deprecation(


episode 1.000000, reward total was -21.000000. running mean: -21.000000
episode 2.000000, reward total was -21.000000. running mean: -21.000000
episode 3.000000, reward total was -21.000000. running mean: -21.000000
episode 4.000000, reward total was -17.000000. running mean: -20.960000
episode 5.000000, reward total was -19.000000. running mean: -20.940400
episode 6.000000, reward total was -20.000000. running mean: -20.930996
episode 7.000000, reward total was -21.000000. running mean: -20.931686
episode 8.000000, reward total was -21.000000. running mean: -20.932369
episode 9.000000, reward total was -21.000000. running mean: -20.933045
episode 10.000000, reward total was -20.000000. running mean: -20.923715
episode 11.000000, reward total was -20.000000. running mean: -20.914478
episode 12.000000, reward total was -21.000000. running mean: -20.915333
episode 13.000000, reward total was -21.000000. running mean: -20.916180
episode 14.000000, reward total was -21.000000. running mean

episode 114.000000, reward total was -19.000000. running mean: -20.633100
episode 115.000000, reward total was -19.000000. running mean: -20.616769
episode 116.000000, reward total was -19.000000. running mean: -20.600602
episode 117.000000, reward total was -19.000000. running mean: -20.584596
episode 118.000000, reward total was -19.000000. running mean: -20.568750
episode 119.000000, reward total was -21.000000. running mean: -20.573062
episode 120.000000, reward total was -21.000000. running mean: -20.577332
episode 121.000000, reward total was -21.000000. running mean: -20.581558
episode 122.000000, reward total was -18.000000. running mean: -20.555743
episode 123.000000, reward total was -21.000000. running mean: -20.560185
episode 124.000000, reward total was -20.000000. running mean: -20.554584
episode 125.000000, reward total was -21.000000. running mean: -20.559038
episode 126.000000, reward total was -21.000000. running mean: -20.563447
episode 127.000000, reward total was -

episode 225.000000, reward total was -20.000000. running mean: -20.432854
episode 226.000000, reward total was -21.000000. running mean: -20.438526
episode 227.000000, reward total was -21.000000. running mean: -20.444141
episode 228.000000, reward total was -20.000000. running mean: -20.439699
episode 229.000000, reward total was -20.000000. running mean: -20.435302
episode 230.000000, reward total was -21.000000. running mean: -20.440949
episode 231.000000, reward total was -21.000000. running mean: -20.446540
episode 232.000000, reward total was -19.000000. running mean: -20.432074
episode 233.000000, reward total was -19.000000. running mean: -20.417753
episode 234.000000, reward total was -21.000000. running mean: -20.423576
episode 235.000000, reward total was -20.000000. running mean: -20.419340
episode 236.000000, reward total was -21.000000. running mean: -20.425147
episode 237.000000, reward total was -21.000000. running mean: -20.430895
episode 238.000000, reward total was -

episode 336.000000, reward total was -20.000000. running mean: -20.361309
episode 337.000000, reward total was -20.000000. running mean: -20.357696
episode 338.000000, reward total was -21.000000. running mean: -20.364119
episode 339.000000, reward total was -21.000000. running mean: -20.370478
episode 340.000000, reward total was -20.000000. running mean: -20.366773
episode 341.000000, reward total was -19.000000. running mean: -20.353105
episode 342.000000, reward total was -19.000000. running mean: -20.339574
episode 343.000000, reward total was -19.000000. running mean: -20.326179
episode 344.000000, reward total was -20.000000. running mean: -20.322917
episode 345.000000, reward total was -20.000000. running mean: -20.319688
episode 346.000000, reward total was -18.000000. running mean: -20.296491
episode 347.000000, reward total was -20.000000. running mean: -20.293526
episode 348.000000, reward total was -19.000000. running mean: -20.280591
episode 349.000000, reward total was -

episode 447.000000, reward total was -21.000000. running mean: -20.268404
episode 448.000000, reward total was -21.000000. running mean: -20.275720
episode 449.000000, reward total was -21.000000. running mean: -20.282963
episode 450.000000, reward total was -21.000000. running mean: -20.290133
episode 451.000000, reward total was -21.000000. running mean: -20.297232
episode 452.000000, reward total was -21.000000. running mean: -20.304259
episode 453.000000, reward total was -21.000000. running mean: -20.311217
episode 454.000000, reward total was -21.000000. running mean: -20.318105
episode 455.000000, reward total was -21.000000. running mean: -20.324923
episode 456.000000, reward total was -18.000000. running mean: -20.301674
episode 457.000000, reward total was -20.000000. running mean: -20.298658
episode 458.000000, reward total was -21.000000. running mean: -20.305671
episode 459.000000, reward total was -21.000000. running mean: -20.312614
episode 460.000000, reward total was -

episode 558.000000, reward total was -20.000000. running mean: -20.300858
episode 559.000000, reward total was -21.000000. running mean: -20.307849
episode 560.000000, reward total was -21.000000. running mean: -20.314771
episode 561.000000, reward total was -21.000000. running mean: -20.321623
episode 562.000000, reward total was -21.000000. running mean: -20.328407
episode 563.000000, reward total was -21.000000. running mean: -20.335123
episode 564.000000, reward total was -21.000000. running mean: -20.341772
episode 565.000000, reward total was -21.000000. running mean: -20.348354
episode 566.000000, reward total was -21.000000. running mean: -20.354870
episode 567.000000, reward total was -21.000000. running mean: -20.361322
episode 568.000000, reward total was -21.000000. running mean: -20.367709
episode 569.000000, reward total was -20.000000. running mean: -20.364031
episode 570.000000, reward total was -21.000000. running mean: -20.370391
episode 571.000000, reward total was -

episode 669.000000, reward total was -20.000000. running mean: -20.190346
episode 670.000000, reward total was -18.000000. running mean: -20.168442
episode 671.000000, reward total was -20.000000. running mean: -20.166758
episode 672.000000, reward total was -18.000000. running mean: -20.145090
episode 673.000000, reward total was -19.000000. running mean: -20.133639
episode 674.000000, reward total was -20.000000. running mean: -20.132303
episode 675.000000, reward total was -21.000000. running mean: -20.140980
episode 676.000000, reward total was -21.000000. running mean: -20.149570
episode 677.000000, reward total was -21.000000. running mean: -20.158074
episode 678.000000, reward total was -21.000000. running mean: -20.166493
episode 679.000000, reward total was -19.000000. running mean: -20.154829
episode 680.000000, reward total was -18.000000. running mean: -20.133280
episode 681.000000, reward total was -21.000000. running mean: -20.141947
episode 682.000000, reward total was -

episode 780.000000, reward total was -20.000000. running mean: -20.085535
episode 781.000000, reward total was -18.000000. running mean: -20.064680
episode 782.000000, reward total was -20.000000. running mean: -20.064033
episode 783.000000, reward total was -20.000000. running mean: -20.063393
episode 784.000000, reward total was -20.000000. running mean: -20.062759
episode 785.000000, reward total was -21.000000. running mean: -20.072131
episode 786.000000, reward total was -19.000000. running mean: -20.061410
episode 787.000000, reward total was -20.000000. running mean: -20.060796
episode 788.000000, reward total was -21.000000. running mean: -20.070188
episode 789.000000, reward total was -20.000000. running mean: -20.069486
episode 790.000000, reward total was -19.000000. running mean: -20.058791
episode 791.000000, reward total was -20.000000. running mean: -20.058203
episode 792.000000, reward total was -20.000000. running mean: -20.057621
episode 793.000000, reward total was -

episode 891.000000, reward total was -21.000000. running mean: -20.004661
episode 892.000000, reward total was -19.000000. running mean: -19.994615
episode 893.000000, reward total was -21.000000. running mean: -20.004668
episode 894.000000, reward total was -21.000000. running mean: -20.014622
episode 895.000000, reward total was -21.000000. running mean: -20.024476
episode 896.000000, reward total was -21.000000. running mean: -20.034231
episode 897.000000, reward total was -21.000000. running mean: -20.043888
episode 898.000000, reward total was -19.000000. running mean: -20.033450
episode 899.000000, reward total was -17.000000. running mean: -20.003115
episode 900.000000, reward total was -21.000000. running mean: -20.013084
episode 901.000000, reward total was -20.000000. running mean: -20.012953
episode 902.000000, reward total was -20.000000. running mean: -20.012824
episode 903.000000, reward total was -21.000000. running mean: -20.022695
episode 904.000000, reward total was -

episode 1002.000000, reward total was -19.000000. running mean: -19.948567
episode 1003.000000, reward total was -20.000000. running mean: -19.949081
episode 1004.000000, reward total was -20.000000. running mean: -19.949591
episode 1005.000000, reward total was -20.000000. running mean: -19.950095
episode 1006.000000, reward total was -21.000000. running mean: -19.960594
episode 1007.000000, reward total was -21.000000. running mean: -19.970988
episode 1008.000000, reward total was -21.000000. running mean: -19.981278
episode 1009.000000, reward total was -20.000000. running mean: -19.981465
episode 1010.000000, reward total was -21.000000. running mean: -19.991651
episode 1011.000000, reward total was -21.000000. running mean: -20.001734
episode 1012.000000, reward total was -21.000000. running mean: -20.011717
episode 1013.000000, reward total was -21.000000. running mean: -20.021600
episode 1014.000000, reward total was -21.000000. running mean: -20.031384
episode 1015.000000, rewa

episode 1112.000000, reward total was -19.000000. running mean: -19.906478
episode 1113.000000, reward total was -18.000000. running mean: -19.887413
episode 1114.000000, reward total was -19.000000. running mean: -19.878539
episode 1115.000000, reward total was -21.000000. running mean: -19.889753
episode 1116.000000, reward total was -21.000000. running mean: -19.900856
episode 1117.000000, reward total was -19.000000. running mean: -19.891847
episode 1118.000000, reward total was -20.000000. running mean: -19.892929
episode 1119.000000, reward total was -17.000000. running mean: -19.864000
episode 1120.000000, reward total was -17.000000. running mean: -19.835360
episode 1121.000000, reward total was -19.000000. running mean: -19.827006
episode 1122.000000, reward total was -20.000000. running mean: -19.828736
episode 1123.000000, reward total was -21.000000. running mean: -19.840449
episode 1124.000000, reward total was -20.000000. running mean: -19.842044
episode 1125.000000, rewa

episode 1222.000000, reward total was -19.000000. running mean: -19.826140
episode 1223.000000, reward total was -20.000000. running mean: -19.827878
episode 1224.000000, reward total was -20.000000. running mean: -19.829599
episode 1225.000000, reward total was -19.000000. running mean: -19.821303
episode 1226.000000, reward total was -21.000000. running mean: -19.833090
episode 1227.000000, reward total was -20.000000. running mean: -19.834759
episode 1228.000000, reward total was -19.000000. running mean: -19.826412
episode 1229.000000, reward total was -20.000000. running mean: -19.828148
episode 1230.000000, reward total was -21.000000. running mean: -19.839866
episode 1231.000000, reward total was -21.000000. running mean: -19.851468
episode 1232.000000, reward total was -20.000000. running mean: -19.852953
episode 1233.000000, reward total was -18.000000. running mean: -19.834423
episode 1234.000000, reward total was -20.000000. running mean: -19.836079
episode 1235.000000, rewa

episode 1332.000000, reward total was -21.000000. running mean: -19.845965
episode 1333.000000, reward total was -20.000000. running mean: -19.847506
episode 1334.000000, reward total was -20.000000. running mean: -19.849031
episode 1335.000000, reward total was -20.000000. running mean: -19.850540
episode 1336.000000, reward total was -18.000000. running mean: -19.832035
episode 1337.000000, reward total was -21.000000. running mean: -19.843714
episode 1338.000000, reward total was -17.000000. running mean: -19.815277
episode 1339.000000, reward total was -19.000000. running mean: -19.807125
episode 1340.000000, reward total was -19.000000. running mean: -19.799053
episode 1341.000000, reward total was -20.000000. running mean: -19.801063
episode 1342.000000, reward total was -20.000000. running mean: -19.803052
episode 1343.000000, reward total was -21.000000. running mean: -19.815022
episode 1344.000000, reward total was -20.000000. running mean: -19.816871
episode 1345.000000, rewa

episode 1442.000000, reward total was -19.000000. running mean: -19.698268
episode 1443.000000, reward total was -20.000000. running mean: -19.701285
episode 1444.000000, reward total was -20.000000. running mean: -19.704272
episode 1445.000000, reward total was -19.000000. running mean: -19.697229
episode 1446.000000, reward total was -20.000000. running mean: -19.700257
episode 1447.000000, reward total was -21.000000. running mean: -19.713255
episode 1448.000000, reward total was -20.000000. running mean: -19.716122
episode 1449.000000, reward total was -21.000000. running mean: -19.728961
episode 1450.000000, reward total was -18.000000. running mean: -19.711671
episode 1451.000000, reward total was -19.000000. running mean: -19.704554
episode 1452.000000, reward total was -19.000000. running mean: -19.697509
episode 1453.000000, reward total was -20.000000. running mean: -19.700534
episode 1454.000000, reward total was -20.000000. running mean: -19.703529
episode 1455.000000, rewa

episode 1552.000000, reward total was -20.000000. running mean: -19.743551
episode 1553.000000, reward total was -20.000000. running mean: -19.746116
episode 1554.000000, reward total was -20.000000. running mean: -19.748654
episode 1555.000000, reward total was -20.000000. running mean: -19.751168
episode 1556.000000, reward total was -20.000000. running mean: -19.753656
episode 1557.000000, reward total was -20.000000. running mean: -19.756120
episode 1558.000000, reward total was -21.000000. running mean: -19.768558
episode 1559.000000, reward total was -21.000000. running mean: -19.780873
episode 1560.000000, reward total was -21.000000. running mean: -19.793064
episode 1561.000000, reward total was -18.000000. running mean: -19.775134
episode 1562.000000, reward total was -20.000000. running mean: -19.777382
episode 1563.000000, reward total was -19.000000. running mean: -19.769608
episode 1564.000000, reward total was -20.000000. running mean: -19.771912
episode 1565.000000, rewa

episode 1662.000000, reward total was -16.000000. running mean: -19.574408
episode 1663.000000, reward total was -18.000000. running mean: -19.558664
episode 1664.000000, reward total was -18.000000. running mean: -19.543078
episode 1665.000000, reward total was -20.000000. running mean: -19.547647
episode 1666.000000, reward total was -21.000000. running mean: -19.562170
episode 1667.000000, reward total was -16.000000. running mean: -19.526549
episode 1668.000000, reward total was -20.000000. running mean: -19.531283
episode 1669.000000, reward total was -21.000000. running mean: -19.545970
episode 1670.000000, reward total was -19.000000. running mean: -19.540511
episode 1671.000000, reward total was -18.000000. running mean: -19.525106
episode 1672.000000, reward total was -21.000000. running mean: -19.539855
episode 1673.000000, reward total was -19.000000. running mean: -19.534456
episode 1674.000000, reward total was -19.000000. running mean: -19.529111
episode 1675.000000, rewa

episode 1772.000000, reward total was -20.000000. running mean: -19.650256
episode 1773.000000, reward total was -20.000000. running mean: -19.653754
episode 1774.000000, reward total was -20.000000. running mean: -19.657216
episode 1775.000000, reward total was -19.000000. running mean: -19.650644
episode 1776.000000, reward total was -19.000000. running mean: -19.644138
episode 1777.000000, reward total was -21.000000. running mean: -19.657696
episode 1778.000000, reward total was -21.000000. running mean: -19.671119
episode 1779.000000, reward total was -19.000000. running mean: -19.664408
episode 1780.000000, reward total was -18.000000. running mean: -19.647764
episode 1781.000000, reward total was -19.000000. running mean: -19.641286
episode 1782.000000, reward total was -18.000000. running mean: -19.624874
episode 1783.000000, reward total was -21.000000. running mean: -19.638625
episode 1784.000000, reward total was -21.000000. running mean: -19.652239
episode 1785.000000, rewa

episode 1882.000000, reward total was -20.000000. running mean: -19.693539
episode 1883.000000, reward total was -21.000000. running mean: -19.706603
episode 1884.000000, reward total was -20.000000. running mean: -19.709537
episode 1885.000000, reward total was -21.000000. running mean: -19.722442
episode 1886.000000, reward total was -18.000000. running mean: -19.705218
episode 1887.000000, reward total was -20.000000. running mean: -19.708165
episode 1888.000000, reward total was -20.000000. running mean: -19.711084
episode 1889.000000, reward total was -21.000000. running mean: -19.723973
episode 1890.000000, reward total was -19.000000. running mean: -19.716733
episode 1891.000000, reward total was -20.000000. running mean: -19.719566
episode 1892.000000, reward total was -19.000000. running mean: -19.712370
episode 1893.000000, reward total was -21.000000. running mean: -19.725246
episode 1894.000000, reward total was -20.000000. running mean: -19.727994
episode 1895.000000, rewa

episode 1992.000000, reward total was -18.000000. running mean: -19.570576
episode 1993.000000, reward total was -16.000000. running mean: -19.534870
episode 1994.000000, reward total was -18.000000. running mean: -19.519522
episode 1995.000000, reward total was -18.000000. running mean: -19.504327
episode 1996.000000, reward total was -15.000000. running mean: -19.459283
episode 1997.000000, reward total was -18.000000. running mean: -19.444690
episode 1998.000000, reward total was -20.000000. running mean: -19.450244
episode 1999.000000, reward total was -17.000000. running mean: -19.425741
episode 2000.000000, reward total was -20.000000. running mean: -19.431484
episode 2001.000000, reward total was -21.000000. running mean: -19.447169
episode 2002.000000, reward total was -21.000000. running mean: -19.462697
episode 2003.000000, reward total was -21.000000. running mean: -19.478070
episode 2004.000000, reward total was -19.000000. running mean: -19.473289
episode 2005.000000, rewa

episode 2102.000000, reward total was -20.000000. running mean: -19.301529
episode 2103.000000, reward total was -20.000000. running mean: -19.308514
episode 2104.000000, reward total was -21.000000. running mean: -19.325428
episode 2105.000000, reward total was -20.000000. running mean: -19.332174
episode 2106.000000, reward total was -20.000000. running mean: -19.338852
episode 2107.000000, reward total was -19.000000. running mean: -19.335464
episode 2108.000000, reward total was -20.000000. running mean: -19.342109
episode 2109.000000, reward total was -21.000000. running mean: -19.358688
episode 2110.000000, reward total was -16.000000. running mean: -19.325101
episode 2111.000000, reward total was -21.000000. running mean: -19.341850
episode 2112.000000, reward total was -17.000000. running mean: -19.318432
episode 2113.000000, reward total was -20.000000. running mean: -19.325247
episode 2114.000000, reward total was -18.000000. running mean: -19.311995
episode 2115.000000, rewa

episode 2212.000000, reward total was -20.000000. running mean: -19.500172
episode 2213.000000, reward total was -21.000000. running mean: -19.515170
episode 2214.000000, reward total was -19.000000. running mean: -19.510018
episode 2215.000000, reward total was -19.000000. running mean: -19.504918
episode 2216.000000, reward total was -19.000000. running mean: -19.499869
episode 2217.000000, reward total was -19.000000. running mean: -19.494870
episode 2218.000000, reward total was -19.000000. running mean: -19.489922
episode 2219.000000, reward total was -19.000000. running mean: -19.485022
episode 2220.000000, reward total was -19.000000. running mean: -19.480172
episode 2221.000000, reward total was -21.000000. running mean: -19.495371
episode 2222.000000, reward total was -20.000000. running mean: -19.500417
episode 2223.000000, reward total was -19.000000. running mean: -19.495413
episode 2224.000000, reward total was -20.000000. running mean: -19.500459
episode 2225.000000, rewa

episode 2322.000000, reward total was -18.000000. running mean: -19.357500
episode 2323.000000, reward total was -20.000000. running mean: -19.363925
episode 2324.000000, reward total was -20.000000. running mean: -19.370285
episode 2325.000000, reward total was -21.000000. running mean: -19.386583
episode 2326.000000, reward total was -20.000000. running mean: -19.392717
episode 2327.000000, reward total was -17.000000. running mean: -19.368790
episode 2328.000000, reward total was -19.000000. running mean: -19.365102
episode 2329.000000, reward total was -16.000000. running mean: -19.331451
episode 2330.000000, reward total was -20.000000. running mean: -19.338136
episode 2331.000000, reward total was -20.000000. running mean: -19.344755
episode 2332.000000, reward total was -20.000000. running mean: -19.351307
episode 2333.000000, reward total was -21.000000. running mean: -19.367794
episode 2334.000000, reward total was -19.000000. running mean: -19.364116
episode 2335.000000, rewa

episode 2432.000000, reward total was -15.000000. running mean: -19.280538
episode 2433.000000, reward total was -19.000000. running mean: -19.277733
episode 2434.000000, reward total was -20.000000. running mean: -19.284956
episode 2435.000000, reward total was -21.000000. running mean: -19.302106
episode 2436.000000, reward total was -21.000000. running mean: -19.319085
episode 2437.000000, reward total was -21.000000. running mean: -19.335894
episode 2438.000000, reward total was -18.000000. running mean: -19.322535
episode 2439.000000, reward total was -19.000000. running mean: -19.319310
episode 2440.000000, reward total was -20.000000. running mean: -19.326117
episode 2441.000000, reward total was -19.000000. running mean: -19.322856
episode 2442.000000, reward total was -19.000000. running mean: -19.319627
episode 2443.000000, reward total was -16.000000. running mean: -19.286431
episode 2444.000000, reward total was -20.000000. running mean: -19.293567
episode 2445.000000, rewa

episode 2542.000000, reward total was -19.000000. running mean: -19.155835
episode 2543.000000, reward total was -18.000000. running mean: -19.144276
episode 2544.000000, reward total was -19.000000. running mean: -19.142834
episode 2545.000000, reward total was -17.000000. running mean: -19.121405
episode 2546.000000, reward total was -20.000000. running mean: -19.130191
episode 2547.000000, reward total was -20.000000. running mean: -19.138889
episode 2548.000000, reward total was -18.000000. running mean: -19.127500
episode 2549.000000, reward total was -19.000000. running mean: -19.126225
episode 2550.000000, reward total was -20.000000. running mean: -19.134963
episode 2551.000000, reward total was -20.000000. running mean: -19.143613
episode 2552.000000, reward total was -19.000000. running mean: -19.142177
episode 2553.000000, reward total was -17.000000. running mean: -19.120756
episode 2554.000000, reward total was -21.000000. running mean: -19.139548
episode 2555.000000, rewa

episode 2652.000000, reward total was -19.000000. running mean: -19.224623
episode 2653.000000, reward total was -15.000000. running mean: -19.182377
episode 2654.000000, reward total was -17.000000. running mean: -19.160553
episode 2655.000000, reward total was -20.000000. running mean: -19.168947
episode 2656.000000, reward total was -17.000000. running mean: -19.147258
episode 2657.000000, reward total was -21.000000. running mean: -19.165785
episode 2658.000000, reward total was -21.000000. running mean: -19.184127
episode 2659.000000, reward total was -20.000000. running mean: -19.192286
episode 2660.000000, reward total was -16.000000. running mean: -19.160363
episode 2661.000000, reward total was -18.000000. running mean: -19.148760
episode 2662.000000, reward total was -20.000000. running mean: -19.157272
episode 2663.000000, reward total was -18.000000. running mean: -19.145699
episode 2664.000000, reward total was -15.000000. running mean: -19.104242
episode 2665.000000, rewa

episode 2762.000000, reward total was -19.000000. running mean: -18.948042
episode 2763.000000, reward total was -19.000000. running mean: -18.948561
episode 2764.000000, reward total was -19.000000. running mean: -18.949076
episode 2765.000000, reward total was -19.000000. running mean: -18.949585
episode 2766.000000, reward total was -20.000000. running mean: -18.960089
episode 2767.000000, reward total was -19.000000. running mean: -18.960488
episode 2768.000000, reward total was -19.000000. running mean: -18.960883
episode 2769.000000, reward total was -17.000000. running mean: -18.941275
episode 2770.000000, reward total was -20.000000. running mean: -18.951862
episode 2771.000000, reward total was -19.000000. running mean: -18.952343
episode 2772.000000, reward total was -17.000000. running mean: -18.932820
episode 2773.000000, reward total was -20.000000. running mean: -18.943492
episode 2774.000000, reward total was -19.000000. running mean: -18.944057
episode 2775.000000, rewa

episode 2872.000000, reward total was -17.000000. running mean: -18.862378
episode 2873.000000, reward total was -18.000000. running mean: -18.853754
episode 2874.000000, reward total was -20.000000. running mean: -18.865217
episode 2875.000000, reward total was -16.000000. running mean: -18.836564
episode 2876.000000, reward total was -15.000000. running mean: -18.798199
episode 2877.000000, reward total was -17.000000. running mean: -18.780217
episode 2878.000000, reward total was -21.000000. running mean: -18.802415
episode 2879.000000, reward total was -19.000000. running mean: -18.804391
episode 2880.000000, reward total was -18.000000. running mean: -18.796347
episode 2881.000000, reward total was -18.000000. running mean: -18.788383
episode 2882.000000, reward total was -18.000000. running mean: -18.780499
episode 2883.000000, reward total was -18.000000. running mean: -18.772694
episode 2884.000000, reward total was -20.000000. running mean: -18.784967
episode 2885.000000, rewa

episode 2982.000000, reward total was -20.000000. running mean: -18.858044
episode 2983.000000, reward total was -17.000000. running mean: -18.839463
episode 2984.000000, reward total was -21.000000. running mean: -18.861069
episode 2985.000000, reward total was -19.000000. running mean: -18.862458
episode 2986.000000, reward total was -21.000000. running mean: -18.883833
episode 2987.000000, reward total was -16.000000. running mean: -18.854995
episode 2988.000000, reward total was -19.000000. running mean: -18.856445
episode 2989.000000, reward total was -19.000000. running mean: -18.857881
episode 2990.000000, reward total was -21.000000. running mean: -18.879302
episode 2991.000000, reward total was -19.000000. running mean: -18.880509
episode 2992.000000, reward total was -17.000000. running mean: -18.861704
episode 2993.000000, reward total was -17.000000. running mean: -18.843087
episode 2994.000000, reward total was -18.000000. running mean: -18.834656
episode 2995.000000, rewa

episode 3092.000000, reward total was -18.000000. running mean: -18.813472
episode 3093.000000, reward total was -17.000000. running mean: -18.795337
episode 3094.000000, reward total was -21.000000. running mean: -18.817384
episode 3095.000000, reward total was -19.000000. running mean: -18.819210
episode 3096.000000, reward total was -21.000000. running mean: -18.841018
episode 3097.000000, reward total was -19.000000. running mean: -18.842608
episode 3098.000000, reward total was -16.000000. running mean: -18.814182
episode 3099.000000, reward total was -18.000000. running mean: -18.806040
episode 3100.000000, reward total was -19.000000. running mean: -18.807979
episode 3101.000000, reward total was -20.000000. running mean: -18.819900
episode 3102.000000, reward total was -19.000000. running mean: -18.821701
episode 3103.000000, reward total was -17.000000. running mean: -18.803484
episode 3104.000000, reward total was -19.000000. running mean: -18.805449
episode 3105.000000, rewa

episode 3202.000000, reward total was -15.000000. running mean: -18.470877
episode 3203.000000, reward total was -21.000000. running mean: -18.496168
episode 3204.000000, reward total was -20.000000. running mean: -18.511206
episode 3205.000000, reward total was -20.000000. running mean: -18.526094
episode 3206.000000, reward total was -19.000000. running mean: -18.530833
episode 3207.000000, reward total was -15.000000. running mean: -18.495525
episode 3208.000000, reward total was -17.000000. running mean: -18.480570
episode 3209.000000, reward total was -17.000000. running mean: -18.465764
episode 3210.000000, reward total was -18.000000. running mean: -18.461106
episode 3211.000000, reward total was -21.000000. running mean: -18.486495
episode 3212.000000, reward total was -18.000000. running mean: -18.481630
episode 3213.000000, reward total was -20.000000. running mean: -18.496814
episode 3214.000000, reward total was -15.000000. running mean: -18.461846
episode 3215.000000, rewa

episode 3312.000000, reward total was -17.000000. running mean: -18.474664
episode 3313.000000, reward total was -20.000000. running mean: -18.489917
episode 3314.000000, reward total was -19.000000. running mean: -18.495018
episode 3315.000000, reward total was -20.000000. running mean: -18.510068
episode 3316.000000, reward total was -19.000000. running mean: -18.514967
episode 3317.000000, reward total was -18.000000. running mean: -18.509817
episode 3318.000000, reward total was -17.000000. running mean: -18.494719
episode 3319.000000, reward total was -17.000000. running mean: -18.479772
episode 3320.000000, reward total was -19.000000. running mean: -18.484974
episode 3321.000000, reward total was -18.000000. running mean: -18.480124
episode 3322.000000, reward total was -20.000000. running mean: -18.495323
episode 3323.000000, reward total was -21.000000. running mean: -18.520370
episode 3324.000000, reward total was -17.000000. running mean: -18.505166
episode 3325.000000, rewa

episode 3422.000000, reward total was -14.000000. running mean: -18.457919
episode 3423.000000, reward total was -19.000000. running mean: -18.463340
episode 3424.000000, reward total was -19.000000. running mean: -18.468707
episode 3425.000000, reward total was -18.000000. running mean: -18.464020
episode 3426.000000, reward total was -17.000000. running mean: -18.449379
episode 3427.000000, reward total was -19.000000. running mean: -18.454886
episode 3428.000000, reward total was -16.000000. running mean: -18.430337
episode 3429.000000, reward total was -17.000000. running mean: -18.416033
episode 3430.000000, reward total was -17.000000. running mean: -18.401873
episode 3431.000000, reward total was -15.000000. running mean: -18.367854
episode 3432.000000, reward total was -15.000000. running mean: -18.334176
episode 3433.000000, reward total was -20.000000. running mean: -18.350834
episode 3434.000000, reward total was -15.000000. running mean: -18.317326
episode 3435.000000, rewa

episode 3532.000000, reward total was -19.000000. running mean: -18.257767
episode 3533.000000, reward total was -21.000000. running mean: -18.285190
episode 3534.000000, reward total was -18.000000. running mean: -18.282338
episode 3535.000000, reward total was -19.000000. running mean: -18.289514
episode 3536.000000, reward total was -18.000000. running mean: -18.286619
episode 3537.000000, reward total was -18.000000. running mean: -18.283753
episode 3538.000000, reward total was -18.000000. running mean: -18.280916
episode 3539.000000, reward total was -17.000000. running mean: -18.268106
episode 3540.000000, reward total was -17.000000. running mean: -18.255425
episode 3541.000000, reward total was -19.000000. running mean: -18.262871
episode 3542.000000, reward total was -20.000000. running mean: -18.280242
episode 3543.000000, reward total was -19.000000. running mean: -18.287440
episode 3544.000000, reward total was -18.000000. running mean: -18.284566
episode 3545.000000, rewa

episode 3642.000000, reward total was -14.000000. running mean: -18.188491
episode 3643.000000, reward total was -16.000000. running mean: -18.166606
episode 3644.000000, reward total was -16.000000. running mean: -18.144940
episode 3645.000000, reward total was -17.000000. running mean: -18.133490
episode 3646.000000, reward total was -17.000000. running mean: -18.122155
episode 3647.000000, reward total was -19.000000. running mean: -18.130934
episode 3648.000000, reward total was -20.000000. running mean: -18.149624
episode 3649.000000, reward total was -19.000000. running mean: -18.158128
episode 3650.000000, reward total was -13.000000. running mean: -18.106547
episode 3651.000000, reward total was -20.000000. running mean: -18.125481
episode 3652.000000, reward total was -17.000000. running mean: -18.114227
episode 3653.000000, reward total was -17.000000. running mean: -18.103084
episode 3654.000000, reward total was -18.000000. running mean: -18.102053
episode 3655.000000, rewa

episode 3752.000000, reward total was -15.000000. running mean: -17.990829
episode 3753.000000, reward total was -18.000000. running mean: -17.990921
episode 3754.000000, reward total was -17.000000. running mean: -17.981012
episode 3755.000000, reward total was -21.000000. running mean: -18.011202
episode 3756.000000, reward total was -17.000000. running mean: -18.001090
episode 3757.000000, reward total was -17.000000. running mean: -17.991079
episode 3758.000000, reward total was -17.000000. running mean: -17.981168
episode 3759.000000, reward total was -19.000000. running mean: -17.991356
episode 3760.000000, reward total was -13.000000. running mean: -17.941443
episode 3761.000000, reward total was -17.000000. running mean: -17.932028
episode 3762.000000, reward total was -17.000000. running mean: -17.922708
episode 3763.000000, reward total was -16.000000. running mean: -17.903481
episode 3764.000000, reward total was -14.000000. running mean: -17.864446
episode 3765.000000, rewa

episode 3862.000000, reward total was -17.000000. running mean: -18.018984
episode 3863.000000, reward total was -16.000000. running mean: -17.998794
episode 3864.000000, reward total was -21.000000. running mean: -18.028806
episode 3865.000000, reward total was -20.000000. running mean: -18.048518
episode 3866.000000, reward total was -20.000000. running mean: -18.068033
episode 3867.000000, reward total was -17.000000. running mean: -18.057352
episode 3868.000000, reward total was -16.000000. running mean: -18.036779
episode 3869.000000, reward total was -16.000000. running mean: -18.016411
episode 3870.000000, reward total was -20.000000. running mean: -18.036247
episode 3871.000000, reward total was -17.000000. running mean: -18.025884
episode 3872.000000, reward total was -15.000000. running mean: -17.995626
episode 3873.000000, reward total was -13.000000. running mean: -17.945669
episode 3874.000000, reward total was -18.000000. running mean: -17.946213
episode 3875.000000, rewa

episode 3972.000000, reward total was -21.000000. running mean: -17.941746
episode 3973.000000, reward total was -13.000000. running mean: -17.892328
episode 3974.000000, reward total was -19.000000. running mean: -17.903405
episode 3975.000000, reward total was -19.000000. running mean: -17.914371
episode 3976.000000, reward total was -17.000000. running mean: -17.905227
episode 3977.000000, reward total was -20.000000. running mean: -17.926175
episode 3978.000000, reward total was -14.000000. running mean: -17.886913
episode 3979.000000, reward total was -19.000000. running mean: -17.898044
episode 3980.000000, reward total was -18.000000. running mean: -17.899064
episode 3981.000000, reward total was -15.000000. running mean: -17.870073
episode 3982.000000, reward total was -20.000000. running mean: -17.891372
episode 3983.000000, reward total was -18.000000. running mean: -17.892458
episode 3984.000000, reward total was -17.000000. running mean: -17.883534
episode 3985.000000, rewa

episode 4082.000000, reward total was -16.000000. running mean: -17.629919
episode 4083.000000, reward total was -19.000000. running mean: -17.643620
episode 4084.000000, reward total was -18.000000. running mean: -17.647184
episode 4085.000000, reward total was -18.000000. running mean: -17.650712
episode 4086.000000, reward total was -15.000000. running mean: -17.624205
episode 4087.000000, reward total was -20.000000. running mean: -17.647963
episode 4088.000000, reward total was -20.000000. running mean: -17.671483
episode 4089.000000, reward total was -19.000000. running mean: -17.684768
episode 4090.000000, reward total was -17.000000. running mean: -17.677921
episode 4091.000000, reward total was -15.000000. running mean: -17.651141
episode 4092.000000, reward total was -21.000000. running mean: -17.684630
episode 4093.000000, reward total was -17.000000. running mean: -17.677784
episode 4094.000000, reward total was -17.000000. running mean: -17.671006
episode 4095.000000, rewa

episode 4192.000000, reward total was -18.000000. running mean: -17.575340
episode 4193.000000, reward total was -19.000000. running mean: -17.589586
episode 4194.000000, reward total was -17.000000. running mean: -17.583690
episode 4195.000000, reward total was -15.000000. running mean: -17.557853
episode 4196.000000, reward total was -17.000000. running mean: -17.552275
episode 4197.000000, reward total was -16.000000. running mean: -17.536752
episode 4198.000000, reward total was -17.000000. running mean: -17.531385
episode 4199.000000, reward total was -17.000000. running mean: -17.526071
episode 4200.000000, reward total was -12.000000. running mean: -17.470810
episode 4201.000000, reward total was -17.000000. running mean: -17.466102
episode 4202.000000, reward total was -13.000000. running mean: -17.421441
episode 4203.000000, reward total was -17.000000. running mean: -17.417226
episode 4204.000000, reward total was -16.000000. running mean: -17.403054
episode 4205.000000, rewa

episode 4302.000000, reward total was -17.000000. running mean: -17.441202
episode 4303.000000, reward total was -18.000000. running mean: -17.446790
episode 4304.000000, reward total was -18.000000. running mean: -17.452322
episode 4305.000000, reward total was -18.000000. running mean: -17.457799
episode 4306.000000, reward total was -19.000000. running mean: -17.473221
episode 4307.000000, reward total was -17.000000. running mean: -17.468489
episode 4308.000000, reward total was -15.000000. running mean: -17.443804
episode 4309.000000, reward total was -14.000000. running mean: -17.409366
episode 4310.000000, reward total was -11.000000. running mean: -17.345272
episode 4311.000000, reward total was -16.000000. running mean: -17.331819
episode 4312.000000, reward total was -18.000000. running mean: -17.338501
episode 4313.000000, reward total was -17.000000. running mean: -17.335116
episode 4314.000000, reward total was -19.000000. running mean: -17.351765
episode 4315.000000, rewa

episode 4412.000000, reward total was -17.000000. running mean: -17.092438
episode 4413.000000, reward total was -15.000000. running mean: -17.071513
episode 4414.000000, reward total was -15.000000. running mean: -17.050798
episode 4415.000000, reward total was -17.000000. running mean: -17.050290
episode 4416.000000, reward total was -17.000000. running mean: -17.049787
episode 4417.000000, reward total was -19.000000. running mean: -17.069290
episode 4418.000000, reward total was -16.000000. running mean: -17.058597
episode 4419.000000, reward total was -18.000000. running mean: -17.068011
episode 4420.000000, reward total was -17.000000. running mean: -17.067331
episode 4421.000000, reward total was -19.000000. running mean: -17.086657
episode 4422.000000, reward total was -13.000000. running mean: -17.045791
episode 4423.000000, reward total was -7.000000. running mean: -16.945333
episode 4424.000000, reward total was -14.000000. running mean: -16.915879
episode 4425.000000, rewar

episode 4522.000000, reward total was -19.000000. running mean: -16.833997
episode 4523.000000, reward total was -15.000000. running mean: -16.815657
episode 4524.000000, reward total was -17.000000. running mean: -16.817501
episode 4525.000000, reward total was -15.000000. running mean: -16.799326
episode 4526.000000, reward total was -16.000000. running mean: -16.791332
episode 4527.000000, reward total was -15.000000. running mean: -16.773419
episode 4528.000000, reward total was -17.000000. running mean: -16.775685
episode 4529.000000, reward total was -17.000000. running mean: -16.777928
episode 4530.000000, reward total was -16.000000. running mean: -16.770149
episode 4531.000000, reward total was -17.000000. running mean: -16.772447
episode 4532.000000, reward total was -17.000000. running mean: -16.774723
episode 4533.000000, reward total was -16.000000. running mean: -16.766976
episode 4534.000000, reward total was -16.000000. running mean: -16.759306
episode 4535.000000, rewa

episode 4632.000000, reward total was -20.000000. running mean: -16.845561
episode 4633.000000, reward total was -17.000000. running mean: -16.847106
episode 4634.000000, reward total was -11.000000. running mean: -16.788635
episode 4635.000000, reward total was -18.000000. running mean: -16.800748
episode 4636.000000, reward total was -21.000000. running mean: -16.842741
episode 4637.000000, reward total was -14.000000. running mean: -16.814313
episode 4638.000000, reward total was -18.000000. running mean: -16.826170
episode 4639.000000, reward total was -17.000000. running mean: -16.827908
episode 4640.000000, reward total was -19.000000. running mean: -16.849629
episode 4641.000000, reward total was -15.000000. running mean: -16.831133
episode 4642.000000, reward total was -18.000000. running mean: -16.842822
episode 4643.000000, reward total was -17.000000. running mean: -16.844394
episode 4644.000000, reward total was -19.000000. running mean: -16.865950
episode 4645.000000, rewa

episode 4742.000000, reward total was -15.000000. running mean: -16.821181
episode 4743.000000, reward total was -17.000000. running mean: -16.822970
episode 4744.000000, reward total was -13.000000. running mean: -16.784740
episode 4745.000000, reward total was -16.000000. running mean: -16.776892
episode 4746.000000, reward total was -16.000000. running mean: -16.769124
episode 4747.000000, reward total was -13.000000. running mean: -16.731432
episode 4748.000000, reward total was -17.000000. running mean: -16.734118
episode 4749.000000, reward total was -20.000000. running mean: -16.766777
episode 4750.000000, reward total was -21.000000. running mean: -16.809109
episode 4751.000000, reward total was -19.000000. running mean: -16.831018
episode 4752.000000, reward total was -19.000000. running mean: -16.852708
episode 4753.000000, reward total was -16.000000. running mean: -16.844181
episode 4754.000000, reward total was -21.000000. running mean: -16.885739
episode 4755.000000, rewa

episode 4852.000000, reward total was -16.000000. running mean: -16.793084
episode 4853.000000, reward total was -15.000000. running mean: -16.775153
episode 4854.000000, reward total was -14.000000. running mean: -16.747401
episode 4855.000000, reward total was -19.000000. running mean: -16.769927
episode 4856.000000, reward total was -20.000000. running mean: -16.802228
episode 4857.000000, reward total was -20.000000. running mean: -16.834206
episode 4858.000000, reward total was -20.000000. running mean: -16.865864
episode 4859.000000, reward total was -19.000000. running mean: -16.887205
episode 4860.000000, reward total was -17.000000. running mean: -16.888333
episode 4861.000000, reward total was -18.000000. running mean: -16.899450
episode 4862.000000, reward total was -14.000000. running mean: -16.870455
episode 4863.000000, reward total was -12.000000. running mean: -16.821751
episode 4864.000000, reward total was -17.000000. running mean: -16.823533
episode 4865.000000, rewa

episode 4962.000000, reward total was -15.000000. running mean: -16.584748
episode 4963.000000, reward total was -19.000000. running mean: -16.608901
episode 4964.000000, reward total was -12.000000. running mean: -16.562812
episode 4965.000000, reward total was -17.000000. running mean: -16.567184
episode 4966.000000, reward total was -16.000000. running mean: -16.561512
episode 4967.000000, reward total was -18.000000. running mean: -16.575897
episode 4968.000000, reward total was -16.000000. running mean: -16.570138
episode 4969.000000, reward total was -16.000000. running mean: -16.564436
episode 4970.000000, reward total was -16.000000. running mean: -16.558792
episode 4971.000000, reward total was -15.000000. running mean: -16.543204
episode 4972.000000, reward total was -14.000000. running mean: -16.517772
episode 4973.000000, reward total was -19.000000. running mean: -16.542594
episode 4974.000000, reward total was -20.000000. running mean: -16.577168
episode 4975.000000, rewa

episode 5072.000000, reward total was -14.000000. running mean: -16.046986
episode 5073.000000, reward total was -11.000000. running mean: -15.996516
episode 5074.000000, reward total was -19.000000. running mean: -16.026551
episode 5075.000000, reward total was -15.000000. running mean: -16.016286
episode 5076.000000, reward total was -17.000000. running mean: -16.026123
episode 5077.000000, reward total was -14.000000. running mean: -16.005862
episode 5078.000000, reward total was -17.000000. running mean: -16.015803
episode 5079.000000, reward total was -16.000000. running mean: -16.015645
episode 5080.000000, reward total was -11.000000. running mean: -15.965489
episode 5081.000000, reward total was -17.000000. running mean: -15.975834
episode 5082.000000, reward total was -11.000000. running mean: -15.926075
episode 5083.000000, reward total was -21.000000. running mean: -15.976815
episode 5084.000000, reward total was -15.000000. running mean: -15.967046
episode 5085.000000, rewa

episode 5182.000000, reward total was -17.000000. running mean: -15.810705
episode 5183.000000, reward total was -12.000000. running mean: -15.772598
episode 5184.000000, reward total was -12.000000. running mean: -15.734872
episode 5185.000000, reward total was -12.000000. running mean: -15.697523
episode 5186.000000, reward total was -15.000000. running mean: -15.690548
episode 5187.000000, reward total was -11.000000. running mean: -15.643642
episode 5188.000000, reward total was -10.000000. running mean: -15.587206
episode 5189.000000, reward total was -13.000000. running mean: -15.561334
episode 5190.000000, reward total was -18.000000. running mean: -15.585720
episode 5191.000000, reward total was -17.000000. running mean: -15.599863
episode 5192.000000, reward total was -15.000000. running mean: -15.593865
episode 5193.000000, reward total was -16.000000. running mean: -15.597926
episode 5194.000000, reward total was -19.000000. running mean: -15.631947
episode 5195.000000, rewa

episode 5292.000000, reward total was -17.000000. running mean: -15.473068
episode 5293.000000, reward total was -15.000000. running mean: -15.468338
episode 5294.000000, reward total was -19.000000. running mean: -15.503654
episode 5295.000000, reward total was -17.000000. running mean: -15.518618
episode 5296.000000, reward total was -20.000000. running mean: -15.563432
episode 5297.000000, reward total was -14.000000. running mean: -15.547797
episode 5298.000000, reward total was -12.000000. running mean: -15.512319
episode 5299.000000, reward total was -15.000000. running mean: -15.507196
episode 5300.000000, reward total was -15.000000. running mean: -15.502124
episode 5301.000000, reward total was -21.000000. running mean: -15.557103
episode 5302.000000, reward total was -19.000000. running mean: -15.591532
episode 5303.000000, reward total was -14.000000. running mean: -15.575617
episode 5304.000000, reward total was -14.000000. running mean: -15.559860
episode 5305.000000, rewa

episode 5402.000000, reward total was -14.000000. running mean: -15.579626
episode 5403.000000, reward total was -17.000000. running mean: -15.593830
episode 5404.000000, reward total was -13.000000. running mean: -15.567891
episode 5405.000000, reward total was -19.000000. running mean: -15.602213
episode 5406.000000, reward total was -13.000000. running mean: -15.576190
episode 5407.000000, reward total was -15.000000. running mean: -15.570428
episode 5408.000000, reward total was -13.000000. running mean: -15.544724
episode 5409.000000, reward total was -18.000000. running mean: -15.569277
episode 5410.000000, reward total was -16.000000. running mean: -15.573584
episode 5411.000000, reward total was -16.000000. running mean: -15.577848
episode 5412.000000, reward total was -10.000000. running mean: -15.522070
episode 5413.000000, reward total was -15.000000. running mean: -15.516849
episode 5414.000000, reward total was -17.000000. running mean: -15.531681
episode 5415.000000, rewa

episode 5512.000000, reward total was -15.000000. running mean: -15.255171
episode 5513.000000, reward total was -10.000000. running mean: -15.202619
episode 5514.000000, reward total was 1.000000. running mean: -15.040593
episode 5515.000000, reward total was -18.000000. running mean: -15.070187
episode 5516.000000, reward total was -12.000000. running mean: -15.039485
episode 5517.000000, reward total was -13.000000. running mean: -15.019090
episode 5518.000000, reward total was -15.000000. running mean: -15.018899
episode 5519.000000, reward total was -18.000000. running mean: -15.048710
episode 5520.000000, reward total was -17.000000. running mean: -15.068223
episode 5521.000000, reward total was -13.000000. running mean: -15.047541
episode 5522.000000, reward total was -17.000000. running mean: -15.067066
episode 5523.000000, reward total was -17.000000. running mean: -15.086395
episode 5524.000000, reward total was -12.000000. running mean: -15.055531
episode 5525.000000, reward

episode 5622.000000, reward total was -13.000000. running mean: -14.968828
episode 5623.000000, reward total was -19.000000. running mean: -15.009139
episode 5624.000000, reward total was -17.000000. running mean: -15.029048
episode 5625.000000, reward total was -18.000000. running mean: -15.058757
episode 5626.000000, reward total was -15.000000. running mean: -15.058170
episode 5627.000000, reward total was -12.000000. running mean: -15.027588
episode 5628.000000, reward total was -9.000000. running mean: -14.967312
episode 5629.000000, reward total was -10.000000. running mean: -14.917639
episode 5630.000000, reward total was -13.000000. running mean: -14.898463
episode 5631.000000, reward total was -11.000000. running mean: -14.859478
episode 5632.000000, reward total was -18.000000. running mean: -14.890883
episode 5633.000000, reward total was -17.000000. running mean: -14.911975
episode 5634.000000, reward total was -14.000000. running mean: -14.902855
episode 5635.000000, rewar

episode 5732.000000, reward total was -16.000000. running mean: -14.847577
episode 5733.000000, reward total was -17.000000. running mean: -14.869101
episode 5734.000000, reward total was -13.000000. running mean: -14.850410
episode 5735.000000, reward total was -14.000000. running mean: -14.841906
episode 5736.000000, reward total was -17.000000. running mean: -14.863487
episode 5737.000000, reward total was -10.000000. running mean: -14.814852
episode 5738.000000, reward total was -13.000000. running mean: -14.796703
episode 5739.000000, reward total was -12.000000. running mean: -14.768736
episode 5740.000000, reward total was -16.000000. running mean: -14.781049
episode 5741.000000, reward total was -15.000000. running mean: -14.783238
episode 5742.000000, reward total was -17.000000. running mean: -14.805406
episode 5743.000000, reward total was -7.000000. running mean: -14.727352
episode 5744.000000, reward total was -15.000000. running mean: -14.730078
episode 5745.000000, rewar

episode 5842.000000, reward total was -17.000000. running mean: -14.807492
episode 5843.000000, reward total was -9.000000. running mean: -14.749418
episode 5844.000000, reward total was -17.000000. running mean: -14.771923
episode 5845.000000, reward total was -18.000000. running mean: -14.804204
episode 5846.000000, reward total was -14.000000. running mean: -14.796162
episode 5847.000000, reward total was -8.000000. running mean: -14.728200
episode 5848.000000, reward total was -15.000000. running mean: -14.730918
episode 5849.000000, reward total was -17.000000. running mean: -14.753609
episode 5850.000000, reward total was -17.000000. running mean: -14.776073
episode 5851.000000, reward total was -15.000000. running mean: -14.778312
episode 5852.000000, reward total was -19.000000. running mean: -14.820529
episode 5853.000000, reward total was -14.000000. running mean: -14.812324
episode 5854.000000, reward total was -15.000000. running mean: -14.814201
episode 5855.000000, reward

episode 5952.000000, reward total was -18.000000. running mean: -14.886232
episode 5953.000000, reward total was -18.000000. running mean: -14.917370
episode 5954.000000, reward total was -16.000000. running mean: -14.928196
episode 5955.000000, reward total was -16.000000. running mean: -14.938914
episode 5956.000000, reward total was -14.000000. running mean: -14.929525
episode 5957.000000, reward total was -18.000000. running mean: -14.960230
episode 5958.000000, reward total was -18.000000. running mean: -14.990627
episode 5959.000000, reward total was -12.000000. running mean: -14.960721
episode 5960.000000, reward total was -18.000000. running mean: -14.991114
episode 5961.000000, reward total was -11.000000. running mean: -14.951203
episode 5962.000000, reward total was -10.000000. running mean: -14.901691
episode 5963.000000, reward total was -8.000000. running mean: -14.832674
episode 5964.000000, reward total was -17.000000. running mean: -14.854347
episode 5965.000000, rewar

episode 6062.000000, reward total was -15.000000. running mean: -14.712621
episode 6063.000000, reward total was -14.000000. running mean: -14.705494
episode 6064.000000, reward total was -16.000000. running mean: -14.718439
episode 6065.000000, reward total was -19.000000. running mean: -14.761255
episode 6066.000000, reward total was -7.000000. running mean: -14.683642
episode 6067.000000, reward total was -9.000000. running mean: -14.626806
episode 6068.000000, reward total was -14.000000. running mean: -14.620538
episode 6069.000000, reward total was -15.000000. running mean: -14.624333
episode 6070.000000, reward total was -20.000000. running mean: -14.678089
episode 6071.000000, reward total was -14.000000. running mean: -14.671308
episode 6072.000000, reward total was -11.000000. running mean: -14.634595
episode 6073.000000, reward total was -19.000000. running mean: -14.678249
episode 6074.000000, reward total was -13.000000. running mean: -14.661467
episode 6075.000000, reward

episode 6172.000000, reward total was -14.000000. running mean: -14.137863
episode 6173.000000, reward total was -7.000000. running mean: -14.066485
episode 6174.000000, reward total was -10.000000. running mean: -14.025820
episode 6175.000000, reward total was -14.000000. running mean: -14.025562
episode 6176.000000, reward total was -14.000000. running mean: -14.025306
episode 6177.000000, reward total was -17.000000. running mean: -14.055053
episode 6178.000000, reward total was -13.000000. running mean: -14.044503
episode 6179.000000, reward total was -17.000000. running mean: -14.074058
episode 6180.000000, reward total was -14.000000. running mean: -14.073317
episode 6181.000000, reward total was -16.000000. running mean: -14.092584
episode 6182.000000, reward total was -15.000000. running mean: -14.101658
episode 6183.000000, reward total was -13.000000. running mean: -14.090641
episode 6184.000000, reward total was -16.000000. running mean: -14.109735
episode 6185.000000, rewar

episode 6282.000000, reward total was -10.000000. running mean: -14.499479
episode 6283.000000, reward total was -19.000000. running mean: -14.544484
episode 6284.000000, reward total was -14.000000. running mean: -14.539039
episode 6285.000000, reward total was -6.000000. running mean: -14.453649
episode 6286.000000, reward total was -11.000000. running mean: -14.419112
episode 6287.000000, reward total was -15.000000. running mean: -14.424921
episode 6288.000000, reward total was -11.000000. running mean: -14.390672
episode 6289.000000, reward total was -14.000000. running mean: -14.386765
episode 6290.000000, reward total was -10.000000. running mean: -14.342897
episode 6291.000000, reward total was -16.000000. running mean: -14.359469
episode 6292.000000, reward total was -13.000000. running mean: -14.345874
episode 6293.000000, reward total was -11.000000. running mean: -14.312415
episode 6294.000000, reward total was -17.000000. running mean: -14.339291
episode 6295.000000, rewar

episode 6392.000000, reward total was -9.000000. running mean: -13.970708
episode 6393.000000, reward total was -8.000000. running mean: -13.911001
episode 6394.000000, reward total was -10.000000. running mean: -13.871891
episode 6395.000000, reward total was -15.000000. running mean: -13.883172
episode 6396.000000, reward total was -9.000000. running mean: -13.834340
episode 6397.000000, reward total was -15.000000. running mean: -13.845997
episode 6398.000000, reward total was -16.000000. running mean: -13.867537
episode 6399.000000, reward total was -14.000000. running mean: -13.868861
episode 6400.000000, reward total was -8.000000. running mean: -13.810173
episode 6401.000000, reward total was -19.000000. running mean: -13.862071
episode 6402.000000, reward total was -17.000000. running mean: -13.893450
episode 6403.000000, reward total was -13.000000. running mean: -13.884516
episode 6404.000000, reward total was -12.000000. running mean: -13.865671
episode 6405.000000, reward t

episode 6502.000000, reward total was -17.000000. running mean: -13.889359
episode 6503.000000, reward total was -15.000000. running mean: -13.900465
episode 6504.000000, reward total was -14.000000. running mean: -13.901461
episode 6505.000000, reward total was -16.000000. running mean: -13.922446
episode 6506.000000, reward total was -9.000000. running mean: -13.873222
episode 6507.000000, reward total was -14.000000. running mean: -13.874489
episode 6508.000000, reward total was -9.000000. running mean: -13.825744
episode 6509.000000, reward total was -16.000000. running mean: -13.847487
episode 6510.000000, reward total was -15.000000. running mean: -13.859012
episode 6511.000000, reward total was -4.000000. running mean: -13.760422
episode 6512.000000, reward total was -10.000000. running mean: -13.722818
episode 6513.000000, reward total was -13.000000. running mean: -13.715590
episode 6514.000000, reward total was -17.000000. running mean: -13.748434
episode 6515.000000, reward 

episode 6612.000000, reward total was -11.000000. running mean: -13.869561
episode 6613.000000, reward total was -11.000000. running mean: -13.840866
episode 6614.000000, reward total was -17.000000. running mean: -13.872457
episode 6615.000000, reward total was -17.000000. running mean: -13.903732
episode 6616.000000, reward total was -9.000000. running mean: -13.854695
episode 6617.000000, reward total was -15.000000. running mean: -13.866148
episode 6618.000000, reward total was -7.000000. running mean: -13.797487
episode 6619.000000, reward total was -15.000000. running mean: -13.809512
episode 6620.000000, reward total was -8.000000. running mean: -13.751417
episode 6621.000000, reward total was -14.000000. running mean: -13.753903
episode 6622.000000, reward total was -9.000000. running mean: -13.706363
episode 6623.000000, reward total was -10.000000. running mean: -13.669300
episode 6624.000000, reward total was -13.000000. running mean: -13.662607
episode 6625.000000, reward t

episode 6722.000000, reward total was -17.000000. running mean: -13.448180
episode 6723.000000, reward total was -7.000000. running mean: -13.383699
episode 6724.000000, reward total was -14.000000. running mean: -13.389862
episode 6725.000000, reward total was -14.000000. running mean: -13.395963
episode 6726.000000, reward total was -13.000000. running mean: -13.392003
episode 6727.000000, reward total was -13.000000. running mean: -13.388083
episode 6728.000000, reward total was -13.000000. running mean: -13.384202
episode 6729.000000, reward total was -17.000000. running mean: -13.420360
episode 6730.000000, reward total was -13.000000. running mean: -13.416157
episode 6731.000000, reward total was -15.000000. running mean: -13.431995
episode 6732.000000, reward total was -13.000000. running mean: -13.427675
episode 6733.000000, reward total was -9.000000. running mean: -13.383399
episode 6734.000000, reward total was -8.000000. running mean: -13.329565
episode 6735.000000, reward 

episode 6832.000000, reward total was -17.000000. running mean: -13.008575
episode 6833.000000, reward total was -14.000000. running mean: -13.018489
episode 6834.000000, reward total was -13.000000. running mean: -13.018304
episode 6835.000000, reward total was -10.000000. running mean: -12.988121
episode 6836.000000, reward total was -14.000000. running mean: -12.998240
episode 6837.000000, reward total was -14.000000. running mean: -13.008257
episode 6838.000000, reward total was -9.000000. running mean: -12.968175
episode 6839.000000, reward total was -18.000000. running mean: -13.018493
episode 6840.000000, reward total was -13.000000. running mean: -13.018308
episode 6841.000000, reward total was -9.000000. running mean: -12.978125
episode 6842.000000, reward total was -9.000000. running mean: -12.938344
episode 6843.000000, reward total was -16.000000. running mean: -12.968960
episode 6844.000000, reward total was -13.000000. running mean: -12.969271
episode 6845.000000, reward 

episode 6942.000000, reward total was -8.000000. running mean: -12.462367
episode 6943.000000, reward total was -5.000000. running mean: -12.387743
episode 6944.000000, reward total was -13.000000. running mean: -12.393866
episode 6945.000000, reward total was -18.000000. running mean: -12.449927
episode 6946.000000, reward total was -17.000000. running mean: -12.495428
episode 6947.000000, reward total was -11.000000. running mean: -12.480473
episode 6948.000000, reward total was -11.000000. running mean: -12.465669
episode 6949.000000, reward total was -17.000000. running mean: -12.511012
episode 6950.000000, reward total was -13.000000. running mean: -12.515902
episode 6951.000000, reward total was -8.000000. running mean: -12.470743
episode 6952.000000, reward total was -12.000000. running mean: -12.466035
episode 6953.000000, reward total was -10.000000. running mean: -12.441375
episode 6954.000000, reward total was -12.000000. running mean: -12.436961
episode 6955.000000, reward 