In [1]:
import gym
import numpy as np

from gym.wrappers import AtariPreprocessing
# NOTE(review): this assigns an attribute named `new_step_api` on the imported
# gym module. Whether gym ever reads that attribute is not visible here --
# confirm. The loops in this cell unpack env.step() as the 4-tuple
# (observation, reward, done, info).
gym.new_step_api=True
# Pong environment instance; it is handed to train_model() in the next cell.
env = gym.make('Pong-v0')

# ---- network dimensions ----
H = 800      # width of the hidden layer
D = 80 * 80  # length of the flattened 80x80 input frame

# ---- policy network weights ----
# Gaussian draws scaled by 1/sqrt(fan-in); W1 is drawn before W2.
model = {
  'W1': np.random.randn(H, D) / np.sqrt(D),
  'W2': np.random.randn(H) / np.sqrt(H),
}

# ---- training hyperparameters ----
batch_size = 10       # number of episodes between parameter updates
learning_rate = 1e-4
gamma = 0.99          # reward discount factor
decay_rate = 0.99     # decay of the RMSProp running average of grad^2

# ---- per-parameter accumulators, zero-initialised with the weight shapes ----
# grad_buffer sums gradients across the episodes of one batch.
grad_buffer = {name: np.zeros_like(weights) for name, weights in model.items()}
# rmsprop_cache holds the RMSProp running average of squared gradients.
rmsprop_cache = {name: np.zeros_like(weights) for name, weights in model.items()}

def sigmoid(x): 
  """Logistic function 1 / (1 + exp(-x)), squashing x into the interval [0, 1].

  Evaluated as exp(-log(1 + exp(-x))) through np.logaddexp so that large
  negative inputs do not overflow inside exp(); the naive form emits a
  RuntimeWarning there even though its final value is still 0.

  Args:
    x: scalar or array of logits.

  Returns:
    The element-wise sigmoid of x.
  """
  # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x)), computed stably.
  return np.exp(-np.logaddexp(0.0, np.negative(x)))

def prepro(I):
  """Turn a raw frame into a flat binary float vector.

  Rows 35..194 are kept, rows and columns are downsampled by a factor of 2,
  and only channel 0 is used. Pixels equal to 0, 144 or 109 (the two
  background values) become 0.0; every other pixel (paddles, ball) becomes 1.0.

  The input frame is never modified: the mask is computed into a new array
  instead of writing through a slice view of the caller's observation.

  Args:
    I: array-like frame indexed as [row, column, channel].

  Returns:
    1-D float array of the cropped, downsampled frame in row-major order.
  """
  frame = np.asarray(I)
  # crop + downsample in one slice (same elements as I[35:195][::2, ::2, 0])
  cropped = frame[35:195:2, ::2, 0]
  # foreground = anything that is neither already zero nor a background value
  foreground = (cropped != 0) & (cropped != 144) & (cropped != 109)
  return foreground.astype(float).ravel()

def discount_rewards(r, discount=None):
  """Compute discounted returns, walking backwards through the rewards.

  The running sum is reset whenever a non-zero reward is met, because a
  non-zero reward marks a game boundary (Pong specific).

  Non-floating input (integer arrays, lists of ints) is converted to float
  first, so the discounted values are not truncated back to integers.

  Args:
    r: rewards in time order; any array-like whose elements are visited in
      flattened order (1-D, or the (N, 1) column built with np.vstack).
    discount: discount factor; when None the module-level `gamma` is used.

  Returns:
    Floating-point array with the same shape as `r` holding the discounted
    return at every step.
  """
  if discount is None:
    discount = gamma
  rewards = np.asarray(r)
  if not np.issubdtype(rewards.dtype, np.floating):
    rewards = rewards.astype(float)
  flat = rewards.ravel()
  discounted = np.zeros_like(flat)
  running_add = 0.0
  for t in reversed(range(flat.size)):
    if flat[t] != 0:
      running_add = 0.0 # reset the sum, since this was a game boundary (pong specific!)
    running_add = running_add * discount + flat[t]
    discounted[t] = running_add
  return discounted.reshape(rewards.shape)

def policy_forward(x):
  """Forward pass of the two-layer policy network stored in the module-level `model`.

  Args:
    x: input vector multiplied by model['W1'].

  Returns:
    (p, h): p is the sigmoid output, used by the callers as the probability
    of taking action 2; h is the hidden activation after the ReLU.
  """
  w_hidden, w_out = model['W1'], model['W2']
  hidden = np.dot(w_hidden, x)
  hidden[hidden < 0] = 0 # ReLU: clamp negative pre-activations to zero
  logit = np.dot(w_out, hidden)
  return sigmoid(logit), hidden

def policy_backward(epx, eph, epdlogp):
  """Backward pass over one episode, using model['W2'] from the module-level `model`.

  Args:
    epx: per-step network inputs, stacked row-wise.
    eph: per-step hidden activations, stacked row-wise.
    epdlogp: per-step output gradients, stacked row-wise as a column.

  Returns:
    dict with keys 'W1' and 'W2' holding the gradient for each weight array.
  """
  # gradient for the output weights
  grad_w2 = np.dot(eph.T, epdlogp).ravel()
  # push the output gradient back into the hidden layer ...
  grad_hidden = np.outer(epdlogp, model['W2'])
  # ... and block it wherever the ReLU output was not positive
  inactive = eph <= 0
  grad_hidden[inactive] = 0
  # gradient for the input weights
  grad_w1 = np.dot(grad_hidden.T, epx)
  return {'W1': grad_w1, 'W2': grad_w2}

def model_step(model, observation, prev_x):
  """Pick the greedy action for one observation.

  The network input is the difference between the current preprocessed
  frame and the previous one; on the first call (prev_x is None) it is a
  zero vector of length D.

  Args:
    model: not used directly in this function; policy_forward() is called
      without it.
    observation: raw frame passed to prepro().
    prev_x: preprocessed frame from the previous step, or None.

  Returns:
    (action, cur_x): action is 2 when the policy output is >= 0.5 and 3
    otherwise (a deterministic threshold, no sampling); cur_x is the
    preprocessed current frame to pass back in as prev_x on the next step.
  """
  frame = prepro(observation)
  if prev_x is None:
    diff = np.zeros(D)
  else:
    diff = frame - prev_x

  up_prob, _hidden = policy_forward(diff)
  if up_prob >= 0.5:
    return 2, frame
  return 3, frame

def play_game(env, model, max_steps=1000):
  """Play one episode with the greedy policy, then show the rendered frames.

  Before every step a frame is rendered and collected, an action is chosen
  with model_step(), and the environment is advanced. The loop stops as soon
  as the environment reports `done`, or after `max_steps` steps.

  Exactly one summary line is printed: the "finished after N timesteps"
  message when the episode ended, or the "finished without success" message
  when the step limit was exhausted first.

  Args:
    env: environment providing reset(), render(mode=...), step() returning
      (observation, reward, done, info), and close().
    model: forwarded unchanged to model_step().
    max_steps: upper bound on the number of environment steps.

  Returns:
    None. The environment is closed (also when a step raises), and the
    collected frames are handed to display_frames_as_gif().
  """
  frames = []
  cumulated_reward = 0
  prev_x = None # used in computing the difference frame

  try:
    observation = env.reset()
    for t in range(max_steps):
      frames.append(env.render(mode = 'rgb_array'))
      action, prev_x = model_step(model, observation, prev_x)
      observation, reward, done, info = env.step(action)
      cumulated_reward += reward
      if done:
        print("Episode finished after {} timesteps, accumulated reward = {}".format(t+1, cumulated_reward))
        break
    else:
      # only reached when the loop ran out of steps without hitting `break`
      print("Episode finished without success, accumulated reward = {}".format(cumulated_reward))
  finally:
    env.close()
  display_frames_as_gif(frames)

def train_model(env, model, total_episodes = 100):
  """Train the policy with policy gradients and RMSProp for a number of episodes.

  Each step samples action 2 with the probability returned by
  policy_forward() (action 3 otherwise) and records the input, the hidden
  state, the log-probability gradient and the reward. When an episode ends,
  the discounted and standardized rewards scale those gradients, the result
  is back-propagated with policy_backward() and added to the module-level
  `grad_buffer`. Every `batch_size` episodes an RMSProp step (using the
  module-level `rmsprop_cache`, `decay_rate`, `learning_rate`) is applied
  in place to `model`.

  Note that policy_forward()/policy_backward() are called without `model`,
  while the parameter update is applied to the `model` argument.

  Args:
    env: environment providing reset() and step() returning
      (observation, reward, done, info).
    model: dict of weight arrays updated in place; its keys must also exist
      in `grad_buffer` and `rmsprop_cache`.
    total_episodes: number of episodes to run. Values <= 0 return
      immediately without touching the environment.

  Returns:
    List of (episode_number, reward_sum, running_reward) tuples, one per
    finished episode.
  """
  hist = []
  if total_episodes <= 0:
    # nothing to train; the episode counter below could never match such a target
    return hist

  observation = env.reset()

  prev_x = None # used in computing the difference frame
  xs,hs,dlogps,drs = [],[],[],[]
  running_reward = None
  reward_sum = 0
  episode_number = 0

  while True:

    cur_x = prepro(observation)
    x = cur_x - prev_x if prev_x is not None else np.zeros(D)
    prev_x = cur_x

    # forward the policy network and sample an action from the returned probability
    aprob, h = policy_forward(x)
    action = 2 if np.random.uniform() < aprob else 3 # roll the dice!

    # record various intermediates (needed later for backprop)
    xs.append(x) # observation
    hs.append(h) # hidden state
    y = 1 if action == 2 else 0 # a "fake label"
    dlogps.append(y - aprob) # grad that encourages the action that was taken to be taken (see http://cs231n.github.io/neural-networks-2/#losses if confused)

    # step the environment and get new measurements
    observation, reward, done, info = env.step(action)
    reward_sum += reward

    drs.append(reward) # record reward (has to be done after we call step() to get reward for previous action)

    if done: # an episode finished
      episode_number += 1

      # stack together all inputs, hidden states, action gradients, and rewards for this episode
      epx = np.vstack(xs)
      eph = np.vstack(hs)
      epdlogp = np.vstack(dlogps)
      epr = np.vstack(drs)
      xs,hs,dlogps,drs = [],[],[],[] # reset array memory

      # compute the discounted reward backwards through time
      discounted_epr = discount_rewards(epr)
      # standardize the rewards to be unit normal (helps control the gradient estimator variance)
      discounted_epr -= np.mean(discounted_epr)
      reward_std = np.std(discounted_epr)
      if reward_std > 0:
        discounted_epr /= reward_std
      # else: every discounted reward was identical, so the centred values are
      # all zero; dividing by a zero std would turn them (and then the weights)
      # into NaN, so the zeros are kept and this episode contributes no gradient.

      epdlogp *= discounted_epr # modulate the gradient with advantage (PG magic happens right here.)
      grad = policy_backward(epx, eph, epdlogp)
      for k in model: grad_buffer[k] += grad[k] # accumulate grad over batch

      # perform rmsprop parameter update every batch_size episodes
      if episode_number % batch_size == 0:
        for k,v in model.items():
          g = grad_buffer[k] # gradient
          rmsprop_cache[k] = decay_rate * rmsprop_cache[k] + (1 - decay_rate) * g**2
          model[k] += learning_rate * g / (np.sqrt(rmsprop_cache[k]) + 1e-5) # "+=": gradient ascent on expected reward
          grad_buffer[k] = np.zeros_like(v) # reset batch gradient buffer

      # boring book-keeping
      running_reward = reward_sum if running_reward is None else running_reward * 0.99 + reward_sum * 0.01
      hist.append((episode_number, reward_sum, running_reward))
      print ('episode %f, reward total was %f. running mean: %f' % (episode_number, reward_sum, running_reward))
      reward_sum = 0
      observation = env.reset() # reset env
      prev_x = None
      # ">=" rather than "==" so a non-integer target still terminates
      if episode_number >= total_episodes:
        return hist

   
    

  logger.warn(
  deprecation(
  deprecation(


In [2]:
# Train for 7000 episodes, timing the call with the %time line magic; the
# per-episode history returned by train_model() is kept in hist1.
%time hist1 = train_model(env, model, total_episodes=7000)

  logger.deprecation(


episode 1.000000, reward total was -21.000000. running mean: -21.000000
episode 2.000000, reward total was -20.000000. running mean: -20.990000
episode 3.000000, reward total was -21.000000. running mean: -20.990100
episode 4.000000, reward total was -20.000000. running mean: -20.980199
episode 5.000000, reward total was -21.000000. running mean: -20.980397
episode 6.000000, reward total was -21.000000. running mean: -20.980593
episode 7.000000, reward total was -21.000000. running mean: -20.980787
episode 8.000000, reward total was -21.000000. running mean: -20.980979
episode 9.000000, reward total was -21.000000. running mean: -20.981169
episode 10.000000, reward total was -21.000000. running mean: -20.981358
episode 11.000000, reward total was -20.000000. running mean: -20.971544
episode 12.000000, reward total was -21.000000. running mean: -20.971829
episode 13.000000, reward total was -21.000000. running mean: -20.972110
episode 14.000000, reward total was -21.000000. running mean

episode 114.000000, reward total was -20.000000. running mean: -20.470331
episode 115.000000, reward total was -21.000000. running mean: -20.475628
episode 116.000000, reward total was -21.000000. running mean: -20.480872
episode 117.000000, reward total was -20.000000. running mean: -20.476063
episode 118.000000, reward total was -21.000000. running mean: -20.481303
episode 119.000000, reward total was -21.000000. running mean: -20.486490
episode 120.000000, reward total was -20.000000. running mean: -20.481625
episode 121.000000, reward total was -19.000000. running mean: -20.466808
episode 122.000000, reward total was -20.000000. running mean: -20.462140
episode 123.000000, reward total was -21.000000. running mean: -20.467519
episode 124.000000, reward total was -20.000000. running mean: -20.462844
episode 125.000000, reward total was -21.000000. running mean: -20.468215
episode 126.000000, reward total was -19.000000. running mean: -20.453533
episode 127.000000, reward total was -

episode 225.000000, reward total was -21.000000. running mean: -20.320313
episode 226.000000, reward total was -20.000000. running mean: -20.317110
episode 227.000000, reward total was -21.000000. running mean: -20.323939
episode 228.000000, reward total was -21.000000. running mean: -20.330700
episode 229.000000, reward total was -20.000000. running mean: -20.327393
episode 230.000000, reward total was -16.000000. running mean: -20.284119
episode 231.000000, reward total was -20.000000. running mean: -20.281278
episode 232.000000, reward total was -21.000000. running mean: -20.288465
episode 233.000000, reward total was -21.000000. running mean: -20.295580
episode 234.000000, reward total was -19.000000. running mean: -20.282624
episode 235.000000, reward total was -21.000000. running mean: -20.289798
episode 236.000000, reward total was -21.000000. running mean: -20.296900
episode 237.000000, reward total was -21.000000. running mean: -20.303931
episode 238.000000, reward total was -

episode 336.000000, reward total was -18.000000. running mean: -20.201314
episode 337.000000, reward total was -21.000000. running mean: -20.209300
episode 338.000000, reward total was -19.000000. running mean: -20.197207
episode 339.000000, reward total was -20.000000. running mean: -20.195235
episode 340.000000, reward total was -21.000000. running mean: -20.203283
episode 341.000000, reward total was -20.000000. running mean: -20.201250
episode 342.000000, reward total was -21.000000. running mean: -20.209238
episode 343.000000, reward total was -21.000000. running mean: -20.217145
episode 344.000000, reward total was -21.000000. running mean: -20.224974
episode 345.000000, reward total was -21.000000. running mean: -20.232724
episode 346.000000, reward total was -21.000000. running mean: -20.240397
episode 347.000000, reward total was -21.000000. running mean: -20.247993
episode 348.000000, reward total was -21.000000. running mean: -20.255513
episode 349.000000, reward total was -

episode 447.000000, reward total was -21.000000. running mean: -20.335900
episode 448.000000, reward total was -21.000000. running mean: -20.342541
episode 449.000000, reward total was -21.000000. running mean: -20.349116
episode 450.000000, reward total was -21.000000. running mean: -20.355625
episode 451.000000, reward total was -20.000000. running mean: -20.352068
episode 452.000000, reward total was -21.000000. running mean: -20.358548
episode 453.000000, reward total was -19.000000. running mean: -20.344962
episode 454.000000, reward total was -21.000000. running mean: -20.351512
episode 455.000000, reward total was -21.000000. running mean: -20.357997
episode 456.000000, reward total was -20.000000. running mean: -20.354417
episode 457.000000, reward total was -21.000000. running mean: -20.360873
episode 458.000000, reward total was -21.000000. running mean: -20.367264
episode 459.000000, reward total was -21.000000. running mean: -20.373592
episode 460.000000, reward total was -

episode 558.000000, reward total was -21.000000. running mean: -20.375769
episode 559.000000, reward total was -21.000000. running mean: -20.382012
episode 560.000000, reward total was -21.000000. running mean: -20.388192
episode 561.000000, reward total was -21.000000. running mean: -20.394310
episode 562.000000, reward total was -21.000000. running mean: -20.400367
episode 563.000000, reward total was -21.000000. running mean: -20.406363
episode 564.000000, reward total was -20.000000. running mean: -20.402299
episode 565.000000, reward total was -21.000000. running mean: -20.408276
episode 566.000000, reward total was -19.000000. running mean: -20.394194
episode 567.000000, reward total was -20.000000. running mean: -20.390252
episode 568.000000, reward total was -17.000000. running mean: -20.356349
episode 569.000000, reward total was -20.000000. running mean: -20.352786
episode 570.000000, reward total was -21.000000. running mean: -20.359258
episode 571.000000, reward total was -

episode 669.000000, reward total was -20.000000. running mean: -20.228952
episode 670.000000, reward total was -20.000000. running mean: -20.226663
episode 671.000000, reward total was -21.000000. running mean: -20.234396
episode 672.000000, reward total was -20.000000. running mean: -20.232052
episode 673.000000, reward total was -20.000000. running mean: -20.229731
episode 674.000000, reward total was -21.000000. running mean: -20.237434
episode 675.000000, reward total was -20.000000. running mean: -20.235060
episode 676.000000, reward total was -21.000000. running mean: -20.242709
episode 677.000000, reward total was -18.000000. running mean: -20.220282
episode 678.000000, reward total was -18.000000. running mean: -20.198079
episode 679.000000, reward total was -20.000000. running mean: -20.196098
episode 680.000000, reward total was -18.000000. running mean: -20.174137
episode 681.000000, reward total was -21.000000. running mean: -20.182396
episode 682.000000, reward total was -

episode 780.000000, reward total was -20.000000. running mean: -20.191336
episode 781.000000, reward total was -21.000000. running mean: -20.199422
episode 782.000000, reward total was -21.000000. running mean: -20.207428
episode 783.000000, reward total was -20.000000. running mean: -20.205354
episode 784.000000, reward total was -20.000000. running mean: -20.203300
episode 785.000000, reward total was -20.000000. running mean: -20.201267
episode 786.000000, reward total was -19.000000. running mean: -20.189255
episode 787.000000, reward total was -20.000000. running mean: -20.187362
episode 788.000000, reward total was -21.000000. running mean: -20.195488
episode 789.000000, reward total was -19.000000. running mean: -20.183533
episode 790.000000, reward total was -21.000000. running mean: -20.191698
episode 791.000000, reward total was -21.000000. running mean: -20.199781
episode 792.000000, reward total was -21.000000. running mean: -20.207783
episode 793.000000, reward total was -

episode 891.000000, reward total was -21.000000. running mean: -20.183092
episode 892.000000, reward total was -21.000000. running mean: -20.191261
episode 893.000000, reward total was -21.000000. running mean: -20.199349
episode 894.000000, reward total was -21.000000. running mean: -20.207355
episode 895.000000, reward total was -20.000000. running mean: -20.205282
episode 896.000000, reward total was -21.000000. running mean: -20.213229
episode 897.000000, reward total was -20.000000. running mean: -20.211097
episode 898.000000, reward total was -20.000000. running mean: -20.208986
episode 899.000000, reward total was -20.000000. running mean: -20.206896
episode 900.000000, reward total was -21.000000. running mean: -20.214827
episode 901.000000, reward total was -20.000000. running mean: -20.212679
episode 902.000000, reward total was -19.000000. running mean: -20.200552
episode 903.000000, reward total was -20.000000. running mean: -20.198546
episode 904.000000, reward total was -

episode 1002.000000, reward total was -20.000000. running mean: -20.120188
episode 1003.000000, reward total was -20.000000. running mean: -20.118986
episode 1004.000000, reward total was -21.000000. running mean: -20.127796
episode 1005.000000, reward total was -21.000000. running mean: -20.136518
episode 1006.000000, reward total was -21.000000. running mean: -20.145153
episode 1007.000000, reward total was -17.000000. running mean: -20.113701
episode 1008.000000, reward total was -21.000000. running mean: -20.122564
episode 1009.000000, reward total was -21.000000. running mean: -20.131339
episode 1010.000000, reward total was -19.000000. running mean: -20.120025
episode 1011.000000, reward total was -21.000000. running mean: -20.128825
episode 1012.000000, reward total was -19.000000. running mean: -20.117537
episode 1013.000000, reward total was -20.000000. running mean: -20.116361
episode 1014.000000, reward total was -21.000000. running mean: -20.125198
episode 1015.000000, rewa

episode 1112.000000, reward total was -21.000000. running mean: -19.991620
episode 1113.000000, reward total was -20.000000. running mean: -19.991704
episode 1114.000000, reward total was -19.000000. running mean: -19.981787
episode 1115.000000, reward total was -21.000000. running mean: -19.991969
episode 1116.000000, reward total was -20.000000. running mean: -19.992049
episode 1117.000000, reward total was -19.000000. running mean: -19.982129
episode 1118.000000, reward total was -18.000000. running mean: -19.962307
episode 1119.000000, reward total was -21.000000. running mean: -19.972684
episode 1120.000000, reward total was -20.000000. running mean: -19.972957
episode 1121.000000, reward total was -19.000000. running mean: -19.963228
episode 1122.000000, reward total was -21.000000. running mean: -19.973596
episode 1123.000000, reward total was -19.000000. running mean: -19.963860
episode 1124.000000, reward total was -18.000000. running mean: -19.944221
episode 1125.000000, rewa

episode 1222.000000, reward total was -20.000000. running mean: -20.046311
episode 1223.000000, reward total was -20.000000. running mean: -20.045848
episode 1224.000000, reward total was -19.000000. running mean: -20.035390
episode 1225.000000, reward total was -19.000000. running mean: -20.025036
episode 1226.000000, reward total was -20.000000. running mean: -20.024785
episode 1227.000000, reward total was -20.000000. running mean: -20.024538
episode 1228.000000, reward total was -20.000000. running mean: -20.024292
episode 1229.000000, reward total was -18.000000. running mean: -20.004049
episode 1230.000000, reward total was -20.000000. running mean: -20.004009
episode 1231.000000, reward total was -19.000000. running mean: -19.993969
episode 1232.000000, reward total was -20.000000. running mean: -19.994029
episode 1233.000000, reward total was -20.000000. running mean: -19.994089
episode 1234.000000, reward total was -20.000000. running mean: -19.994148
episode 1235.000000, rewa

episode 1332.000000, reward total was -19.000000. running mean: -20.058676
episode 1333.000000, reward total was -21.000000. running mean: -20.068089
episode 1334.000000, reward total was -21.000000. running mean: -20.077408
episode 1335.000000, reward total was -20.000000. running mean: -20.076634
episode 1336.000000, reward total was -21.000000. running mean: -20.085868
episode 1337.000000, reward total was -17.000000. running mean: -20.055009
episode 1338.000000, reward total was -19.000000. running mean: -20.044459
episode 1339.000000, reward total was -21.000000. running mean: -20.054015
episode 1340.000000, reward total was -21.000000. running mean: -20.063474
episode 1341.000000, reward total was -19.000000. running mean: -20.052840
episode 1342.000000, reward total was -18.000000. running mean: -20.032311
episode 1343.000000, reward total was -16.000000. running mean: -19.991988
episode 1344.000000, reward total was -20.000000. running mean: -19.992068
episode 1345.000000, rewa

episode 1442.000000, reward total was -21.000000. running mean: -19.826922
episode 1443.000000, reward total was -20.000000. running mean: -19.828653
episode 1444.000000, reward total was -21.000000. running mean: -19.840366
episode 1445.000000, reward total was -19.000000. running mean: -19.831963
episode 1446.000000, reward total was -19.000000. running mean: -19.823643
episode 1447.000000, reward total was -21.000000. running mean: -19.835407
episode 1448.000000, reward total was -19.000000. running mean: -19.827052
episode 1449.000000, reward total was -20.000000. running mean: -19.828782
episode 1450.000000, reward total was -21.000000. running mean: -19.840494
episode 1451.000000, reward total was -21.000000. running mean: -19.852089
episode 1452.000000, reward total was -21.000000. running mean: -19.863568
episode 1453.000000, reward total was -20.000000. running mean: -19.864933
episode 1454.000000, reward total was -20.000000. running mean: -19.866283
episode 1455.000000, rewa

episode 1552.000000, reward total was -20.000000. running mean: -19.949683
episode 1553.000000, reward total was -21.000000. running mean: -19.960186
episode 1554.000000, reward total was -20.000000. running mean: -19.960584
episode 1555.000000, reward total was -20.000000. running mean: -19.960978
episode 1556.000000, reward total was -21.000000. running mean: -19.971369
episode 1557.000000, reward total was -17.000000. running mean: -19.941655
episode 1558.000000, reward total was -20.000000. running mean: -19.942238
episode 1559.000000, reward total was -20.000000. running mean: -19.942816
episode 1560.000000, reward total was -21.000000. running mean: -19.953388
episode 1561.000000, reward total was -19.000000. running mean: -19.943854
episode 1562.000000, reward total was -21.000000. running mean: -19.954415
episode 1563.000000, reward total was -20.000000. running mean: -19.954871
episode 1564.000000, reward total was -20.000000. running mean: -19.955323
episode 1565.000000, rewa

episode 1662.000000, reward total was -20.000000. running mean: -19.895893
episode 1663.000000, reward total was -20.000000. running mean: -19.896934
episode 1664.000000, reward total was -21.000000. running mean: -19.907965
episode 1665.000000, reward total was -20.000000. running mean: -19.908885
episode 1666.000000, reward total was -20.000000. running mean: -19.909796
episode 1667.000000, reward total was -20.000000. running mean: -19.910698
episode 1668.000000, reward total was -18.000000. running mean: -19.891591
episode 1669.000000, reward total was -19.000000. running mean: -19.882676
episode 1670.000000, reward total was -21.000000. running mean: -19.893849
episode 1671.000000, reward total was -21.000000. running mean: -19.904910
episode 1672.000000, reward total was -19.000000. running mean: -19.895861
episode 1673.000000, reward total was -21.000000. running mean: -19.906903
episode 1674.000000, reward total was -20.000000. running mean: -19.907834
episode 1675.000000, rewa

episode 1772.000000, reward total was -21.000000. running mean: -19.823663
episode 1773.000000, reward total was -21.000000. running mean: -19.835427
episode 1774.000000, reward total was -21.000000. running mean: -19.847072
episode 1775.000000, reward total was -21.000000. running mean: -19.858602
episode 1776.000000, reward total was -18.000000. running mean: -19.840016
episode 1777.000000, reward total was -19.000000. running mean: -19.831615
episode 1778.000000, reward total was -21.000000. running mean: -19.843299
episode 1779.000000, reward total was -20.000000. running mean: -19.844866
episode 1780.000000, reward total was -20.000000. running mean: -19.846418
episode 1781.000000, reward total was -18.000000. running mean: -19.827953
episode 1782.000000, reward total was -20.000000. running mean: -19.829674
episode 1783.000000, reward total was -20.000000. running mean: -19.831377
episode 1784.000000, reward total was -20.000000. running mean: -19.833063
episode 1785.000000, rewa

episode 1882.000000, reward total was -21.000000. running mean: -19.783510
episode 1883.000000, reward total was -20.000000. running mean: -19.785675
episode 1884.000000, reward total was -20.000000. running mean: -19.787818
episode 1885.000000, reward total was -20.000000. running mean: -19.789940
episode 1886.000000, reward total was -18.000000. running mean: -19.772041
episode 1887.000000, reward total was -19.000000. running mean: -19.764320
episode 1888.000000, reward total was -20.000000. running mean: -19.766677
episode 1889.000000, reward total was -20.000000. running mean: -19.769011
episode 1890.000000, reward total was -18.000000. running mean: -19.751320
episode 1891.000000, reward total was -21.000000. running mean: -19.763807
episode 1892.000000, reward total was -20.000000. running mean: -19.766169
episode 1893.000000, reward total was -19.000000. running mean: -19.758507
episode 1894.000000, reward total was -19.000000. running mean: -19.750922
episode 1895.000000, rewa

episode 1992.000000, reward total was -19.000000. running mean: -19.790513
episode 1993.000000, reward total was -19.000000. running mean: -19.782607
episode 1994.000000, reward total was -21.000000. running mean: -19.794781
episode 1995.000000, reward total was -20.000000. running mean: -19.796833
episode 1996.000000, reward total was -21.000000. running mean: -19.808865
episode 1997.000000, reward total was -17.000000. running mean: -19.780777
episode 1998.000000, reward total was -20.000000. running mean: -19.782969
episode 1999.000000, reward total was -20.000000. running mean: -19.785139
episode 2000.000000, reward total was -19.000000. running mean: -19.777288
episode 2001.000000, reward total was -19.000000. running mean: -19.769515
episode 2002.000000, reward total was -21.000000. running mean: -19.781820
episode 2003.000000, reward total was -20.000000. running mean: -19.784001
episode 2004.000000, reward total was -19.000000. running mean: -19.776161
episode 2005.000000, rewa

episode 2102.000000, reward total was -21.000000. running mean: -19.856594
episode 2103.000000, reward total was -19.000000. running mean: -19.848028
episode 2104.000000, reward total was -21.000000. running mean: -19.859548
episode 2105.000000, reward total was -21.000000. running mean: -19.870952
episode 2106.000000, reward total was -21.000000. running mean: -19.882243
episode 2107.000000, reward total was -19.000000. running mean: -19.873420
episode 2108.000000, reward total was -17.000000. running mean: -19.844686
episode 2109.000000, reward total was -20.000000. running mean: -19.846239
episode 2110.000000, reward total was -18.000000. running mean: -19.827777
episode 2111.000000, reward total was -20.000000. running mean: -19.829499
episode 2112.000000, reward total was -19.000000. running mean: -19.821204
episode 2113.000000, reward total was -20.000000. running mean: -19.822992
episode 2114.000000, reward total was -17.000000. running mean: -19.794762
episode 2115.000000, rewa

episode 2212.000000, reward total was -19.000000. running mean: -19.783980
episode 2213.000000, reward total was -19.000000. running mean: -19.776140
episode 2214.000000, reward total was -20.000000. running mean: -19.778379
episode 2215.000000, reward total was -17.000000. running mean: -19.750595
episode 2216.000000, reward total was -20.000000. running mean: -19.753089
episode 2217.000000, reward total was -20.000000. running mean: -19.755558
episode 2218.000000, reward total was -20.000000. running mean: -19.758002
episode 2219.000000, reward total was -21.000000. running mean: -19.770422
episode 2220.000000, reward total was -21.000000. running mean: -19.782718
episode 2221.000000, reward total was -19.000000. running mean: -19.774891
episode 2222.000000, reward total was -20.000000. running mean: -19.777142
episode 2223.000000, reward total was -19.000000. running mean: -19.769371
episode 2224.000000, reward total was -19.000000. running mean: -19.761677
episode 2225.000000, rewa

episode 2322.000000, reward total was -19.000000. running mean: -19.611021
episode 2323.000000, reward total was -20.000000. running mean: -19.614911
episode 2324.000000, reward total was -21.000000. running mean: -19.628762
episode 2325.000000, reward total was -20.000000. running mean: -19.632474
episode 2326.000000, reward total was -15.000000. running mean: -19.586150
episode 2327.000000, reward total was -20.000000. running mean: -19.590288
episode 2328.000000, reward total was -20.000000. running mean: -19.594385
episode 2329.000000, reward total was -21.000000. running mean: -19.608441
episode 2330.000000, reward total was -21.000000. running mean: -19.622357
episode 2331.000000, reward total was -18.000000. running mean: -19.606134
episode 2332.000000, reward total was -20.000000. running mean: -19.610072
episode 2333.000000, reward total was -20.000000. running mean: -19.613971
episode 2334.000000, reward total was -18.000000. running mean: -19.597832
episode 2335.000000, rewa

episode 2432.000000, reward total was -19.000000. running mean: -19.456057
episode 2433.000000, reward total was -19.000000. running mean: -19.451497
episode 2434.000000, reward total was -20.000000. running mean: -19.456982
episode 2435.000000, reward total was -20.000000. running mean: -19.462412
episode 2436.000000, reward total was -21.000000. running mean: -19.477788
episode 2437.000000, reward total was -19.000000. running mean: -19.473010
episode 2438.000000, reward total was -20.000000. running mean: -19.478280
episode 2439.000000, reward total was -17.000000. running mean: -19.453497
episode 2440.000000, reward total was -17.000000. running mean: -19.428962
episode 2441.000000, reward total was -18.000000. running mean: -19.414672
episode 2442.000000, reward total was -19.000000. running mean: -19.410526
episode 2443.000000, reward total was -20.000000. running mean: -19.416420
episode 2444.000000, reward total was -20.000000. running mean: -19.422256
episode 2445.000000, rewa

episode 2542.000000, reward total was -18.000000. running mean: -19.505211
episode 2543.000000, reward total was -19.000000. running mean: -19.500159
episode 2544.000000, reward total was -20.000000. running mean: -19.505158
episode 2545.000000, reward total was -20.000000. running mean: -19.510106
episode 2546.000000, reward total was -21.000000. running mean: -19.525005
episode 2547.000000, reward total was -21.000000. running mean: -19.539755
episode 2548.000000, reward total was -19.000000. running mean: -19.534358
episode 2549.000000, reward total was -21.000000. running mean: -19.549014
episode 2550.000000, reward total was -19.000000. running mean: -19.543524
episode 2551.000000, reward total was -17.000000. running mean: -19.518089
episode 2552.000000, reward total was -18.000000. running mean: -19.502908
episode 2553.000000, reward total was -18.000000. running mean: -19.487879
episode 2554.000000, reward total was -20.000000. running mean: -19.493000
episode 2555.000000, rewa

episode 2652.000000, reward total was -18.000000. running mean: -19.701962
episode 2653.000000, reward total was -19.000000. running mean: -19.694943
episode 2654.000000, reward total was -20.000000. running mean: -19.697993
episode 2655.000000, reward total was -17.000000. running mean: -19.671013
episode 2656.000000, reward total was -18.000000. running mean: -19.654303
episode 2657.000000, reward total was -18.000000. running mean: -19.637760
episode 2658.000000, reward total was -21.000000. running mean: -19.651383
episode 2659.000000, reward total was -21.000000. running mean: -19.664869
episode 2660.000000, reward total was -20.000000. running mean: -19.668220
episode 2661.000000, reward total was -19.000000. running mean: -19.661538
episode 2662.000000, reward total was -21.000000. running mean: -19.674922
episode 2663.000000, reward total was -21.000000. running mean: -19.688173
episode 2664.000000, reward total was -19.000000. running mean: -19.681291
episode 2665.000000, rewa

episode 2762.000000, reward total was -19.000000. running mean: -19.499623
episode 2763.000000, reward total was -21.000000. running mean: -19.514627
episode 2764.000000, reward total was -19.000000. running mean: -19.509481
episode 2765.000000, reward total was -20.000000. running mean: -19.514386
episode 2766.000000, reward total was -19.000000. running mean: -19.509242
episode 2767.000000, reward total was -19.000000. running mean: -19.504150
episode 2768.000000, reward total was -21.000000. running mean: -19.519108
episode 2769.000000, reward total was -19.000000. running mean: -19.513917
episode 2770.000000, reward total was -21.000000. running mean: -19.528778
episode 2771.000000, reward total was -18.000000. running mean: -19.513490
episode 2772.000000, reward total was -19.000000. running mean: -19.508355
episode 2773.000000, reward total was -17.000000. running mean: -19.483272
episode 2774.000000, reward total was -20.000000. running mean: -19.488439
episode 2775.000000, rewa

episode 2872.000000, reward total was -19.000000. running mean: -19.427744
episode 2873.000000, reward total was -20.000000. running mean: -19.433466
episode 2874.000000, reward total was -21.000000. running mean: -19.449132
episode 2875.000000, reward total was -18.000000. running mean: -19.434640
episode 2876.000000, reward total was -18.000000. running mean: -19.420294
episode 2877.000000, reward total was -19.000000. running mean: -19.416091
episode 2878.000000, reward total was -18.000000. running mean: -19.401930
episode 2879.000000, reward total was -21.000000. running mean: -19.417911
episode 2880.000000, reward total was -21.000000. running mean: -19.433732
episode 2881.000000, reward total was -20.000000. running mean: -19.439394
episode 2882.000000, reward total was -20.000000. running mean: -19.445000
episode 2883.000000, reward total was -19.000000. running mean: -19.440550
episode 2884.000000, reward total was -17.000000. running mean: -19.416145
episode 2885.000000, rewa

episode 2982.000000, reward total was -20.000000. running mean: -19.279108
episode 2983.000000, reward total was -20.000000. running mean: -19.286317
episode 2984.000000, reward total was -20.000000. running mean: -19.293454
episode 2985.000000, reward total was -19.000000. running mean: -19.290520
episode 2986.000000, reward total was -21.000000. running mean: -19.307614
episode 2987.000000, reward total was -18.000000. running mean: -19.294538
episode 2988.000000, reward total was -19.000000. running mean: -19.291593
episode 2989.000000, reward total was -18.000000. running mean: -19.278677
episode 2990.000000, reward total was -19.000000. running mean: -19.275890
episode 2991.000000, reward total was -21.000000. running mean: -19.293131
episode 2992.000000, reward total was -21.000000. running mean: -19.310200
episode 2993.000000, reward total was -18.000000. running mean: -19.297098
episode 2994.000000, reward total was -20.000000. running mean: -19.304127
episode 2995.000000, rewa

episode 3092.000000, reward total was -18.000000. running mean: -19.258127
episode 3093.000000, reward total was -21.000000. running mean: -19.275545
episode 3094.000000, reward total was -18.000000. running mean: -19.262790
episode 3095.000000, reward total was -20.000000. running mean: -19.270162
episode 3096.000000, reward total was -21.000000. running mean: -19.287460
episode 3097.000000, reward total was -20.000000. running mean: -19.294586
episode 3098.000000, reward total was -19.000000. running mean: -19.291640
episode 3099.000000, reward total was -18.000000. running mean: -19.278723
episode 3100.000000, reward total was -20.000000. running mean: -19.285936
episode 3101.000000, reward total was -18.000000. running mean: -19.273077
episode 3102.000000, reward total was -19.000000. running mean: -19.270346
episode 3103.000000, reward total was -19.000000. running mean: -19.267643
episode 3104.000000, reward total was -18.000000. running mean: -19.254966
episode 3105.000000, rewa

episode 3202.000000, reward total was -20.000000. running mean: -19.359074
episode 3203.000000, reward total was -18.000000. running mean: -19.345483
episode 3204.000000, reward total was -20.000000. running mean: -19.352028
episode 3205.000000, reward total was -19.000000. running mean: -19.348508
episode 3206.000000, reward total was -21.000000. running mean: -19.365023
episode 3207.000000, reward total was -21.000000. running mean: -19.381373
episode 3208.000000, reward total was -19.000000. running mean: -19.377559
episode 3209.000000, reward total was -19.000000. running mean: -19.373784
episode 3210.000000, reward total was -21.000000. running mean: -19.390046
episode 3211.000000, reward total was -16.000000. running mean: -19.356145
episode 3212.000000, reward total was -21.000000. running mean: -19.372584
episode 3213.000000, reward total was -20.000000. running mean: -19.378858
episode 3214.000000, reward total was -19.000000. running mean: -19.375069
episode 3215.000000, rewa

episode 3312.000000, reward total was -17.000000. running mean: -19.396875
episode 3313.000000, reward total was -20.000000. running mean: -19.402906
episode 3314.000000, reward total was -19.000000. running mean: -19.398877
episode 3315.000000, reward total was -20.000000. running mean: -19.404889
episode 3316.000000, reward total was -20.000000. running mean: -19.410840
episode 3317.000000, reward total was -17.000000. running mean: -19.386731
episode 3318.000000, reward total was -20.000000. running mean: -19.392864
episode 3319.000000, reward total was -21.000000. running mean: -19.408935
episode 3320.000000, reward total was -15.000000. running mean: -19.364846
episode 3321.000000, reward total was -21.000000. running mean: -19.381197
episode 3322.000000, reward total was -20.000000. running mean: -19.387385
episode 3323.000000, reward total was -17.000000. running mean: -19.363512
episode 3324.000000, reward total was -20.000000. running mean: -19.369877
episode 3325.000000, rewa

episode 3422.000000, reward total was -21.000000. running mean: -19.192851
episode 3423.000000, reward total was -21.000000. running mean: -19.210922
episode 3424.000000, reward total was -19.000000. running mean: -19.208813
episode 3425.000000, reward total was -17.000000. running mean: -19.186725
episode 3426.000000, reward total was -20.000000. running mean: -19.194858
episode 3427.000000, reward total was -21.000000. running mean: -19.212909
episode 3428.000000, reward total was -19.000000. running mean: -19.210780
episode 3429.000000, reward total was -21.000000. running mean: -19.228672
episode 3430.000000, reward total was -20.000000. running mean: -19.236385
episode 3431.000000, reward total was -19.000000. running mean: -19.234021
episode 3432.000000, reward total was -18.000000. running mean: -19.221681
episode 3433.000000, reward total was -21.000000. running mean: -19.239464
episode 3434.000000, reward total was -18.000000. running mean: -19.227070
episode 3435.000000, rewa

episode 3532.000000, reward total was -18.000000. running mean: -19.065463
episode 3533.000000, reward total was -20.000000. running mean: -19.074809
episode 3534.000000, reward total was -17.000000. running mean: -19.054061
episode 3535.000000, reward total was -19.000000. running mean: -19.053520
episode 3536.000000, reward total was -17.000000. running mean: -19.032985
episode 3537.000000, reward total was -19.000000. running mean: -19.032655
episode 3538.000000, reward total was -21.000000. running mean: -19.052328
episode 3539.000000, reward total was -18.000000. running mean: -19.041805
episode 3540.000000, reward total was -20.000000. running mean: -19.051387
episode 3541.000000, reward total was -21.000000. running mean: -19.070873
episode 3542.000000, reward total was -20.000000. running mean: -19.080165
episode 3543.000000, reward total was -17.000000. running mean: -19.059363
episode 3544.000000, reward total was -19.000000. running mean: -19.058769
episode 3545.000000, rewa

episode 3642.000000, reward total was -20.000000. running mean: -18.962425
episode 3643.000000, reward total was -20.000000. running mean: -18.972800
episode 3644.000000, reward total was -19.000000. running mean: -18.973072
episode 3645.000000, reward total was -16.000000. running mean: -18.943342
episode 3646.000000, reward total was -17.000000. running mean: -18.923908
episode 3647.000000, reward total was -21.000000. running mean: -18.944669
episode 3648.000000, reward total was -18.000000. running mean: -18.935223
episode 3649.000000, reward total was -16.000000. running mean: -18.905870
episode 3650.000000, reward total was -20.000000. running mean: -18.916812
episode 3651.000000, reward total was -18.000000. running mean: -18.907644
episode 3652.000000, reward total was -17.000000. running mean: -18.888567
episode 3653.000000, reward total was -19.000000. running mean: -18.889681
episode 3654.000000, reward total was -17.000000. running mean: -18.870785
episode 3655.000000, rewa

episode 3752.000000, reward total was -20.000000. running mean: -18.721528
episode 3753.000000, reward total was -20.000000. running mean: -18.734313
episode 3754.000000, reward total was -16.000000. running mean: -18.706970
episode 3755.000000, reward total was -20.000000. running mean: -18.719900
episode 3756.000000, reward total was -21.000000. running mean: -18.742701
episode 3757.000000, reward total was -19.000000. running mean: -18.745274
episode 3758.000000, reward total was -20.000000. running mean: -18.757821
episode 3759.000000, reward total was -14.000000. running mean: -18.710243
episode 3760.000000, reward total was -21.000000. running mean: -18.733140
episode 3761.000000, reward total was -16.000000. running mean: -18.705809
episode 3762.000000, reward total was -16.000000. running mean: -18.678751
episode 3763.000000, reward total was -21.000000. running mean: -18.701963
episode 3764.000000, reward total was -21.000000. running mean: -18.724944
episode 3765.000000, rewa

episode 3862.000000, reward total was -20.000000. running mean: -18.748856
episode 3863.000000, reward total was -19.000000. running mean: -18.751368
episode 3864.000000, reward total was -17.000000. running mean: -18.733854
episode 3865.000000, reward total was -16.000000. running mean: -18.706516
episode 3866.000000, reward total was -19.000000. running mean: -18.709450
episode 3867.000000, reward total was -18.000000. running mean: -18.702356
episode 3868.000000, reward total was -20.000000. running mean: -18.715332
episode 3869.000000, reward total was -21.000000. running mean: -18.738179
episode 3870.000000, reward total was -17.000000. running mean: -18.720797
episode 3871.000000, reward total was -20.000000. running mean: -18.733589
episode 3872.000000, reward total was -20.000000. running mean: -18.746253
episode 3873.000000, reward total was -19.000000. running mean: -18.748791
episode 3874.000000, reward total was -19.000000. running mean: -18.751303
episode 3875.000000, rewa

episode 3972.000000, reward total was -19.000000. running mean: -18.771736
episode 3973.000000, reward total was -17.000000. running mean: -18.754019
episode 3974.000000, reward total was -21.000000. running mean: -18.776478
episode 3975.000000, reward total was -19.000000. running mean: -18.778714
episode 3976.000000, reward total was -19.000000. running mean: -18.780926
episode 3977.000000, reward total was -20.000000. running mean: -18.793117
episode 3978.000000, reward total was -21.000000. running mean: -18.815186
episode 3979.000000, reward total was -19.000000. running mean: -18.817034
episode 3980.000000, reward total was -18.000000. running mean: -18.808864
episode 3981.000000, reward total was -17.000000. running mean: -18.790775
episode 3982.000000, reward total was -19.000000. running mean: -18.792867
episode 3983.000000, reward total was -20.000000. running mean: -18.804939
episode 3984.000000, reward total was -19.000000. running mean: -18.806889
episode 3985.000000, rewa

episode 4082.000000, reward total was -15.000000. running mean: -18.681154
episode 4083.000000, reward total was -18.000000. running mean: -18.674342
episode 4084.000000, reward total was -19.000000. running mean: -18.677599
episode 4085.000000, reward total was -18.000000. running mean: -18.670823
episode 4086.000000, reward total was -19.000000. running mean: -18.674115
episode 4087.000000, reward total was -14.000000. running mean: -18.627374
episode 4088.000000, reward total was -19.000000. running mean: -18.631100
episode 4089.000000, reward total was -17.000000. running mean: -18.614789
episode 4090.000000, reward total was -18.000000. running mean: -18.608641
episode 4091.000000, reward total was -17.000000. running mean: -18.592555
episode 4092.000000, reward total was -18.000000. running mean: -18.586629
episode 4093.000000, reward total was -18.000000. running mean: -18.580763
episode 4094.000000, reward total was -20.000000. running mean: -18.594955
episode 4095.000000, rewa

episode 4192.000000, reward total was -20.000000. running mean: -18.466657
episode 4193.000000, reward total was -20.000000. running mean: -18.481990
episode 4194.000000, reward total was -21.000000. running mean: -18.507170
episode 4195.000000, reward total was -17.000000. running mean: -18.492098
episode 4196.000000, reward total was -15.000000. running mean: -18.457177
episode 4197.000000, reward total was -19.000000. running mean: -18.462606
episode 4198.000000, reward total was -21.000000. running mean: -18.487980
episode 4199.000000, reward total was -15.000000. running mean: -18.453100
episode 4200.000000, reward total was -17.000000. running mean: -18.438569
episode 4201.000000, reward total was -20.000000. running mean: -18.454183
episode 4202.000000, reward total was -18.000000. running mean: -18.449641
episode 4203.000000, reward total was -21.000000. running mean: -18.475145
episode 4204.000000, reward total was -18.000000. running mean: -18.470393
episode 4205.000000, rewa

episode 4302.000000, reward total was -18.000000. running mean: -18.497969
episode 4303.000000, reward total was -21.000000. running mean: -18.522990
episode 4304.000000, reward total was -20.000000. running mean: -18.537760
episode 4305.000000, reward total was -21.000000. running mean: -18.562382
episode 4306.000000, reward total was -18.000000. running mean: -18.556758
episode 4307.000000, reward total was -17.000000. running mean: -18.541191
episode 4308.000000, reward total was -19.000000. running mean: -18.545779
episode 4309.000000, reward total was -16.000000. running mean: -18.520321
episode 4310.000000, reward total was -21.000000. running mean: -18.545118
episode 4311.000000, reward total was -21.000000. running mean: -18.569667
episode 4312.000000, reward total was -19.000000. running mean: -18.573970
episode 4313.000000, reward total was -19.000000. running mean: -18.578230
episode 4314.000000, reward total was -19.000000. running mean: -18.582448
episode 4315.000000, rewa

episode 4412.000000, reward total was -19.000000. running mean: -18.462350
episode 4413.000000, reward total was -19.000000. running mean: -18.467727
episode 4414.000000, reward total was -15.000000. running mean: -18.433050
episode 4415.000000, reward total was -19.000000. running mean: -18.438719
episode 4416.000000, reward total was -20.000000. running mean: -18.454332
episode 4417.000000, reward total was -21.000000. running mean: -18.479789
episode 4418.000000, reward total was -17.000000. running mean: -18.464991
episode 4419.000000, reward total was -17.000000. running mean: -18.450341
episode 4420.000000, reward total was -17.000000. running mean: -18.435837
episode 4421.000000, reward total was -19.000000. running mean: -18.441479
episode 4422.000000, reward total was -18.000000. running mean: -18.437064
episode 4423.000000, reward total was -19.000000. running mean: -18.442694
episode 4424.000000, reward total was -18.000000. running mean: -18.438267
episode 4425.000000, rewa

episode 4522.000000, reward total was -17.000000. running mean: -18.357052
episode 4523.000000, reward total was -16.000000. running mean: -18.333482
episode 4524.000000, reward total was -19.000000. running mean: -18.340147
episode 4525.000000, reward total was -20.000000. running mean: -18.356745
episode 4526.000000, reward total was -19.000000. running mean: -18.363178
episode 4527.000000, reward total was -21.000000. running mean: -18.389546
episode 4528.000000, reward total was -20.000000. running mean: -18.405651
episode 4529.000000, reward total was -18.000000. running mean: -18.401594
episode 4530.000000, reward total was -19.000000. running mean: -18.407578
episode 4531.000000, reward total was -13.000000. running mean: -18.353502
episode 4532.000000, reward total was -19.000000. running mean: -18.359967
episode 4533.000000, reward total was -16.000000. running mean: -18.336368
episode 4534.000000, reward total was -21.000000. running mean: -18.363004
episode 4535.000000, rewa

episode 4632.000000, reward total was -19.000000. running mean: -18.359408
episode 4633.000000, reward total was -19.000000. running mean: -18.365814
episode 4634.000000, reward total was -21.000000. running mean: -18.392156
episode 4635.000000, reward total was -14.000000. running mean: -18.348234
episode 4636.000000, reward total was -21.000000. running mean: -18.374752
episode 4637.000000, reward total was -19.000000. running mean: -18.381004
episode 4638.000000, reward total was -20.000000. running mean: -18.397194
episode 4639.000000, reward total was -19.000000. running mean: -18.403222
episode 4640.000000, reward total was -17.000000. running mean: -18.389190
episode 4641.000000, reward total was -17.000000. running mean: -18.375298
episode 4642.000000, reward total was -14.000000. running mean: -18.331545
episode 4643.000000, reward total was -21.000000. running mean: -18.358230
episode 4644.000000, reward total was -17.000000. running mean: -18.344647
episode 4645.000000, rewa

episode 4742.000000, reward total was -18.000000. running mean: -18.197719
episode 4743.000000, reward total was -20.000000. running mean: -18.215742
episode 4744.000000, reward total was -21.000000. running mean: -18.243584
episode 4745.000000, reward total was -21.000000. running mean: -18.271149
episode 4746.000000, reward total was -19.000000. running mean: -18.278437
episode 4747.000000, reward total was -20.000000. running mean: -18.295653
episode 4748.000000, reward total was -17.000000. running mean: -18.282696
episode 4749.000000, reward total was -21.000000. running mean: -18.309869
episode 4750.000000, reward total was -15.000000. running mean: -18.276771
episode 4751.000000, reward total was -18.000000. running mean: -18.274003
episode 4752.000000, reward total was -17.000000. running mean: -18.261263
episode 4753.000000, reward total was -13.000000. running mean: -18.208650
episode 4754.000000, reward total was -20.000000. running mean: -18.226564
episode 4755.000000, rewa

episode 4852.000000, reward total was -17.000000. running mean: -18.107713
episode 4853.000000, reward total was -20.000000. running mean: -18.126636
episode 4854.000000, reward total was -18.000000. running mean: -18.125370
episode 4855.000000, reward total was -18.000000. running mean: -18.124116
episode 4856.000000, reward total was -16.000000. running mean: -18.102875
episode 4857.000000, reward total was -19.000000. running mean: -18.111846
episode 4858.000000, reward total was -19.000000. running mean: -18.120728
episode 4859.000000, reward total was -15.000000. running mean: -18.089521
episode 4860.000000, reward total was -18.000000. running mean: -18.088625
episode 4861.000000, reward total was -19.000000. running mean: -18.097739
episode 4862.000000, reward total was -16.000000. running mean: -18.076762
episode 4863.000000, reward total was -17.000000. running mean: -18.065994
episode 4864.000000, reward total was -19.000000. running mean: -18.075334
episode 4865.000000, rewa

episode 4962.000000, reward total was -13.000000. running mean: -17.810003
episode 4963.000000, reward total was -17.000000. running mean: -17.801903
episode 4964.000000, reward total was -17.000000. running mean: -17.793884
episode 4965.000000, reward total was -20.000000. running mean: -17.815945
episode 4966.000000, reward total was -18.000000. running mean: -17.817786
episode 4967.000000, reward total was -19.000000. running mean: -17.829608
episode 4968.000000, reward total was -18.000000. running mean: -17.831312
episode 4969.000000, reward total was -15.000000. running mean: -17.802999
episode 4970.000000, reward total was -21.000000. running mean: -17.834969
episode 4971.000000, reward total was -20.000000. running mean: -17.856619
episode 4972.000000, reward total was -13.000000. running mean: -17.808053
episode 4973.000000, reward total was -19.000000. running mean: -17.819973
episode 4974.000000, reward total was -19.000000. running mean: -17.831773
episode 4975.000000, rewa

episode 5072.000000, reward total was -19.000000. running mean: -17.788598
episode 5073.000000, reward total was -19.000000. running mean: -17.800712
episode 5074.000000, reward total was -15.000000. running mean: -17.772704
episode 5075.000000, reward total was -19.000000. running mean: -17.784977
episode 5076.000000, reward total was -17.000000. running mean: -17.777128
episode 5077.000000, reward total was -21.000000. running mean: -17.809356
episode 5078.000000, reward total was -19.000000. running mean: -17.821263
episode 5079.000000, reward total was -21.000000. running mean: -17.853050
episode 5080.000000, reward total was -13.000000. running mean: -17.804520
episode 5081.000000, reward total was -21.000000. running mean: -17.836474
episode 5082.000000, reward total was -19.000000. running mean: -17.848110
episode 5083.000000, reward total was -16.000000. running mean: -17.829629
episode 5084.000000, reward total was -20.000000. running mean: -17.851332
episode 5085.000000, rewa

episode 5182.000000, reward total was -19.000000. running mean: -17.759014
episode 5183.000000, reward total was -19.000000. running mean: -17.771423
episode 5184.000000, reward total was -21.000000. running mean: -17.803709
episode 5185.000000, reward total was -20.000000. running mean: -17.825672
episode 5186.000000, reward total was -18.000000. running mean: -17.827415
episode 5187.000000, reward total was -16.000000. running mean: -17.809141
episode 5188.000000, reward total was -19.000000. running mean: -17.821050
episode 5189.000000, reward total was -14.000000. running mean: -17.782839
episode 5190.000000, reward total was -16.000000. running mean: -17.765011
episode 5191.000000, reward total was -13.000000. running mean: -17.717361
episode 5192.000000, reward total was -15.000000. running mean: -17.690187
episode 5193.000000, reward total was -19.000000. running mean: -17.703285
episode 5194.000000, reward total was -13.000000. running mean: -17.656252
episode 5195.000000, rewa

episode 5292.000000, reward total was -17.000000. running mean: -17.774870
episode 5293.000000, reward total was -17.000000. running mean: -17.767122
episode 5294.000000, reward total was -19.000000. running mean: -17.779450
episode 5295.000000, reward total was -15.000000. running mean: -17.751656
episode 5296.000000, reward total was -17.000000. running mean: -17.744139
episode 5297.000000, reward total was -21.000000. running mean: -17.776698
episode 5298.000000, reward total was -17.000000. running mean: -17.768931
episode 5299.000000, reward total was -18.000000. running mean: -17.771242
episode 5300.000000, reward total was -16.000000. running mean: -17.753529
episode 5301.000000, reward total was -21.000000. running mean: -17.785994
episode 5302.000000, reward total was -18.000000. running mean: -17.788134
episode 5303.000000, reward total was -15.000000. running mean: -17.760253
episode 5304.000000, reward total was -18.000000. running mean: -17.762650
episode 5305.000000, rewa

episode 5402.000000, reward total was -18.000000. running mean: -17.481370
episode 5403.000000, reward total was -17.000000. running mean: -17.476556
episode 5404.000000, reward total was -17.000000. running mean: -17.471791
episode 5405.000000, reward total was -17.000000. running mean: -17.467073
episode 5406.000000, reward total was -19.000000. running mean: -17.482402
episode 5407.000000, reward total was -19.000000. running mean: -17.497578
episode 5408.000000, reward total was -14.000000. running mean: -17.462602
episode 5409.000000, reward total was -14.000000. running mean: -17.427976
episode 5410.000000, reward total was -16.000000. running mean: -17.413696
episode 5411.000000, reward total was -15.000000. running mean: -17.389559
episode 5412.000000, reward total was -17.000000. running mean: -17.385664
episode 5413.000000, reward total was -20.000000. running mean: -17.411807
episode 5414.000000, reward total was -13.000000. running mean: -17.367689
episode 5415.000000, rewa

episode 5512.000000, reward total was -17.000000. running mean: -17.125363
episode 5513.000000, reward total was -15.000000. running mean: -17.104110
episode 5514.000000, reward total was -16.000000. running mean: -17.093069
episode 5515.000000, reward total was -19.000000. running mean: -17.112138
episode 5516.000000, reward total was -17.000000. running mean: -17.111017
episode 5517.000000, reward total was -13.000000. running mean: -17.069906
episode 5518.000000, reward total was -17.000000. running mean: -17.069207
episode 5519.000000, reward total was -15.000000. running mean: -17.048515
episode 5520.000000, reward total was -16.000000. running mean: -17.038030
episode 5521.000000, reward total was -18.000000. running mean: -17.047650
episode 5522.000000, reward total was -16.000000. running mean: -17.037173
episode 5523.000000, reward total was -18.000000. running mean: -17.046802
episode 5524.000000, reward total was -18.000000. running mean: -17.056334
episode 5525.000000, rewa

episode 5622.000000, reward total was -18.000000. running mean: -17.052699
episode 5623.000000, reward total was -19.000000. running mean: -17.072172
episode 5624.000000, reward total was -12.000000. running mean: -17.021450
episode 5625.000000, reward total was -15.000000. running mean: -17.001236
episode 5626.000000, reward total was -11.000000. running mean: -16.941223
episode 5627.000000, reward total was -19.000000. running mean: -16.961811
episode 5628.000000, reward total was -18.000000. running mean: -16.972193
episode 5629.000000, reward total was -17.000000. running mean: -16.972471
episode 5630.000000, reward total was -21.000000. running mean: -17.012746
episode 5631.000000, reward total was -15.000000. running mean: -16.992619
episode 5632.000000, reward total was -18.000000. running mean: -17.002693
episode 5633.000000, reward total was -19.000000. running mean: -17.022666
episode 5634.000000, reward total was -17.000000. running mean: -17.022439
episode 5635.000000, rewa

episode 5732.000000, reward total was -15.000000. running mean: -16.763928
episode 5733.000000, reward total was -17.000000. running mean: -16.766289
episode 5734.000000, reward total was -17.000000. running mean: -16.768626
episode 5735.000000, reward total was -14.000000. running mean: -16.740940
episode 5736.000000, reward total was -18.000000. running mean: -16.753531
episode 5737.000000, reward total was -15.000000. running mean: -16.735995
episode 5738.000000, reward total was -18.000000. running mean: -16.748635
episode 5739.000000, reward total was -19.000000. running mean: -16.771149
episode 5740.000000, reward total was -16.000000. running mean: -16.763437
episode 5741.000000, reward total was -20.000000. running mean: -16.795803
episode 5742.000000, reward total was -17.000000. running mean: -16.797845
episode 5743.000000, reward total was -19.000000. running mean: -16.819867
episode 5744.000000, reward total was -9.000000. running mean: -16.741668
episode 5745.000000, rewar

episode 5842.000000, reward total was -18.000000. running mean: -16.757936
episode 5843.000000, reward total was -17.000000. running mean: -16.760356
episode 5844.000000, reward total was -16.000000. running mean: -16.752753
episode 5845.000000, reward total was -14.000000. running mean: -16.725225
episode 5846.000000, reward total was -16.000000. running mean: -16.717973
episode 5847.000000, reward total was -17.000000. running mean: -16.720793
episode 5848.000000, reward total was -12.000000. running mean: -16.673585
episode 5849.000000, reward total was -7.000000. running mean: -16.576849
episode 5850.000000, reward total was -18.000000. running mean: -16.591081
episode 5851.000000, reward total was -15.000000. running mean: -16.575170
episode 5852.000000, reward total was -19.000000. running mean: -16.599418
episode 5853.000000, reward total was -17.000000. running mean: -16.603424
episode 5854.000000, reward total was -20.000000. running mean: -16.637390
episode 5855.000000, rewar

episode 5952.000000, reward total was -17.000000. running mean: -16.312394
episode 5953.000000, reward total was -17.000000. running mean: -16.319270
episode 5954.000000, reward total was -15.000000. running mean: -16.306077
episode 5955.000000, reward total was -13.000000. running mean: -16.273016
episode 5956.000000, reward total was -18.000000. running mean: -16.290286
episode 5957.000000, reward total was -17.000000. running mean: -16.297383
episode 5958.000000, reward total was -18.000000. running mean: -16.314410
episode 5959.000000, reward total was -17.000000. running mean: -16.321265
episode 5960.000000, reward total was -14.000000. running mean: -16.298053
episode 5961.000000, reward total was -18.000000. running mean: -16.315072
episode 5962.000000, reward total was -16.000000. running mean: -16.311922
episode 5963.000000, reward total was -19.000000. running mean: -16.338802
episode 5964.000000, reward total was -17.000000. running mean: -16.345414
episode 5965.000000, rewa

episode 6062.000000, reward total was -16.000000. running mean: -16.307487
episode 6063.000000, reward total was -15.000000. running mean: -16.294412
episode 6064.000000, reward total was -14.000000. running mean: -16.271468
episode 6065.000000, reward total was -14.000000. running mean: -16.248753
episode 6066.000000, reward total was -16.000000. running mean: -16.246266
episode 6067.000000, reward total was -14.000000. running mean: -16.223803
episode 6068.000000, reward total was -14.000000. running mean: -16.201565
episode 6069.000000, reward total was -13.000000. running mean: -16.169549
episode 6070.000000, reward total was -20.000000. running mean: -16.207854
episode 6071.000000, reward total was -19.000000. running mean: -16.235775
episode 6072.000000, reward total was -17.000000. running mean: -16.243418
episode 6073.000000, reward total was -18.000000. running mean: -16.260983
episode 6074.000000, reward total was -17.000000. running mean: -16.268373
episode 6075.000000, rewa

episode 6172.000000, reward total was -16.000000. running mean: -15.975057
episode 6173.000000, reward total was -13.000000. running mean: -15.945307
episode 6174.000000, reward total was -15.000000. running mean: -15.935854
episode 6175.000000, reward total was -14.000000. running mean: -15.916495
episode 6176.000000, reward total was -17.000000. running mean: -15.927330
episode 6177.000000, reward total was -18.000000. running mean: -15.948057
episode 6178.000000, reward total was -13.000000. running mean: -15.918576
episode 6179.000000, reward total was -12.000000. running mean: -15.879391
episode 6180.000000, reward total was -16.000000. running mean: -15.880597
episode 6181.000000, reward total was -14.000000. running mean: -15.861791
episode 6182.000000, reward total was -17.000000. running mean: -15.873173
episode 6183.000000, reward total was -15.000000. running mean: -15.864441
episode 6184.000000, reward total was -19.000000. running mean: -15.895797
episode 6185.000000, rewa

episode 6282.000000, reward total was -15.000000. running mean: -15.991599
episode 6283.000000, reward total was -19.000000. running mean: -16.021683
episode 6284.000000, reward total was -16.000000. running mean: -16.021466
episode 6285.000000, reward total was -15.000000. running mean: -16.011252
episode 6286.000000, reward total was -13.000000. running mean: -15.981139
episode 6287.000000, reward total was -16.000000. running mean: -15.981328
episode 6288.000000, reward total was -18.000000. running mean: -16.001514
episode 6289.000000, reward total was -16.000000. running mean: -16.001499
episode 6290.000000, reward total was -11.000000. running mean: -15.951484
episode 6291.000000, reward total was -15.000000. running mean: -15.941969
episode 6292.000000, reward total was -18.000000. running mean: -15.962550
episode 6293.000000, reward total was -20.000000. running mean: -16.002924
episode 6294.000000, reward total was -20.000000. running mean: -16.042895
episode 6295.000000, rewa

episode 6392.000000, reward total was -15.000000. running mean: -15.789017
episode 6393.000000, reward total was -14.000000. running mean: -15.771127
episode 6394.000000, reward total was -13.000000. running mean: -15.743415
episode 6395.000000, reward total was -19.000000. running mean: -15.775981
episode 6396.000000, reward total was -18.000000. running mean: -15.798222
episode 6397.000000, reward total was -19.000000. running mean: -15.830239
episode 6398.000000, reward total was -12.000000. running mean: -15.791937
episode 6399.000000, reward total was -19.000000. running mean: -15.824018
episode 6400.000000, reward total was -13.000000. running mean: -15.795777
episode 6401.000000, reward total was -11.000000. running mean: -15.747820
episode 6402.000000, reward total was -19.000000. running mean: -15.780341
episode 6403.000000, reward total was -17.000000. running mean: -15.792538
episode 6404.000000, reward total was -12.000000. running mean: -15.754613
episode 6405.000000, rewa

episode 6502.000000, reward total was -18.000000. running mean: -15.856722
episode 6503.000000, reward total was -15.000000. running mean: -15.848155
episode 6504.000000, reward total was -17.000000. running mean: -15.859674
episode 6505.000000, reward total was -17.000000. running mean: -15.871077
episode 6506.000000, reward total was -13.000000. running mean: -15.842366
episode 6507.000000, reward total was -17.000000. running mean: -15.853942
episode 6508.000000, reward total was -18.000000. running mean: -15.875403
episode 6509.000000, reward total was -14.000000. running mean: -15.856649
episode 6510.000000, reward total was -17.000000. running mean: -15.868083
episode 6511.000000, reward total was -13.000000. running mean: -15.839402
episode 6512.000000, reward total was -11.000000. running mean: -15.791008
episode 6513.000000, reward total was -17.000000. running mean: -15.803098
episode 6514.000000, reward total was -17.000000. running mean: -15.815067
episode 6515.000000, rewa

episode 6612.000000, reward total was -12.000000. running mean: -15.457851
episode 6613.000000, reward total was -13.000000. running mean: -15.433273
episode 6614.000000, reward total was -9.000000. running mean: -15.368940
episode 6615.000000, reward total was -20.000000. running mean: -15.415251
episode 6616.000000, reward total was -17.000000. running mean: -15.431098
episode 6617.000000, reward total was -15.000000. running mean: -15.426787
episode 6618.000000, reward total was -17.000000. running mean: -15.442519
episode 6619.000000, reward total was -18.000000. running mean: -15.468094
episode 6620.000000, reward total was -18.000000. running mean: -15.493413
episode 6621.000000, reward total was -10.000000. running mean: -15.438479
episode 6622.000000, reward total was -16.000000. running mean: -15.444094
episode 6623.000000, reward total was -20.000000. running mean: -15.489653
episode 6624.000000, reward total was -13.000000. running mean: -15.464757
episode 6625.000000, rewar

episode 6722.000000, reward total was -12.000000. running mean: -15.400570
episode 6723.000000, reward total was -17.000000. running mean: -15.416564
episode 6724.000000, reward total was -17.000000. running mean: -15.432398
episode 6725.000000, reward total was -15.000000. running mean: -15.428074
episode 6726.000000, reward total was -19.000000. running mean: -15.463794
episode 6727.000000, reward total was -13.000000. running mean: -15.439156
episode 6728.000000, reward total was -11.000000. running mean: -15.394764
episode 6729.000000, reward total was -14.000000. running mean: -15.380816
episode 6730.000000, reward total was -10.000000. running mean: -15.327008
episode 6731.000000, reward total was -16.000000. running mean: -15.333738
episode 6732.000000, reward total was -16.000000. running mean: -15.340401
episode 6733.000000, reward total was -14.000000. running mean: -15.326997
episode 6734.000000, reward total was -14.000000. running mean: -15.313727
episode 6735.000000, rewa

episode 6832.000000, reward total was -17.000000. running mean: -15.245505
episode 6833.000000, reward total was -16.000000. running mean: -15.253050
episode 6834.000000, reward total was -12.000000. running mean: -15.220520
episode 6835.000000, reward total was -17.000000. running mean: -15.238315
episode 6836.000000, reward total was -16.000000. running mean: -15.245932
episode 6837.000000, reward total was -14.000000. running mean: -15.233472
episode 6838.000000, reward total was -14.000000. running mean: -15.221138
episode 6839.000000, reward total was -11.000000. running mean: -15.178926
episode 6840.000000, reward total was -14.000000. running mean: -15.167137
episode 6841.000000, reward total was -15.000000. running mean: -15.165466
episode 6842.000000, reward total was -16.000000. running mean: -15.173811
episode 6843.000000, reward total was -19.000000. running mean: -15.212073
episode 6844.000000, reward total was -13.000000. running mean: -15.189952
episode 6845.000000, rewa

episode 6942.000000, reward total was -13.000000. running mean: -14.775854
episode 6943.000000, reward total was -11.000000. running mean: -14.738095
episode 6944.000000, reward total was -10.000000. running mean: -14.690714
episode 6945.000000, reward total was -14.000000. running mean: -14.683807
episode 6946.000000, reward total was -16.000000. running mean: -14.696969
episode 6947.000000, reward total was -19.000000. running mean: -14.739999
episode 6948.000000, reward total was -9.000000. running mean: -14.682599
episode 6949.000000, reward total was -17.000000. running mean: -14.705773
episode 6950.000000, reward total was -19.000000. running mean: -14.748716
episode 6951.000000, reward total was -21.000000. running mean: -14.811229
episode 6952.000000, reward total was -17.000000. running mean: -14.833116
episode 6953.000000, reward total was -18.000000. running mean: -14.864785
episode 6954.000000, reward total was -15.000000. running mean: -14.866137
episode 6955.000000, rewar