In [1]:
import gym
import numpy as np

from gym.wrappers import AtariPreprocessing
# --- Environment -----------------------------------------------------------
# NOTE(review): this only sets an attribute on the imported gym module; whether
# gym itself reads it is not visible from this notebook. The rest of the code
# unpacks env.step() as a 4-tuple -- confirm against the installed gym version.
gym.new_step_api = True
env = gym.make('Pong-v0')

# --- Network dimensions ----------------------------------------------------
H = 200      # hidden layer width
D = 80 * 80  # length of the flattened 80x80 input grid

# --- Hyperparameters -------------------------------------------------------
batch_size = 10       # number of episodes accumulated per parameter update
learning_rate = 1e-2
gamma = 0.99          # reward discount factor
decay_rate = 0.99     # decay of the RMSProp running average of grad^2

# --- Parameters ------------------------------------------------------------
# Scaled Gaussian ("Xavier") initialisation. The dict literal evaluates W1
# before W2, so random numbers are drawn in the same order as before.
model = {
  'W1': np.random.randn(H, D) / np.sqrt(D),
  'W2': np.random.randn(H) / np.sqrt(H),
}

# Per-parameter accumulators, each shaped like the weight it belongs to.
grad_buffer = {name: np.zeros_like(weights) for name, weights in model.items()}    # gradients summed over a batch
rmsprop_cache = {name: np.zeros_like(weights) for name, weights in model.items()}  # RMSProp memory

def sigmoid(x):
  """Logistic function: squash x (scalar or array, elementwise) into [0, 1]."""
  denominator = 1.0 + np.exp(-x)
  return 1.0 / denominator

def prepro(I):
  """Turn a raw frame into a flat binary float vector without modifying it.

  Rows 35..194 are kept, every second row and column is taken, and only
  channel 0 is used. Pixels equal to 0, 144 or 109 (the two background
  colours handled by this notebook) map to 0.0; every other pixel
  (paddles, ball) maps to 1.0.

  The previous implementation wrote into a view of the caller's array, so
  the observation passed in was silently overwritten. This version builds
  the result from a boolean mask and leaves the input untouched.

  Args:
    I: array-like frame indexed as [row, column, channel].
      NOTE(review): presumably a 210x160x3 Atari frame, which yields
      80*80 = 6400 entries -- confirm against the environment.

  Returns:
    1-D float array of 0.0/1.0 values.
  """
  # Crop and downsample by a factor of 2 in one slice (still only a view).
  frame = np.asarray(I)[35:195:2, ::2, 0]
  # Foreground = anything that is neither already 0 nor a background colour.
  foreground = (frame != 0) & (frame != 144) & (frame != 109)
  return foreground.astype(float).ravel()

def discount_rewards(r, discount=None):
  """Compute discounted returns, walking the rewards backwards in time.

  The running sum is reset whenever a non-zero reward is met, because in
  Pong a non-zero reward marks a game boundary.

  Args:
    r: rewards, either 1-D or a column of shape (N, 1); one entry per step.
    discount: discount factor. Defaults to the module-level `gamma`.

  Returns:
    Float array shaped like `r` holding the discounted return of each step.
    Integer input no longer truncates the result: the output keeps the
    input dtype only when that dtype is already floating point.
  """
  if discount is None:
    discount = gamma  # fall back to the notebook-wide hyperparameter
  r = np.asarray(r)
  out_dtype = r.dtype if np.issubdtype(r.dtype, np.floating) else float
  discounted_r = np.zeros_like(r, dtype=out_dtype)
  running_add = 0.0
  for t in reversed(range(r.size)):
    if r[t] != 0:
      running_add = 0.0  # reset the sum at a game boundary (pong specific!)
    running_add = running_add * discount + r[t]
    discounted_r[t] = running_add
  return discounted_r

def policy_forward(x):
  """Run the two-layer policy network on input x.

  Reads the weights from the module-level `model` dict.

  Returns:
    (p, h): p is the sigmoid of the output score, used by the callers as the
    probability of taking action 2; h is the hidden activation after ReLU.
  """
  pre_activation = np.dot(model['W1'], x)
  hidden = np.where(pre_activation < 0, 0.0, pre_activation)  # ReLU nonlinearity
  score = np.dot(model['W2'], hidden)
  return sigmoid(score), hidden

def policy_backward(epx, eph, epdlogp):
  """Backward pass over a whole episode.

  Args:
    epx: stacked network inputs, one row per step.
    eph: stacked hidden activations, one row per step.
    epdlogp: stacked output-score gradients, one row per step.

  Returns:
    Dict with keys 'W1' and 'W2' holding the gradient for each weight of the
    module-level `model`.
  """
  grad_W2 = np.dot(eph.T, epdlogp).ravel()
  # Push the output gradient back through W2 ...
  grad_hidden = np.outer(epdlogp, model['W2'])
  # ... and through the ReLU: units that were not active receive no gradient.
  grad_hidden = np.where(eph <= 0, 0.0, grad_hidden)
  grad_W1 = np.dot(grad_hidden.T, epx)
  return {'W1': grad_W1, 'W2': grad_W2}

def model_step(model, observation, prev_x):
  """Choose an action for one frame using the deterministic (greedy) policy.

  Args:
    model: accepted for interface symmetry. NOTE(review): it is not passed to
      policy_forward, which reads the module-level `model` instead.
    observation: raw frame, preprocessed with prepro.
    prev_x: preprocessed previous frame, or None on the first step.

  Returns:
    (action, cur_x): action is 2 when the policy output is >= 0.5 and 3
    otherwise (a threshold, no sampling); cur_x is the preprocessed current
    frame, to be passed back as prev_x on the next call.
  """
  frame = prepro(observation)

  # The network sees the difference between consecutive frames; with no
  # previous frame it sees an all-zero input of length D.
  if prev_x is None:
    delta = np.zeros(D)
  else:
    delta = frame - prev_x

  prob_action2, _hidden = policy_forward(delta)
  if prob_action2 >= 0.5:
    chosen = 2
  else:
    chosen = 3

  return chosen, frame

def play_game(env, model, max_steps=1000):
  """Play one episode with the greedy policy and display the recorded frames.

  Args:
    env: environment exposing reset(), render(mode='rgb_array'), step(action)
      returning (observation, reward, done, info), and close().
    model: forwarded to model_step.
    max_steps: upper bound on the number of environment steps (default 1000,
      the value that used to be hard-coded).

  Side effects:
    Prints a one-line summary, closes `env` (also when a step raises), and
    hands the frames to display_frames_as_gif.
    NOTE(review): display_frames_as_gif is not defined in the visible part of
    the notebook -- confirm it is available before calling this function.
  """
  observation = env.reset()

  frames = []
  cumulated_reward = 0
  prev_x = None  # used in computing the difference frame

  try:
    for t in range(max_steps):
      frames.append(env.render(mode = 'rgb_array'))
      action, prev_x = model_step(model, observation, prev_x)
      observation, reward, done, info = env.step(action)
      cumulated_reward += reward
      if done:
        print("Episode finished after {} timesteps, accumulated reward = {}".format(t+1, cumulated_reward))
        break
    else:
      # Only reached when the step budget ran out without `done`; previously
      # this message was also printed after a finished episode.
      print("Episode finished without success, accumulated reward = {}".format(cumulated_reward))
  finally:
    env.close()
  display_frames_as_gif(frames)

def train_model(env, model, total_episodes = 100):
  """Train the policy with policy gradients and RMSProp for a number of episodes.

  Args:
    env: environment exposing reset() and step(action) returning
      (observation, reward, done, info).
    model: dict of weights that is updated in place. NOTE(review):
      policy_forward and policy_backward read the module-level `model`, so
      this argument is expected to be that same dict.
    total_episodes: number of episodes to run. Values <= 0 now return an
      empty history instead of looping forever.

  Returns:
    List of (episode_number, reward_sum, running_reward) tuples, one per
    finished episode.

  Side effects:
    Mutates the module-level grad_buffer and rmsprop_cache, reads batch_size,
    learning_rate and decay_rate, and prints one line per episode.
  """
  hist = []
  observation = env.reset()

  prev_x = None # used in computing the difference frame
  xs,hs,dlogps,drs = [],[],[],[]
  running_reward = None
  reward_sum = 0
  episode_number = 0

  while episode_number < total_episodes:

    cur_x = prepro(observation)
    x = cur_x - prev_x if prev_x is not None else np.zeros(D)
    prev_x = cur_x

    # forward the policy network and sample an action from the returned probability
    aprob, h = policy_forward(x)
    action = 2 if np.random.uniform() < aprob else 3 # roll the dice!

    # record various intermediates (needed later for backprop)
    xs.append(x) # observation
    hs.append(h) # hidden state
    y = 1 if action == 2 else 0 # a "fake label"
    dlogps.append(y - aprob) # grad that encourages the action that was taken to be taken (see http://cs231n.github.io/neural-networks-2/#losses if confused)

    # step the environment and get new measurements
    observation, reward, done, info = env.step(action)
    reward_sum += reward

    drs.append(reward) # record reward (has to be done after we call step() to get reward for previous action)

    if done: # an episode finished
      episode_number += 1

      # stack together all inputs, hidden states, action gradients, and rewards for this episode
      epx = np.vstack(xs)
      eph = np.vstack(hs)
      epdlogp = np.vstack(dlogps)
      epr = np.vstack(drs)
      xs,hs,dlogps,drs = [],[],[],[] # reset array memory

      # compute the discounted reward backwards through time
      discounted_epr = discount_rewards(epr)
      # standardize the rewards to be unit normal (helps control the gradient estimator variance)
      discounted_epr -= np.mean(discounted_epr)
      reward_std = np.std(discounted_epr)
      if reward_std > 0:
        # Skip the division when every return is identical: 0/0 would turn
        # the gradient, and after the next update the weights, into NaN.
        discounted_epr /= reward_std

      epdlogp *= discounted_epr # modulate the gradient with advantage (PG magic happens right here.)
      grad = policy_backward(epx, eph, epdlogp)
      for k in model: grad_buffer[k] += grad[k] # accumulate grad over batch

      # perform rmsprop parameter update every batch_size episodes
      if episode_number % batch_size == 0:
        for k,v in model.items():
          g = grad_buffer[k] # gradient
          rmsprop_cache[k] = decay_rate * rmsprop_cache[k] + (1 - decay_rate) * g**2
          model[k] += learning_rate * g / (np.sqrt(rmsprop_cache[k]) + 1e-5) # gradient ascent step
          grad_buffer[k] = np.zeros_like(v) # reset batch gradient buffer

      # boring book-keeping
      running_reward = reward_sum if running_reward is None else running_reward * 0.99 + reward_sum * 0.01
      hist.append((episode_number, reward_sum, running_reward))
      # episode_number is an integer counter, so print it with %d rather than %f
      print ('episode %d, reward total was %f. running mean: %f' % (episode_number, reward_sum, running_reward))
      reward_sum = 0
      observation = env.reset() # reset env
      prev_x = None

  return hist

   
    

  logger.warn(
  deprecation(
  deprecation(


In [2]:
# Train for 7000 episodes and time the call; hist1 receives train_model's return value.
%time hist1 = train_model(env, model, total_episodes=7000)

  logger.deprecation(


episode 1.000000, reward total was -21.000000. running mean: -21.000000
episode 2.000000, reward total was -20.000000. running mean: -20.990000
episode 3.000000, reward total was -21.000000. running mean: -20.990100
episode 4.000000, reward total was -21.000000. running mean: -20.990199
episode 5.000000, reward total was -21.000000. running mean: -20.990297
episode 6.000000, reward total was -18.000000. running mean: -20.960394
episode 7.000000, reward total was -21.000000. running mean: -20.960790
episode 8.000000, reward total was -21.000000. running mean: -20.961182
episode 9.000000, reward total was -20.000000. running mean: -20.951570
episode 10.000000, reward total was -20.000000. running mean: -20.942055
episode 11.000000, reward total was -21.000000. running mean: -20.942634
episode 12.000000, reward total was -21.000000. running mean: -20.943208
episode 13.000000, reward total was -21.000000. running mean: -20.943776
episode 14.000000, reward total was -21.000000. running mean

episode 114.000000, reward total was -20.000000. running mean: -20.756182
episode 115.000000, reward total was -21.000000. running mean: -20.758620
episode 116.000000, reward total was -21.000000. running mean: -20.761034
episode 117.000000, reward total was -21.000000. running mean: -20.763423
episode 118.000000, reward total was -20.000000. running mean: -20.755789
episode 119.000000, reward total was -19.000000. running mean: -20.738231
episode 120.000000, reward total was -21.000000. running mean: -20.740849
episode 121.000000, reward total was -21.000000. running mean: -20.743440
episode 122.000000, reward total was -21.000000. running mean: -20.746006
episode 123.000000, reward total was -21.000000. running mean: -20.748546
episode 124.000000, reward total was -21.000000. running mean: -20.751060
episode 125.000000, reward total was -21.000000. running mean: -20.753550
episode 126.000000, reward total was -21.000000. running mean: -20.756014
episode 127.000000, reward total was -

episode 225.000000, reward total was -21.000000. running mean: -20.716464
episode 226.000000, reward total was -21.000000. running mean: -20.719300
episode 227.000000, reward total was -21.000000. running mean: -20.722107
episode 228.000000, reward total was -21.000000. running mean: -20.724886
episode 229.000000, reward total was -21.000000. running mean: -20.727637
episode 230.000000, reward total was -21.000000. running mean: -20.730361
episode 231.000000, reward total was -21.000000. running mean: -20.733057
episode 232.000000, reward total was -21.000000. running mean: -20.735726
episode 233.000000, reward total was -21.000000. running mean: -20.738369
episode 234.000000, reward total was -21.000000. running mean: -20.740985
episode 235.000000, reward total was -21.000000. running mean: -20.743576
episode 236.000000, reward total was -21.000000. running mean: -20.746140
episode 237.000000, reward total was -21.000000. running mean: -20.748678
episode 238.000000, reward total was -

episode 336.000000, reward total was -21.000000. running mean: -20.339726
episode 337.000000, reward total was -21.000000. running mean: -20.346329
episode 338.000000, reward total was -21.000000. running mean: -20.352865
episode 339.000000, reward total was -21.000000. running mean: -20.359337
episode 340.000000, reward total was -21.000000. running mean: -20.365743
episode 341.000000, reward total was -21.000000. running mean: -20.372086
episode 342.000000, reward total was -21.000000. running mean: -20.378365
episode 343.000000, reward total was -20.000000. running mean: -20.374582
episode 344.000000, reward total was -20.000000. running mean: -20.370836
episode 345.000000, reward total was -21.000000. running mean: -20.377127
episode 346.000000, reward total was -21.000000. running mean: -20.383356
episode 347.000000, reward total was -21.000000. running mean: -20.389523
episode 348.000000, reward total was -20.000000. running mean: -20.385627
episode 349.000000, reward total was -

episode 447.000000, reward total was -20.000000. running mean: -20.169565
episode 448.000000, reward total was -20.000000. running mean: -20.167869
episode 449.000000, reward total was -20.000000. running mean: -20.166190
episode 450.000000, reward total was -20.000000. running mean: -20.164529
episode 451.000000, reward total was -21.000000. running mean: -20.172883
episode 452.000000, reward total was -16.000000. running mean: -20.131154
episode 453.000000, reward total was -18.000000. running mean: -20.109843
episode 454.000000, reward total was -19.000000. running mean: -20.098744
episode 455.000000, reward total was -20.000000. running mean: -20.097757
episode 456.000000, reward total was -20.000000. running mean: -20.096779
episode 457.000000, reward total was -20.000000. running mean: -20.095812
episode 458.000000, reward total was -21.000000. running mean: -20.104854
episode 459.000000, reward total was -20.000000. running mean: -20.103805
episode 460.000000, reward total was -

episode 558.000000, reward total was -19.000000. running mean: -20.116254
episode 559.000000, reward total was -17.000000. running mean: -20.085092
episode 560.000000, reward total was -21.000000. running mean: -20.094241
episode 561.000000, reward total was -21.000000. running mean: -20.103299
episode 562.000000, reward total was -19.000000. running mean: -20.092266
episode 563.000000, reward total was -19.000000. running mean: -20.081343
episode 564.000000, reward total was -21.000000. running mean: -20.090529
episode 565.000000, reward total was -18.000000. running mean: -20.069624
episode 566.000000, reward total was -19.000000. running mean: -20.058928
episode 567.000000, reward total was -20.000000. running mean: -20.058339
episode 568.000000, reward total was -21.000000. running mean: -20.067755
episode 569.000000, reward total was -18.000000. running mean: -20.047078
episode 570.000000, reward total was -21.000000. running mean: -20.056607
episode 571.000000, reward total was -

episode 669.000000, reward total was -19.000000. running mean: -19.662513
episode 670.000000, reward total was -18.000000. running mean: -19.645888
episode 671.000000, reward total was -20.000000. running mean: -19.649429
episode 672.000000, reward total was -19.000000. running mean: -19.642935
episode 673.000000, reward total was -19.000000. running mean: -19.636505
episode 674.000000, reward total was -17.000000. running mean: -19.610140
episode 675.000000, reward total was -17.000000. running mean: -19.584039
episode 676.000000, reward total was -18.000000. running mean: -19.568198
episode 677.000000, reward total was -21.000000. running mean: -19.582517
episode 678.000000, reward total was -19.000000. running mean: -19.576691
episode 679.000000, reward total was -21.000000. running mean: -19.590924
episode 680.000000, reward total was -18.000000. running mean: -19.575015
episode 681.000000, reward total was -21.000000. running mean: -19.589265
episode 682.000000, reward total was -

episode 780.000000, reward total was -16.000000. running mean: -19.310874
episode 781.000000, reward total was -21.000000. running mean: -19.327765
episode 782.000000, reward total was -21.000000. running mean: -19.344487
episode 783.000000, reward total was -19.000000. running mean: -19.341043
episode 784.000000, reward total was -21.000000. running mean: -19.357632
episode 785.000000, reward total was -21.000000. running mean: -19.374056
episode 786.000000, reward total was -17.000000. running mean: -19.350315
episode 787.000000, reward total was -19.000000. running mean: -19.346812
episode 788.000000, reward total was -21.000000. running mean: -19.363344
episode 789.000000, reward total was -19.000000. running mean: -19.359711
episode 790.000000, reward total was -19.000000. running mean: -19.356113
episode 791.000000, reward total was -19.000000. running mean: -19.352552
episode 792.000000, reward total was -19.000000. running mean: -19.349027
episode 793.000000, reward total was -

episode 891.000000, reward total was -19.000000. running mean: -19.253390
episode 892.000000, reward total was -20.000000. running mean: -19.260856
episode 893.000000, reward total was -16.000000. running mean: -19.228248
episode 894.000000, reward total was -21.000000. running mean: -19.245965
episode 895.000000, reward total was -21.000000. running mean: -19.263506
episode 896.000000, reward total was -19.000000. running mean: -19.260871
episode 897.000000, reward total was -19.000000. running mean: -19.258262
episode 898.000000, reward total was -19.000000. running mean: -19.255679
episode 899.000000, reward total was -21.000000. running mean: -19.273122
episode 900.000000, reward total was -17.000000. running mean: -19.250391
episode 901.000000, reward total was -19.000000. running mean: -19.247887
episode 902.000000, reward total was -17.000000. running mean: -19.225408
episode 903.000000, reward total was -17.000000. running mean: -19.203154
episode 904.000000, reward total was -

episode 1002.000000, reward total was -20.000000. running mean: -18.948065
episode 1003.000000, reward total was -21.000000. running mean: -18.968584
episode 1004.000000, reward total was -18.000000. running mean: -18.958898
episode 1005.000000, reward total was -17.000000. running mean: -18.939309
episode 1006.000000, reward total was -20.000000. running mean: -18.949916
episode 1007.000000, reward total was -17.000000. running mean: -18.930417
episode 1008.000000, reward total was -17.000000. running mean: -18.911113
episode 1009.000000, reward total was -20.000000. running mean: -18.922002
episode 1010.000000, reward total was -21.000000. running mean: -18.942782
episode 1011.000000, reward total was -17.000000. running mean: -18.923354
episode 1012.000000, reward total was -19.000000. running mean: -18.924120
episode 1013.000000, reward total was -17.000000. running mean: -18.904879
episode 1014.000000, reward total was -18.000000. running mean: -18.895830
episode 1015.000000, rewa

episode 1112.000000, reward total was -17.000000. running mean: -18.511452
episode 1113.000000, reward total was -21.000000. running mean: -18.536338
episode 1114.000000, reward total was -17.000000. running mean: -18.520974
episode 1115.000000, reward total was -19.000000. running mean: -18.525764
episode 1116.000000, reward total was -19.000000. running mean: -18.530507
episode 1117.000000, reward total was -21.000000. running mean: -18.555202
episode 1118.000000, reward total was -21.000000. running mean: -18.579650
episode 1119.000000, reward total was -20.000000. running mean: -18.593853
episode 1120.000000, reward total was -17.000000. running mean: -18.577915
episode 1121.000000, reward total was -19.000000. running mean: -18.582136
episode 1122.000000, reward total was -20.000000. running mean: -18.596314
episode 1123.000000, reward total was -17.000000. running mean: -18.580351
episode 1124.000000, reward total was -20.000000. running mean: -18.594548
episode 1125.000000, rewa

episode 1222.000000, reward total was -17.000000. running mean: -18.463789
episode 1223.000000, reward total was -19.000000. running mean: -18.469151
episode 1224.000000, reward total was -19.000000. running mean: -18.474460
episode 1225.000000, reward total was -17.000000. running mean: -18.459715
episode 1226.000000, reward total was -21.000000. running mean: -18.485118
episode 1227.000000, reward total was -19.000000. running mean: -18.490267
episode 1228.000000, reward total was -21.000000. running mean: -18.515364
episode 1229.000000, reward total was -19.000000. running mean: -18.520210
episode 1230.000000, reward total was -19.000000. running mean: -18.525008
episode 1231.000000, reward total was -18.000000. running mean: -18.519758
episode 1232.000000, reward total was -20.000000. running mean: -18.534561
episode 1233.000000, reward total was -19.000000. running mean: -18.539215
episode 1234.000000, reward total was -20.000000. running mean: -18.553823
episode 1235.000000, rewa

episode 1332.000000, reward total was -13.000000. running mean: -18.585502
episode 1333.000000, reward total was -19.000000. running mean: -18.589647
episode 1334.000000, reward total was -19.000000. running mean: -18.593751
episode 1335.000000, reward total was -19.000000. running mean: -18.597813
episode 1336.000000, reward total was -19.000000. running mean: -18.601835
episode 1337.000000, reward total was -20.000000. running mean: -18.615817
episode 1338.000000, reward total was -17.000000. running mean: -18.599659
episode 1339.000000, reward total was -17.000000. running mean: -18.583662
episode 1340.000000, reward total was -19.000000. running mean: -18.587826
episode 1341.000000, reward total was -18.000000. running mean: -18.581947
episode 1342.000000, reward total was -18.000000. running mean: -18.576128
episode 1343.000000, reward total was -19.000000. running mean: -18.580367
episode 1344.000000, reward total was -18.000000. running mean: -18.574563
episode 1345.000000, rewa

episode 1442.000000, reward total was -19.000000. running mean: -18.407072
episode 1443.000000, reward total was -19.000000. running mean: -18.413001
episode 1444.000000, reward total was -17.000000. running mean: -18.398871
episode 1445.000000, reward total was -21.000000. running mean: -18.424883
episode 1446.000000, reward total was -19.000000. running mean: -18.430634
episode 1447.000000, reward total was -19.000000. running mean: -18.436327
episode 1448.000000, reward total was -21.000000. running mean: -18.461964
episode 1449.000000, reward total was -16.000000. running mean: -18.437345
episode 1450.000000, reward total was -17.000000. running mean: -18.422971
episode 1451.000000, reward total was -19.000000. running mean: -18.428741
episode 1452.000000, reward total was -19.000000. running mean: -18.434454
episode 1453.000000, reward total was -19.000000. running mean: -18.440109
episode 1454.000000, reward total was -21.000000. running mean: -18.465708
episode 1455.000000, rewa

episode 1552.000000, reward total was -18.000000. running mean: -18.144588
episode 1553.000000, reward total was -17.000000. running mean: -18.133142
episode 1554.000000, reward total was -19.000000. running mean: -18.141811
episode 1555.000000, reward total was -15.000000. running mean: -18.110393
episode 1556.000000, reward total was -21.000000. running mean: -18.139289
episode 1557.000000, reward total was -18.000000. running mean: -18.137896
episode 1558.000000, reward total was -11.000000. running mean: -18.066517
episode 1559.000000, reward total was -16.000000. running mean: -18.045852
episode 1560.000000, reward total was -19.000000. running mean: -18.055393
episode 1561.000000, reward total was -19.000000. running mean: -18.064839
episode 1562.000000, reward total was -19.000000. running mean: -18.074191
episode 1563.000000, reward total was -18.000000. running mean: -18.073449
episode 1564.000000, reward total was -18.000000. running mean: -18.072715
episode 1565.000000, rewa

episode 1662.000000, reward total was -19.000000. running mean: -18.019265
episode 1663.000000, reward total was -19.000000. running mean: -18.029072
episode 1664.000000, reward total was -19.000000. running mean: -18.038782
episode 1665.000000, reward total was -15.000000. running mean: -18.008394
episode 1666.000000, reward total was -17.000000. running mean: -17.998310
episode 1667.000000, reward total was -19.000000. running mean: -18.008327
episode 1668.000000, reward total was -19.000000. running mean: -18.018243
episode 1669.000000, reward total was -19.000000. running mean: -18.028061
episode 1670.000000, reward total was -19.000000. running mean: -18.037780
episode 1671.000000, reward total was -16.000000. running mean: -18.017403
episode 1672.000000, reward total was -17.000000. running mean: -18.007229
episode 1673.000000, reward total was -17.000000. running mean: -17.997156
episode 1674.000000, reward total was -17.000000. running mean: -17.987185
episode 1675.000000, rewa

episode 1772.000000, reward total was -19.000000. running mean: -18.342527
episode 1773.000000, reward total was -16.000000. running mean: -18.319102
episode 1774.000000, reward total was -17.000000. running mean: -18.305911
episode 1775.000000, reward total was -16.000000. running mean: -18.282852
episode 1776.000000, reward total was -17.000000. running mean: -18.270023
episode 1777.000000, reward total was -19.000000. running mean: -18.277323
episode 1778.000000, reward total was -20.000000. running mean: -18.294550
episode 1779.000000, reward total was -17.000000. running mean: -18.281604
episode 1780.000000, reward total was -18.000000. running mean: -18.278788
episode 1781.000000, reward total was -17.000000. running mean: -18.266000
episode 1782.000000, reward total was -19.000000. running mean: -18.273340
episode 1783.000000, reward total was -21.000000. running mean: -18.300607
episode 1784.000000, reward total was -20.000000. running mean: -18.317601
episode 1785.000000, rewa

episode 1882.000000, reward total was -16.000000. running mean: -18.199344
episode 1883.000000, reward total was -20.000000. running mean: -18.217350
episode 1884.000000, reward total was -18.000000. running mean: -18.215177
episode 1885.000000, reward total was -17.000000. running mean: -18.203025
episode 1886.000000, reward total was -20.000000. running mean: -18.220995
episode 1887.000000, reward total was -16.000000. running mean: -18.198785
episode 1888.000000, reward total was -19.000000. running mean: -18.206797
episode 1889.000000, reward total was -17.000000. running mean: -18.194729
episode 1890.000000, reward total was -16.000000. running mean: -18.172782
episode 1891.000000, reward total was -21.000000. running mean: -18.201054
episode 1892.000000, reward total was -19.000000. running mean: -18.209043
episode 1893.000000, reward total was -17.000000. running mean: -18.196953
episode 1894.000000, reward total was -19.000000. running mean: -18.204983
episode 1895.000000, rewa

episode 1992.000000, reward total was -18.000000. running mean: -17.749777
episode 1993.000000, reward total was -18.000000. running mean: -17.752279
episode 1994.000000, reward total was -21.000000. running mean: -17.784757
episode 1995.000000, reward total was -15.000000. running mean: -17.756909
episode 1996.000000, reward total was -21.000000. running mean: -17.789340
episode 1997.000000, reward total was -18.000000. running mean: -17.791447
episode 1998.000000, reward total was -19.000000. running mean: -17.803532
episode 1999.000000, reward total was -17.000000. running mean: -17.795497
episode 2000.000000, reward total was -19.000000. running mean: -17.807542
episode 2001.000000, reward total was -17.000000. running mean: -17.799466
episode 2002.000000, reward total was -12.000000. running mean: -17.741472
episode 2003.000000, reward total was -19.000000. running mean: -17.754057
episode 2004.000000, reward total was -20.000000. running mean: -17.776516
episode 2005.000000, rewa

episode 2102.000000, reward total was -17.000000. running mean: -17.323425
episode 2103.000000, reward total was -17.000000. running mean: -17.320190
episode 2104.000000, reward total was -16.000000. running mean: -17.306988
episode 2105.000000, reward total was -19.000000. running mean: -17.323919
episode 2106.000000, reward total was -17.000000. running mean: -17.320679
episode 2107.000000, reward total was -21.000000. running mean: -17.357473
episode 2108.000000, reward total was -19.000000. running mean: -17.373898
episode 2109.000000, reward total was -17.000000. running mean: -17.370159
episode 2110.000000, reward total was -19.000000. running mean: -17.386457
episode 2111.000000, reward total was -18.000000. running mean: -17.392593
episode 2112.000000, reward total was -16.000000. running mean: -17.378667
episode 2113.000000, reward total was -15.000000. running mean: -17.354880
episode 2114.000000, reward total was -16.000000. running mean: -17.341331
episode 2115.000000, rewa

episode 2212.000000, reward total was -16.000000. running mean: -17.657326
episode 2213.000000, reward total was -19.000000. running mean: -17.670753
episode 2214.000000, reward total was -19.000000. running mean: -17.684045
episode 2215.000000, reward total was -21.000000. running mean: -17.717205
episode 2216.000000, reward total was -19.000000. running mean: -17.730033
episode 2217.000000, reward total was -16.000000. running mean: -17.712732
episode 2218.000000, reward total was -20.000000. running mean: -17.735605
episode 2219.000000, reward total was -10.000000. running mean: -17.658249
episode 2220.000000, reward total was -19.000000. running mean: -17.671666
episode 2221.000000, reward total was -16.000000. running mean: -17.654950
episode 2222.000000, reward total was -19.000000. running mean: -17.668400
episode 2223.000000, reward total was -16.000000. running mean: -17.651716
episode 2224.000000, reward total was -17.000000. running mean: -17.645199
episode 2225.000000, rewa

episode 2322.000000, reward total was -19.000000. running mean: -17.488171
episode 2323.000000, reward total was -19.000000. running mean: -17.503290
episode 2324.000000, reward total was -17.000000. running mean: -17.498257
episode 2325.000000, reward total was -17.000000. running mean: -17.493274
episode 2326.000000, reward total was -14.000000. running mean: -17.458342
episode 2327.000000, reward total was -14.000000. running mean: -17.423758
episode 2328.000000, reward total was -19.000000. running mean: -17.439521
episode 2329.000000, reward total was -19.000000. running mean: -17.455125
episode 2330.000000, reward total was -19.000000. running mean: -17.470574
episode 2331.000000, reward total was -15.000000. running mean: -17.445868
episode 2332.000000, reward total was -19.000000. running mean: -17.461410
episode 2333.000000, reward total was -15.000000. running mean: -17.436796
episode 2334.000000, reward total was -21.000000. running mean: -17.472428
episode 2335.000000, rewa

episode 2432.000000, reward total was -19.000000. running mean: -17.503365
episode 2433.000000, reward total was -19.000000. running mean: -17.518331
episode 2434.000000, reward total was -17.000000. running mean: -17.513148
episode 2435.000000, reward total was -15.000000. running mean: -17.488016
episode 2436.000000, reward total was -21.000000. running mean: -17.523136
episode 2437.000000, reward total was -18.000000. running mean: -17.527905
episode 2438.000000, reward total was -18.000000. running mean: -17.532626
episode 2439.000000, reward total was -19.000000. running mean: -17.547299
episode 2440.000000, reward total was -19.000000. running mean: -17.561826
episode 2441.000000, reward total was -17.000000. running mean: -17.556208
episode 2442.000000, reward total was -19.000000. running mean: -17.570646
episode 2443.000000, reward total was -18.000000. running mean: -17.574939
episode 2444.000000, reward total was -18.000000. running mean: -17.579190
episode 2445.000000, rewa

episode 2542.000000, reward total was -21.000000. running mean: -17.102178
episode 2543.000000, reward total was -17.000000. running mean: -17.101156
episode 2544.000000, reward total was -17.000000. running mean: -17.100144
episode 2545.000000, reward total was -18.000000. running mean: -17.109143
episode 2546.000000, reward total was -19.000000. running mean: -17.128052
episode 2547.000000, reward total was -14.000000. running mean: -17.096771
episode 2548.000000, reward total was -15.000000. running mean: -17.075803
episode 2549.000000, reward total was -14.000000. running mean: -17.045045
episode 2550.000000, reward total was -15.000000. running mean: -17.024595
episode 2551.000000, reward total was -17.000000. running mean: -17.024349
episode 2552.000000, reward total was -17.000000. running mean: -17.024105
episode 2553.000000, reward total was -17.000000. running mean: -17.023864
episode 2554.000000, reward total was -18.000000. running mean: -17.033626
episode 2555.000000, rewa

episode 2652.000000, reward total was -19.000000. running mean: -17.464865
episode 2653.000000, reward total was -19.000000. running mean: -17.480217
episode 2654.000000, reward total was -15.000000. running mean: -17.455414
episode 2655.000000, reward total was -15.000000. running mean: -17.430860
episode 2656.000000, reward total was -17.000000. running mean: -17.426552
episode 2657.000000, reward total was -19.000000. running mean: -17.442286
episode 2658.000000, reward total was -19.000000. running mean: -17.457863
episode 2659.000000, reward total was -19.000000. running mean: -17.473285
episode 2660.000000, reward total was -18.000000. running mean: -17.478552
episode 2661.000000, reward total was -14.000000. running mean: -17.443766
episode 2662.000000, reward total was -17.000000. running mean: -17.439329
episode 2663.000000, reward total was -21.000000. running mean: -17.474935
episode 2664.000000, reward total was -19.000000. running mean: -17.490186
episode 2665.000000, rewa

episode 2762.000000, reward total was -15.000000. running mean: -17.042521
episode 2763.000000, reward total was -16.000000. running mean: -17.032096
episode 2764.000000, reward total was -19.000000. running mean: -17.051775
episode 2765.000000, reward total was -15.000000. running mean: -17.031257
episode 2766.000000, reward total was -19.000000. running mean: -17.050944
episode 2767.000000, reward total was -15.000000. running mean: -17.030435
episode 2768.000000, reward total was -16.000000. running mean: -17.020131
episode 2769.000000, reward total was -15.000000. running mean: -16.999929
episode 2770.000000, reward total was -20.000000. running mean: -17.029930
episode 2771.000000, reward total was -18.000000. running mean: -17.039631
episode 2772.000000, reward total was -16.000000. running mean: -17.029234
episode 2773.000000, reward total was -11.000000. running mean: -16.968942
episode 2774.000000, reward total was -19.000000. running mean: -16.989253
episode 2775.000000, rewa

episode 2872.000000, reward total was -21.000000. running mean: -16.924215
episode 2873.000000, reward total was -20.000000. running mean: -16.954973
episode 2874.000000, reward total was -12.000000. running mean: -16.905423
episode 2875.000000, reward total was -19.000000. running mean: -16.926369
episode 2876.000000, reward total was -16.000000. running mean: -16.917105
episode 2877.000000, reward total was -14.000000. running mean: -16.887934
episode 2878.000000, reward total was -15.000000. running mean: -16.869055
episode 2879.000000, reward total was -19.000000. running mean: -16.890364
episode 2880.000000, reward total was -19.000000. running mean: -16.911461
episode 2881.000000, reward total was -21.000000. running mean: -16.952346
episode 2882.000000, reward total was -21.000000. running mean: -16.992823
episode 2883.000000, reward total was -17.000000. running mean: -16.992894
episode 2884.000000, reward total was -12.000000. running mean: -16.942965
episode 2885.000000, rewa

episode 2982.000000, reward total was -16.000000. running mean: -17.104144
episode 2983.000000, reward total was -19.000000. running mean: -17.123102
episode 2984.000000, reward total was -14.000000. running mean: -17.091871
episode 2985.000000, reward total was -17.000000. running mean: -17.090952
episode 2986.000000, reward total was -11.000000. running mean: -17.030043
episode 2987.000000, reward total was -16.000000. running mean: -17.019742
episode 2988.000000, reward total was -20.000000. running mean: -17.049545
episode 2989.000000, reward total was -17.000000. running mean: -17.049050
episode 2990.000000, reward total was -21.000000. running mean: -17.088559
episode 2991.000000, reward total was -17.000000. running mean: -17.087674
episode 2992.000000, reward total was -17.000000. running mean: -17.086797
episode 2993.000000, reward total was -16.000000. running mean: -17.075929
episode 2994.000000, reward total was -15.000000. running mean: -17.055170
episode 2995.000000, rewa

episode 3092.000000, reward total was -15.000000. running mean: -17.331485
episode 3093.000000, reward total was -17.000000. running mean: -17.328170
episode 3094.000000, reward total was -20.000000. running mean: -17.354888
episode 3095.000000, reward total was -19.000000. running mean: -17.371339
episode 3096.000000, reward total was -19.000000. running mean: -17.387626
episode 3097.000000, reward total was -11.000000. running mean: -17.323750
episode 3098.000000, reward total was -15.000000. running mean: -17.300512
episode 3099.000000, reward total was -19.000000. running mean: -17.317507
episode 3100.000000, reward total was -17.000000. running mean: -17.314332
episode 3101.000000, reward total was -19.000000. running mean: -17.331189
episode 3102.000000, reward total was -19.000000. running mean: -17.347877
episode 3103.000000, reward total was -20.000000. running mean: -17.374398
episode 3104.000000, reward total was -11.000000. running mean: -17.310654
episode 3105.000000, rewa

episode 3202.000000, reward total was -15.000000. running mean: -17.030462
episode 3203.000000, reward total was -19.000000. running mean: -17.050157
episode 3204.000000, reward total was -21.000000. running mean: -17.089656
episode 3205.000000, reward total was -13.000000. running mean: -17.048759
episode 3206.000000, reward total was -18.000000. running mean: -17.058272
episode 3207.000000, reward total was -16.000000. running mean: -17.047689
episode 3208.000000, reward total was -21.000000. running mean: -17.087212
episode 3209.000000, reward total was -17.000000. running mean: -17.086340
episode 3210.000000, reward total was -17.000000. running mean: -17.085476
episode 3211.000000, reward total was -11.000000. running mean: -17.024622
episode 3212.000000, reward total was -14.000000. running mean: -16.994376
episode 3213.000000, reward total was -19.000000. running mean: -17.014432
episode 3214.000000, reward total was -18.000000. running mean: -17.024287
episode 3215.000000, rewa

episode 3312.000000, reward total was -15.000000. running mean: -16.616268
episode 3313.000000, reward total was -15.000000. running mean: -16.600106
episode 3314.000000, reward total was -14.000000. running mean: -16.574105
episode 3315.000000, reward total was -17.000000. running mean: -16.578364
episode 3316.000000, reward total was -17.000000. running mean: -16.582580
episode 3317.000000, reward total was -15.000000. running mean: -16.566754
episode 3318.000000, reward total was -15.000000. running mean: -16.551087
episode 3319.000000, reward total was -13.000000. running mean: -16.515576
episode 3320.000000, reward total was -17.000000. running mean: -16.520420
episode 3321.000000, reward total was -19.000000. running mean: -16.545216
episode 3322.000000, reward total was -15.000000. running mean: -16.529764
episode 3323.000000, reward total was -20.000000. running mean: -16.564466
episode 3324.000000, reward total was -17.000000. running mean: -16.568821
episode 3325.000000, rewa

episode 3422.000000, reward total was -14.000000. running mean: -16.377086
episode 3423.000000, reward total was -18.000000. running mean: -16.393316
episode 3424.000000, reward total was -16.000000. running mean: -16.389382
episode 3425.000000, reward total was -14.000000. running mean: -16.365489
episode 3426.000000, reward total was -17.000000. running mean: -16.371834
episode 3427.000000, reward total was -15.000000. running mean: -16.358115
episode 3428.000000, reward total was -18.000000. running mean: -16.374534
episode 3429.000000, reward total was -19.000000. running mean: -16.400789
episode 3430.000000, reward total was -17.000000. running mean: -16.406781
episode 3431.000000, reward total was -15.000000. running mean: -16.392713
episode 3432.000000, reward total was -16.000000. running mean: -16.388786
episode 3433.000000, reward total was -17.000000. running mean: -16.394898
episode 3434.000000, reward total was -17.000000. running mean: -16.400949
episode 3435.000000, rewa

episode 3532.000000, reward total was -16.000000. running mean: -16.321315
episode 3533.000000, reward total was -15.000000. running mean: -16.308102
episode 3534.000000, reward total was -20.000000. running mean: -16.345021
episode 3535.000000, reward total was -15.000000. running mean: -16.331571
episode 3536.000000, reward total was -18.000000. running mean: -16.348255
episode 3537.000000, reward total was -15.000000. running mean: -16.334773
episode 3538.000000, reward total was -16.000000. running mean: -16.331425
episode 3539.000000, reward total was -17.000000. running mean: -16.338111
episode 3540.000000, reward total was -20.000000. running mean: -16.374730
episode 3541.000000, reward total was -21.000000. running mean: -16.420982
episode 3542.000000, reward total was -13.000000. running mean: -16.386773
episode 3543.000000, reward total was -17.000000. running mean: -16.392905
episode 3544.000000, reward total was -15.000000. running mean: -16.378976
episode 3545.000000, rewa

episode 3642.000000, reward total was -15.000000. running mean: -16.396136
episode 3643.000000, reward total was -14.000000. running mean: -16.372175
episode 3644.000000, reward total was -15.000000. running mean: -16.358453
episode 3645.000000, reward total was -17.000000. running mean: -16.364869
episode 3646.000000, reward total was -11.000000. running mean: -16.311220
episode 3647.000000, reward total was -15.000000. running mean: -16.298108
episode 3648.000000, reward total was -15.000000. running mean: -16.285127
episode 3649.000000, reward total was -17.000000. running mean: -16.292275
episode 3650.000000, reward total was -14.000000. running mean: -16.269353
episode 3651.000000, reward total was -13.000000. running mean: -16.236659
episode 3652.000000, reward total was -18.000000. running mean: -16.254293
episode 3653.000000, reward total was -15.000000. running mean: -16.241750
episode 3654.000000, reward total was -21.000000. running mean: -16.289332
episode 3655.000000, rewa

episode 3752.000000, reward total was -15.000000. running mean: -15.948063
episode 3753.000000, reward total was -13.000000. running mean: -15.918582
episode 3754.000000, reward total was -16.000000. running mean: -15.919396
episode 3755.000000, reward total was -14.000000. running mean: -15.900202
episode 3756.000000, reward total was -15.000000. running mean: -15.891200
episode 3757.000000, reward total was -13.000000. running mean: -15.862288
episode 3758.000000, reward total was -16.000000. running mean: -15.863665
episode 3759.000000, reward total was -21.000000. running mean: -15.915029
episode 3760.000000, reward total was -13.000000. running mean: -15.885878
episode 3761.000000, reward total was -15.000000. running mean: -15.877020
episode 3762.000000, reward total was -17.000000. running mean: -15.888249
episode 3763.000000, reward total was -14.000000. running mean: -15.869367
episode 3764.000000, reward total was -9.000000. running mean: -15.800673
episode 3765.000000, rewar

episode 3862.000000, reward total was -15.000000. running mean: -15.928671
episode 3863.000000, reward total was -16.000000. running mean: -15.929384
episode 3864.000000, reward total was -15.000000. running mean: -15.920090
episode 3865.000000, reward total was -15.000000. running mean: -15.910889
episode 3866.000000, reward total was -16.000000. running mean: -15.911780
episode 3867.000000, reward total was -13.000000. running mean: -15.882662
episode 3868.000000, reward total was -17.000000. running mean: -15.893836
episode 3869.000000, reward total was -15.000000. running mean: -15.884897
episode 3870.000000, reward total was -15.000000. running mean: -15.876048
episode 3871.000000, reward total was -19.000000. running mean: -15.907288
episode 3872.000000, reward total was -16.000000. running mean: -15.908215
episode 3873.000000, reward total was -11.000000. running mean: -15.859133
episode 3874.000000, reward total was -16.000000. running mean: -15.860542
episode 3875.000000, rewa

episode 3972.000000, reward total was -17.000000. running mean: -15.462560
episode 3973.000000, reward total was -17.000000. running mean: -15.477934
episode 3974.000000, reward total was -11.000000. running mean: -15.433155
episode 3975.000000, reward total was -19.000000. running mean: -15.468823
episode 3976.000000, reward total was -19.000000. running mean: -15.504135
episode 3977.000000, reward total was -17.000000. running mean: -15.519094
episode 3978.000000, reward total was -17.000000. running mean: -15.533903
episode 3979.000000, reward total was -21.000000. running mean: -15.588564
episode 3980.000000, reward total was -15.000000. running mean: -15.582678
episode 3981.000000, reward total was -11.000000. running mean: -15.536852
episode 3982.000000, reward total was -15.000000. running mean: -15.531483
episode 3983.000000, reward total was -15.000000. running mean: -15.526168
episode 3984.000000, reward total was -13.000000. running mean: -15.500906
episode 3985.000000, rewa

episode 4082.000000, reward total was -15.000000. running mean: -15.482841
episode 4083.000000, reward total was -15.000000. running mean: -15.478013
episode 4084.000000, reward total was -11.000000. running mean: -15.433233
episode 4085.000000, reward total was -17.000000. running mean: -15.448900
episode 4086.000000, reward total was -14.000000. running mean: -15.434411
episode 4087.000000, reward total was -19.000000. running mean: -15.470067
episode 4088.000000, reward total was -14.000000. running mean: -15.455367
episode 4089.000000, reward total was -18.000000. running mean: -15.480813
episode 4090.000000, reward total was -15.000000. running mean: -15.476005
episode 4091.000000, reward total was -16.000000. running mean: -15.481245
episode 4092.000000, reward total was -16.000000. running mean: -15.486432
episode 4093.000000, reward total was -19.000000. running mean: -15.521568
episode 4094.000000, reward total was -16.000000. running mean: -15.526352
episode 4095.000000, rewa

episode 4192.000000, reward total was -12.000000. running mean: -15.210810
episode 4193.000000, reward total was -16.000000. running mean: -15.218702
episode 4194.000000, reward total was -16.000000. running mean: -15.226515
episode 4195.000000, reward total was -12.000000. running mean: -15.194250
episode 4196.000000, reward total was -16.000000. running mean: -15.202308
episode 4197.000000, reward total was -19.000000. running mean: -15.240284
episode 4198.000000, reward total was -17.000000. running mean: -15.257882
episode 4199.000000, reward total was -15.000000. running mean: -15.255303
episode 4200.000000, reward total was -17.000000. running mean: -15.272750
episode 4201.000000, reward total was -10.000000. running mean: -15.220022
episode 4202.000000, reward total was -16.000000. running mean: -15.227822
episode 4203.000000, reward total was -11.000000. running mean: -15.185544
episode 4204.000000, reward total was -17.000000. running mean: -15.203688
episode 4205.000000, rewa

episode 4302.000000, reward total was -16.000000. running mean: -14.064858
episode 4303.000000, reward total was -12.000000. running mean: -14.044209
episode 4304.000000, reward total was -13.000000. running mean: -14.033767
episode 4305.000000, reward total was -15.000000. running mean: -14.043429
episode 4306.000000, reward total was -11.000000. running mean: -14.012995
episode 4307.000000, reward total was -15.000000. running mean: -14.022865
episode 4308.000000, reward total was -14.000000. running mean: -14.022636
episode 4309.000000, reward total was -12.000000. running mean: -14.002410
episode 4310.000000, reward total was -14.000000. running mean: -14.002386
episode 4311.000000, reward total was -13.000000. running mean: -13.992362
episode 4312.000000, reward total was -17.000000. running mean: -14.022439
episode 4313.000000, reward total was -17.000000. running mean: -14.052214
episode 4314.000000, reward total was -13.000000. running mean: -14.041692
episode 4315.000000, rewa

episode 4412.000000, reward total was -9.000000. running mean: -13.192711
episode 4413.000000, reward total was -15.000000. running mean: -13.210784
episode 4414.000000, reward total was -15.000000. running mean: -13.228676
episode 4415.000000, reward total was -13.000000. running mean: -13.226389
episode 4416.000000, reward total was -8.000000. running mean: -13.174125
episode 4417.000000, reward total was -10.000000. running mean: -13.142384
episode 4418.000000, reward total was -7.000000. running mean: -13.080960
episode 4419.000000, reward total was -8.000000. running mean: -13.030151
episode 4420.000000, reward total was -18.000000. running mean: -13.079849
episode 4421.000000, reward total was -13.000000. running mean: -13.079051
episode 4422.000000, reward total was -8.000000. running mean: -13.028260
episode 4423.000000, reward total was -11.000000. running mean: -13.007977
episode 4424.000000, reward total was -11.000000. running mean: -12.987898
episode 4425.000000, reward to

episode 4522.000000, reward total was -14.000000. running mean: -12.485457
episode 4523.000000, reward total was -13.000000. running mean: -12.490603
episode 4524.000000, reward total was -11.000000. running mean: -12.475697
episode 4525.000000, reward total was -18.000000. running mean: -12.530940
episode 4526.000000, reward total was -10.000000. running mean: -12.505630
episode 4527.000000, reward total was -16.000000. running mean: -12.540574
episode 4528.000000, reward total was -13.000000. running mean: -12.545168
episode 4529.000000, reward total was -10.000000. running mean: -12.519717
episode 4530.000000, reward total was -15.000000. running mean: -12.544519
episode 4531.000000, reward total was -9.000000. running mean: -12.509074
episode 4532.000000, reward total was -7.000000. running mean: -12.453983
episode 4533.000000, reward total was -13.000000. running mean: -12.459444
episode 4534.000000, reward total was -15.000000. running mean: -12.484849
episode 4535.000000, reward

episode 4632.000000, reward total was -15.000000. running mean: -13.006240
episode 4633.000000, reward total was -15.000000. running mean: -13.026178
episode 4634.000000, reward total was -17.000000. running mean: -13.065916
episode 4635.000000, reward total was -13.000000. running mean: -13.065257
episode 4636.000000, reward total was -11.000000. running mean: -13.044605
episode 4637.000000, reward total was -14.000000. running mean: -13.054159
episode 4638.000000, reward total was -3.000000. running mean: -12.953617
episode 4639.000000, reward total was -19.000000. running mean: -13.014081
episode 4640.000000, reward total was -6.000000. running mean: -12.943940
episode 4641.000000, reward total was -17.000000. running mean: -12.984501
episode 4642.000000, reward total was -11.000000. running mean: -12.964656
episode 4643.000000, reward total was -10.000000. running mean: -12.935009
episode 4644.000000, reward total was -12.000000. running mean: -12.925659
episode 4645.000000, reward

episode 4742.000000, reward total was -13.000000. running mean: -13.358216
episode 4743.000000, reward total was -11.000000. running mean: -13.334634
episode 4744.000000, reward total was -15.000000. running mean: -13.351288
episode 4745.000000, reward total was -13.000000. running mean: -13.347775
episode 4746.000000, reward total was -15.000000. running mean: -13.364297
episode 4747.000000, reward total was -13.000000. running mean: -13.360654
episode 4748.000000, reward total was -13.000000. running mean: -13.357048
episode 4749.000000, reward total was -19.000000. running mean: -13.413477
episode 4750.000000, reward total was -11.000000. running mean: -13.389342
episode 4751.000000, reward total was -14.000000. running mean: -13.395449
episode 4752.000000, reward total was -10.000000. running mean: -13.361494
episode 4753.000000, reward total was -9.000000. running mean: -13.317879
episode 4754.000000, reward total was -12.000000. running mean: -13.304701
episode 4755.000000, rewar

episode 4852.000000, reward total was -11.000000. running mean: -13.391179
episode 4853.000000, reward total was -13.000000. running mean: -13.387267
episode 4854.000000, reward total was -17.000000. running mean: -13.423394
episode 4855.000000, reward total was -14.000000. running mean: -13.429161
episode 4856.000000, reward total was -14.000000. running mean: -13.434869
episode 4857.000000, reward total was -13.000000. running mean: -13.430520
episode 4858.000000, reward total was -14.000000. running mean: -13.436215
episode 4859.000000, reward total was -11.000000. running mean: -13.411853
episode 4860.000000, reward total was -17.000000. running mean: -13.447734
episode 4861.000000, reward total was -15.000000. running mean: -13.463257
episode 4862.000000, reward total was -19.000000. running mean: -13.518624
episode 4863.000000, reward total was -15.000000. running mean: -13.533438
episode 4864.000000, reward total was -11.000000. running mean: -13.508104
episode 4865.000000, rewa

episode 4962.000000, reward total was -11.000000. running mean: -13.164535
episode 4963.000000, reward total was -9.000000. running mean: -13.122889
episode 4964.000000, reward total was -7.000000. running mean: -13.061660
episode 4965.000000, reward total was -11.000000. running mean: -13.041044
episode 4966.000000, reward total was -18.000000. running mean: -13.090633
episode 4967.000000, reward total was -16.000000. running mean: -13.119727
episode 4968.000000, reward total was -13.000000. running mean: -13.118530
episode 4969.000000, reward total was -13.000000. running mean: -13.117345
episode 4970.000000, reward total was -12.000000. running mean: -13.106171
episode 4971.000000, reward total was -12.000000. running mean: -13.095109
episode 4972.000000, reward total was -13.000000. running mean: -13.094158
episode 4973.000000, reward total was -14.000000. running mean: -13.103217
episode 4974.000000, reward total was -18.000000. running mean: -13.152185
episode 4975.000000, reward

episode 5072.000000, reward total was -15.000000. running mean: -13.228742
episode 5073.000000, reward total was -15.000000. running mean: -13.246455
episode 5074.000000, reward total was -11.000000. running mean: -13.223990
episode 5075.000000, reward total was -15.000000. running mean: -13.241751
episode 5076.000000, reward total was -15.000000. running mean: -13.259333
episode 5077.000000, reward total was -9.000000. running mean: -13.216740
episode 5078.000000, reward total was -14.000000. running mean: -13.224572
episode 5079.000000, reward total was -8.000000. running mean: -13.172327
episode 5080.000000, reward total was -14.000000. running mean: -13.180603
episode 5081.000000, reward total was -11.000000. running mean: -13.158797
episode 5082.000000, reward total was -7.000000. running mean: -13.097209
episode 5083.000000, reward total was -11.000000. running mean: -13.076237
episode 5084.000000, reward total was -12.000000. running mean: -13.065475
episode 5085.000000, reward 

episode 5182.000000, reward total was -16.000000. running mean: -13.218313
episode 5183.000000, reward total was -16.000000. running mean: -13.246130
episode 5184.000000, reward total was -14.000000. running mean: -13.253669
episode 5185.000000, reward total was -9.000000. running mean: -13.211132
episode 5186.000000, reward total was -13.000000. running mean: -13.209021
episode 5187.000000, reward total was -11.000000. running mean: -13.186931
episode 5188.000000, reward total was -13.000000. running mean: -13.185061
episode 5189.000000, reward total was -13.000000. running mean: -13.183211
episode 5190.000000, reward total was -9.000000. running mean: -13.141379
episode 5191.000000, reward total was -19.000000. running mean: -13.199965
episode 5192.000000, reward total was -14.000000. running mean: -13.207965
episode 5193.000000, reward total was -16.000000. running mean: -13.235885
episode 5194.000000, reward total was -5.000000. running mean: -13.153527
episode 5195.000000, reward 

episode 5292.000000, reward total was -19.000000. running mean: -13.392523
episode 5293.000000, reward total was -13.000000. running mean: -13.388598
episode 5294.000000, reward total was -14.000000. running mean: -13.394712
episode 5295.000000, reward total was -15.000000. running mean: -13.410765
episode 5296.000000, reward total was -12.000000. running mean: -13.396657
episode 5297.000000, reward total was -14.000000. running mean: -13.402691
episode 5298.000000, reward total was -13.000000. running mean: -13.398664
episode 5299.000000, reward total was -11.000000. running mean: -13.374677
episode 5300.000000, reward total was -12.000000. running mean: -13.360931
episode 5301.000000, reward total was -13.000000. running mean: -13.357321
episode 5302.000000, reward total was -11.000000. running mean: -13.333748
episode 5303.000000, reward total was -13.000000. running mean: -13.330411
episode 5304.000000, reward total was -18.000000. running mean: -13.377106
episode 5305.000000, rewa

episode 5402.000000, reward total was -10.000000. running mean: -12.853981
episode 5403.000000, reward total was -14.000000. running mean: -12.865441
episode 5404.000000, reward total was -15.000000. running mean: -12.886787
episode 5405.000000, reward total was -17.000000. running mean: -12.927919
episode 5406.000000, reward total was -6.000000. running mean: -12.858640
episode 5407.000000, reward total was -7.000000. running mean: -12.800053
episode 5408.000000, reward total was -11.000000. running mean: -12.782053
episode 5409.000000, reward total was -12.000000. running mean: -12.774232
episode 5410.000000, reward total was -14.000000. running mean: -12.786490
episode 5411.000000, reward total was -10.000000. running mean: -12.758625
episode 5412.000000, reward total was -17.000000. running mean: -12.801039
episode 5413.000000, reward total was -13.000000. running mean: -12.803028
episode 5414.000000, reward total was -10.000000. running mean: -12.774998
episode 5415.000000, reward

episode 5512.000000, reward total was -14.000000. running mean: -12.246095
episode 5513.000000, reward total was -13.000000. running mean: -12.253634
episode 5514.000000, reward total was -18.000000. running mean: -12.311098
episode 5515.000000, reward total was -12.000000. running mean: -12.307987
episode 5516.000000, reward total was -13.000000. running mean: -12.314907
episode 5517.000000, reward total was -19.000000. running mean: -12.381758
episode 5518.000000, reward total was -11.000000. running mean: -12.367940
episode 5519.000000, reward total was -17.000000. running mean: -12.414261
episode 5520.000000, reward total was -13.000000. running mean: -12.420118
episode 5521.000000, reward total was -14.000000. running mean: -12.435917
episode 5522.000000, reward total was -13.000000. running mean: -12.441558
episode 5523.000000, reward total was -16.000000. running mean: -12.477142
episode 5524.000000, reward total was -12.000000. running mean: -12.472371
episode 5525.000000, rewa

episode 5622.000000, reward total was -15.000000. running mean: -12.653309
episode 5623.000000, reward total was -13.000000. running mean: -12.656776
episode 5624.000000, reward total was -10.000000. running mean: -12.630208
episode 5625.000000, reward total was -10.000000. running mean: -12.603906
episode 5626.000000, reward total was -15.000000. running mean: -12.627867
episode 5627.000000, reward total was -14.000000. running mean: -12.641588
episode 5628.000000, reward total was -9.000000. running mean: -12.605172
episode 5629.000000, reward total was -11.000000. running mean: -12.589121
episode 5630.000000, reward total was -11.000000. running mean: -12.573229
episode 5631.000000, reward total was -13.000000. running mean: -12.577497
episode 5632.000000, reward total was -16.000000. running mean: -12.611722
episode 5633.000000, reward total was -13.000000. running mean: -12.615605
episode 5634.000000, reward total was -13.000000. running mean: -12.619449
episode 5635.000000, rewar

episode 5732.000000, reward total was -15.000000. running mean: -12.095430
episode 5733.000000, reward total was -9.000000. running mean: -12.064475
episode 5734.000000, reward total was -16.000000. running mean: -12.103831
episode 5735.000000, reward total was -6.000000. running mean: -12.042792
episode 5736.000000, reward total was -13.000000. running mean: -12.052364
episode 5737.000000, reward total was -6.000000. running mean: -11.991841
episode 5738.000000, reward total was -12.000000. running mean: -11.991922
episode 5739.000000, reward total was -13.000000. running mean: -12.002003
episode 5740.000000, reward total was -18.000000. running mean: -12.061983
episode 5741.000000, reward total was -12.000000. running mean: -12.061363
episode 5742.000000, reward total was -4.000000. running mean: -11.980750
episode 5743.000000, reward total was -11.000000. running mean: -11.970942
episode 5744.000000, reward total was -19.000000. running mean: -12.041233
episode 5745.000000, reward t

episode 5842.000000, reward total was -9.000000. running mean: -12.612558
episode 5843.000000, reward total was -12.000000. running mean: -12.606433
episode 5844.000000, reward total was -9.000000. running mean: -12.570368
episode 5845.000000, reward total was -13.000000. running mean: -12.574665
episode 5846.000000, reward total was -9.000000. running mean: -12.538918
episode 5847.000000, reward total was -16.000000. running mean: -12.573529
episode 5848.000000, reward total was -13.000000. running mean: -12.577794
episode 5849.000000, reward total was -10.000000. running mean: -12.552016
episode 5850.000000, reward total was -15.000000. running mean: -12.576496
episode 5851.000000, reward total was -12.000000. running mean: -12.570731
episode 5852.000000, reward total was -9.000000. running mean: -12.535023
episode 5853.000000, reward total was -11.000000. running mean: -12.519673
episode 5854.000000, reward total was -9.000000. running mean: -12.484476
episode 5855.000000, reward to

episode 5952.000000, reward total was -16.000000. running mean: -12.689530
episode 5953.000000, reward total was -13.000000. running mean: -12.692634
episode 5954.000000, reward total was -16.000000. running mean: -12.725708
episode 5955.000000, reward total was -13.000000. running mean: -12.728451
episode 5956.000000, reward total was -14.000000. running mean: -12.741166
episode 5957.000000, reward total was -13.000000. running mean: -12.743755
episode 5958.000000, reward total was -9.000000. running mean: -12.706317
episode 5959.000000, reward total was -15.000000. running mean: -12.729254
episode 5960.000000, reward total was -17.000000. running mean: -12.771962
episode 5961.000000, reward total was -14.000000. running mean: -12.784242
episode 5962.000000, reward total was -17.000000. running mean: -12.826399
episode 5963.000000, reward total was -10.000000. running mean: -12.798135
episode 5964.000000, reward total was -15.000000. running mean: -12.820154
episode 5965.000000, rewar

episode 6062.000000, reward total was -14.000000. running mean: -12.774940
episode 6063.000000, reward total was -13.000000. running mean: -12.777191
episode 6064.000000, reward total was -16.000000. running mean: -12.809419
episode 6065.000000, reward total was -12.000000. running mean: -12.801324
episode 6066.000000, reward total was -15.000000. running mean: -12.823311
episode 6067.000000, reward total was -13.000000. running mean: -12.825078
episode 6068.000000, reward total was -12.000000. running mean: -12.816827
episode 6069.000000, reward total was -12.000000. running mean: -12.808659
episode 6070.000000, reward total was -12.000000. running mean: -12.800572
episode 6071.000000, reward total was -14.000000. running mean: -12.812567
episode 6072.000000, reward total was -13.000000. running mean: -12.814441
episode 6073.000000, reward total was -12.000000. running mean: -12.806297
episode 6074.000000, reward total was -8.000000. running mean: -12.758234
episode 6075.000000, rewar

episode 6172.000000, reward total was -6.000000. running mean: -12.392427
episode 6173.000000, reward total was -13.000000. running mean: -12.398503
episode 6174.000000, reward total was -14.000000. running mean: -12.414518
episode 6175.000000, reward total was -16.000000. running mean: -12.450373
episode 6176.000000, reward total was -10.000000. running mean: -12.425869
episode 6177.000000, reward total was -11.000000. running mean: -12.411610
episode 6178.000000, reward total was -15.000000. running mean: -12.437494
episode 6179.000000, reward total was -12.000000. running mean: -12.433119
episode 6180.000000, reward total was -5.000000. running mean: -12.358788
episode 6181.000000, reward total was -15.000000. running mean: -12.385200
episode 6182.000000, reward total was -14.000000. running mean: -12.401348
episode 6183.000000, reward total was -13.000000. running mean: -12.407335
episode 6184.000000, reward total was -11.000000. running mean: -12.393261
episode 6185.000000, reward

episode 6282.000000, reward total was -11.000000. running mean: -12.019372
episode 6283.000000, reward total was -7.000000. running mean: -11.969179
episode 6284.000000, reward total was -9.000000. running mean: -11.939487
episode 6285.000000, reward total was -9.000000. running mean: -11.910092
episode 6286.000000, reward total was -9.000000. running mean: -11.880991
episode 6287.000000, reward total was -9.000000. running mean: -11.852181
episode 6288.000000, reward total was -15.000000. running mean: -11.883659
episode 6289.000000, reward total was -16.000000. running mean: -11.924823
episode 6290.000000, reward total was -11.000000. running mean: -11.915574
episode 6291.000000, reward total was -11.000000. running mean: -11.906419
episode 6292.000000, reward total was -13.000000. running mean: -11.917355
episode 6293.000000, reward total was -9.000000. running mean: -11.888181
episode 6294.000000, reward total was -13.000000. running mean: -11.899299
episode 6295.000000, reward tot

episode 6392.000000, reward total was -11.000000. running mean: -11.877220
episode 6393.000000, reward total was -11.000000. running mean: -11.868448
episode 6394.000000, reward total was -6.000000. running mean: -11.809763
episode 6395.000000, reward total was -11.000000. running mean: -11.801666
episode 6396.000000, reward total was -15.000000. running mean: -11.833649
episode 6397.000000, reward total was -12.000000. running mean: -11.835312
episode 6398.000000, reward total was -16.000000. running mean: -11.876959
episode 6399.000000, reward total was -10.000000. running mean: -11.858190
episode 6400.000000, reward total was -15.000000. running mean: -11.889608
episode 6401.000000, reward total was -10.000000. running mean: -11.870712
episode 6402.000000, reward total was -17.000000. running mean: -11.922005
episode 6403.000000, reward total was -12.000000. running mean: -11.922785
episode 6404.000000, reward total was -9.000000. running mean: -11.893557
episode 6405.000000, reward

episode 6502.000000, reward total was -11.000000. running mean: -12.282094
episode 6503.000000, reward total was -13.000000. running mean: -12.289273
episode 6504.000000, reward total was -9.000000. running mean: -12.256381
episode 6505.000000, reward total was -16.000000. running mean: -12.293817
episode 6506.000000, reward total was -11.000000. running mean: -12.280879
episode 6507.000000, reward total was -12.000000. running mean: -12.278070
episode 6508.000000, reward total was -15.000000. running mean: -12.305289
episode 6509.000000, reward total was -18.000000. running mean: -12.362236
episode 6510.000000, reward total was -16.000000. running mean: -12.398614
episode 6511.000000, reward total was -13.000000. running mean: -12.404628
episode 6512.000000, reward total was -12.000000. running mean: -12.400581
episode 6513.000000, reward total was -11.000000. running mean: -12.386576
episode 6514.000000, reward total was -18.000000. running mean: -12.442710
episode 6515.000000, rewar

episode 6612.000000, reward total was -10.000000. running mean: -12.075402
episode 6613.000000, reward total was -12.000000. running mean: -12.074648
episode 6614.000000, reward total was -15.000000. running mean: -12.103902
episode 6615.000000, reward total was -13.000000. running mean: -12.112863
episode 6616.000000, reward total was -14.000000. running mean: -12.131734
episode 6617.000000, reward total was -15.000000. running mean: -12.160417
episode 6618.000000, reward total was -14.000000. running mean: -12.178812
episode 6619.000000, reward total was -15.000000. running mean: -12.207024
episode 6620.000000, reward total was -7.000000. running mean: -12.154954
episode 6621.000000, reward total was -11.000000. running mean: -12.143405
episode 6622.000000, reward total was -11.000000. running mean: -12.131970
episode 6623.000000, reward total was -7.000000. running mean: -12.080651
episode 6624.000000, reward total was -15.000000. running mean: -12.109844
episode 6625.000000, reward

episode 6722.000000, reward total was -8.000000. running mean: -11.831483
episode 6723.000000, reward total was -11.000000. running mean: -11.823168
episode 6724.000000, reward total was -12.000000. running mean: -11.824937
episode 6725.000000, reward total was -15.000000. running mean: -11.856687
episode 6726.000000, reward total was -12.000000. running mean: -11.858121
episode 6727.000000, reward total was -10.000000. running mean: -11.839539
episode 6728.000000, reward total was -16.000000. running mean: -11.881144
episode 6729.000000, reward total was -15.000000. running mean: -11.912333
episode 6730.000000, reward total was -9.000000. running mean: -11.883209
episode 6731.000000, reward total was -9.000000. running mean: -11.854377
episode 6732.000000, reward total was -15.000000. running mean: -11.885833
episode 6733.000000, reward total was -10.000000. running mean: -11.866975
episode 6734.000000, reward total was -15.000000. running mean: -11.898305
episode 6735.000000, reward 

episode 6832.000000, reward total was -13.000000. running mean: -11.413276
episode 6833.000000, reward total was -11.000000. running mean: -11.409143
episode 6834.000000, reward total was -18.000000. running mean: -11.475051
episode 6835.000000, reward total was -14.000000. running mean: -11.500301
episode 6836.000000, reward total was -13.000000. running mean: -11.515298
episode 6837.000000, reward total was -10.000000. running mean: -11.500145
episode 6838.000000, reward total was -9.000000. running mean: -11.475143
episode 6839.000000, reward total was -13.000000. running mean: -11.490392
episode 6840.000000, reward total was -9.000000. running mean: -11.465488
episode 6841.000000, reward total was -5.000000. running mean: -11.400833
episode 6842.000000, reward total was -5.000000. running mean: -11.336825
episode 6843.000000, reward total was -10.000000. running mean: -11.323457
episode 6844.000000, reward total was -11.000000. running mean: -11.320222
episode 6845.000000, reward t

episode 6942.000000, reward total was -8.000000. running mean: -10.609257
episode 6943.000000, reward total was -7.000000. running mean: -10.573165
episode 6944.000000, reward total was -9.000000. running mean: -10.557433
episode 6945.000000, reward total was -9.000000. running mean: -10.541859
episode 6946.000000, reward total was -9.000000. running mean: -10.526440
episode 6947.000000, reward total was -8.000000. running mean: -10.501176
episode 6948.000000, reward total was -11.000000. running mean: -10.506164
episode 6949.000000, reward total was -10.000000. running mean: -10.501102
episode 6950.000000, reward total was -17.000000. running mean: -10.566091
episode 6951.000000, reward total was -8.000000. running mean: -10.540430
episode 6952.000000, reward total was -14.000000. running mean: -10.575026
episode 6953.000000, reward total was -12.000000. running mean: -10.589276
episode 6954.000000, reward total was -13.000000. running mean: -10.613383
episode 6955.000000, reward tota