In [1]:
import gym
import numpy as np
from gym.wrappers import AtariPreprocessing
# NOTE(review): this only sets an attribute on the gym module; the loops below still
# unpack the 4-tuple (observation, reward, done, info) from env.step -- confirm it has any effect.
gym.new_step_api = True
env = gym.make('Pong-v0')

# network size
H = 1600 # hidden layer width
D = 80 * 80 # input size: one value per cell of the 80x80 grid

# weights, scaled by 1/sqrt(fan-in) ("Xavier" initialization); W1 is drawn before W2
model = {
  'W1': np.random.randn(H, D) / np.sqrt(D),
  'W2': np.random.randn(H) / np.sqrt(H),
}

# hyperparameters
batch_size = 10 # number of episodes between parameter updates
learning_rate = 1e-4
gamma = 0.99 # reward discount factor
decay_rate = 0.99 # decay of the RMSProp running average of grad^2

# per-parameter accumulators, zero-filled with the same shapes as the weights
grad_buffer = {name: np.zeros_like(weights) for name, weights in model.items()} # gradients summed over a batch
rmsprop_cache = {name: np.zeros_like(weights) for name, weights in model.items()} # RMSProp memory

def sigmoid(x):
  """Logistic function 1 / (1 + exp(-x)), squashing x into the interval [0, 1].

  Evaluated in a numerically stable way: the exponential is only ever taken of a
  non-positive number, so large negative inputs no longer overflow (the naive
  formula emits a RuntimeWarning there).

  Args:
    x: scalar or array-like of real numbers.

  Returns:
    A numpy scalar for scalar input, otherwise an array with the shape of x.
  """
  z = np.asarray(x)
  if not np.issubdtype(z.dtype, np.floating):
    z = z.astype(float) # integer/bool input: compute in float64
  e = np.exp(-np.abs(z)) # always in [0, 1], cannot overflow
  # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)) -- the same function
  out = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
  return out[()] # unwrap 0-d results to a scalar, leave real arrays untouched

def prepro(I):
  """Turn a raw frame into a flat binary float vector.

  Rows 35..194 are kept, every second row and column is taken, and only the
  first colour channel is used. Pixels equal to 0, 144 or 109 (the two
  background colours) become 0.0; every other pixel (paddles, ball) becomes 1.0.

  The input frame is NOT modified: the previous version wrote into views of the
  caller's array and so overwrote part of the observation in place.

  Args:
    I: array-like frame indexed as [row, column, channel]; presumably the
       210x160x3 Atari frame, which yields 80*80 = 6400 values -- confirm.

  Returns:
    1-D float array of zeros and ones.
  """
  frame = np.asarray(I)[35:195:2, ::2, 0] # crop + downsample by 2 + first channel (a view, read only)
  is_background = (frame == 0) | (frame == 144) | (frame == 109)
  return np.where(is_background, 0.0, 1.0).ravel()

def discount_rewards(r, discount=None):
  """Compute discounted returns, walking backwards through the rewards.

  The running sum is reset whenever a non-zero reward is met, because in Pong a
  non-zero reward marks the end of a rally (game boundary).

  The result is always floating point: previously an integer reward array
  produced an integer output and the discounted values were silently truncated.

  Args:
    r: rewards, either 1-D or a column of shape (N, 1).
    discount: discount factor; defaults to the module-level `gamma`.

  Returns:
    Array with the shape of r holding the discounted return at every step.
  """
  if discount is None:
    discount = gamma
  r = np.asarray(r)
  out_dtype = r.dtype if np.issubdtype(r.dtype, np.floating) else float
  discounted_r = np.zeros_like(r, dtype=out_dtype)
  running_add = 0
  for t in reversed(range(r.size)):
    if r[t] != 0: running_add = 0 # reset the sum, since this was a game boundary (pong specific!)
    running_add = running_add * discount + r[t]
    discounted_r[t] = running_add
  return discounted_r

def policy_forward(x):
  """Forward pass of the two-layer policy network stored in the global `model`.

  Args:
    x: input vector fed to the first layer.

  Returns:
    (p, h): p is the sigmoid of the output unit (used by the callers as the
    probability of taking action 2), h is the hidden activation after the ReLU.
  """
  pre_activation = np.dot(model['W1'], x)
  hidden = np.where(pre_activation < 0, 0, pre_activation) # ReLU: negative entries become 0
  prob = sigmoid(np.dot(model['W2'], hidden))
  return prob, hidden

def policy_backward(epx, eph, epdlogp):
  """Backward pass over a whole episode; reads W2 from the global `model`.

  Args:
    epx: stacked inputs, one row per step.
    eph: stacked hidden activations, one row per step.
    epdlogp: per-step output gradients (already scaled by the caller).

  Returns:
    Dict with the gradients for 'W1' and 'W2'.
  """
  grad_w2 = (eph.T @ epdlogp).ravel()
  grad_hidden = np.outer(epdlogp, model['W2'])
  # backprop through the ReLU: no gradient where the hidden unit was not active
  grad_hidden = np.where(eph <= 0, 0, grad_hidden)
  grad_w1 = grad_hidden.T @ epx
  return {'W1': grad_w1, 'W2': grad_w2}

def model_step(model, observation, prev_x):
  """Pick the action for one frame when playing (no exploration).

  Args:
    model: accepted for symmetry with the callers but not read here;
           policy_forward uses the global `model`.
    observation: raw frame, passed through prepro.
    prev_x: preprocessed previous frame, or None on the first step.

  Returns:
    (action, prev_x): action is 2 when the policy output is >= 0.5, else 3;
    prev_x is the preprocessed current frame, to be passed to the next call.
  """
  frame = prepro(observation)

  # the network sees the difference between consecutive frames; all zeros on the first step
  if prev_x is None:
    diff = np.zeros(D)
  else:
    diff = frame - prev_x

  # deterministic choice: threshold the probability instead of sampling from it
  up_prob, _hidden = policy_forward(diff)
  if up_prob >= 0.5:
    return 2, frame
  return 3, frame

def play_game(env, model, max_steps = 1000):
  """Play one episode with the greedy policy, record the frames and show them.

  Every step renders an RGB frame, asks model_step for an action and advances
  the environment. The loop stops when the environment reports `done` or after
  `max_steps` steps. Exactly one summary line is printed: the "without success"
  message is now tied to the loop's `else` branch, so it no longer appears after
  an episode that did finish.

  Args:
    env: environment exposing reset(), render(mode=...), step(action) returning
         (observation, reward, done, info), and close().
    model: forwarded to model_step.
    max_steps: step budget for the episode (default 1000, the former constant).

  Returns:
    None. The recorded frames are handed to display_frames_as_gif, which is
    expected to be defined elsewhere in the notebook (not visible here).
  """
  frames = []
  cumulated_reward = 0
  prev_x = None # used in computing the difference frame

  try:
    observation = env.reset()
    for t in range(max_steps):
      frames.append(env.render(mode = 'rgb_array'))
      action, prev_x = model_step(model, observation, prev_x)
      observation, reward, done, info = env.step(action)
      cumulated_reward += reward
      if done:
        print("Episode finished after {} timesteps, accumulated reward = {}".format(t+1, cumulated_reward))
        break
    else:
      # reached only when the step budget ran out without the env reporting done
      print("Episode finished without success, accumulated reward = {}".format(cumulated_reward))
  finally:
    env.close() # release the environment even if a step raised
  display_frames_as_gif(frames)

def train_model(env, model, total_episodes = 100):
  """Train the policy with policy gradients and RMSProp for a number of episodes.

  Each step samples an action (2 or 3) from the policy output, records the
  input, hidden state, "fake label" gradient and reward, and steps the
  environment. When an episode ends the rewards are discounted and
  standardized, the episode gradient is added to the global `grad_buffer`, and
  every `batch_size` episodes an RMSProp update is applied to `model` in place.

  Relies on module-level state: D, batch_size, learning_rate, decay_rate,
  grad_buffer, rmsprop_cache, and on prepro / policy_forward / policy_backward /
  discount_rewards (policy_forward and policy_backward read the global `model`).

  Changes against the previous version:
    * a zero standard deviation of the discounted rewards no longer divides by
      zero (which filled the gradients, and then the weights, with NaN);
    * total_episodes <= 0 returns immediately and a non-integer value terminates
      (the `==` test used to loop forever in both cases);
    * the episode counter is logged with %d instead of %f.

  Args:
    env: environment with reset() and step(action) returning
         (observation, reward, done, info).
    model: dict of weight arrays, updated in place.
    total_episodes: number of episodes to play before returning.

  Returns:
    List of (episode_number, reward_sum, running_reward) tuples, one per episode.
  """
  hist = []
  if total_episodes <= 0:
    return hist # nothing to train

  observation = env.reset()

  prev_x = None # used in computing the difference frame
  xs,hs,dlogps,drs = [],[],[],[]
  running_reward = None
  reward_sum = 0
  episode_number = 0

  while True:

    cur_x = prepro(observation)
    x = cur_x - prev_x if prev_x is not None else np.zeros(D)
    prev_x = cur_x

    # forward the policy network and sample an action from the returned probability
    aprob, h = policy_forward(x)
    action = 2 if np.random.uniform() < aprob else 3 # roll the dice!

    # record various intermediates (needed later for backprop)
    xs.append(x) # observation
    hs.append(h) # hidden state
    y = 1 if action == 2 else 0 # a "fake label"
    dlogps.append(y - aprob) # grad that encourages the action that was taken to be taken (see http://cs231n.github.io/neural-networks-2/#losses if confused)

    # step the environment and get new measurements
    observation, reward, done, info = env.step(action)
    reward_sum += reward

    drs.append(reward) # record reward (has to be done after we call step() to get reward for previous action)

    if not done:
      continue

    # ---- an episode finished ----
    episode_number += 1

    # stack together all inputs, hidden states, action gradients, and rewards for this episode
    epx = np.vstack(xs)
    eph = np.vstack(hs)
    epdlogp = np.vstack(dlogps)
    epr = np.vstack(drs)
    xs,hs,dlogps,drs = [],[],[],[] # reset array memory

    # compute the discounted reward backwards through time
    discounted_epr = discount_rewards(epr)
    # standardize the rewards to be unit normal (helps control the gradient estimator variance)
    discounted_epr -= np.mean(discounted_epr)
    reward_std = np.std(discounted_epr)
    if reward_std > 0: # constant rewards: already centred at 0, dividing would give NaN
      discounted_epr /= reward_std

    epdlogp *= discounted_epr # modulate the gradient with advantage (PG magic happens right here.)
    grad = policy_backward(epx, eph, epdlogp)
    for k in model: grad_buffer[k] += grad[k] # accumulate grad over batch

    # perform rmsprop parameter update every batch_size episodes
    if episode_number % batch_size == 0:
      for k,v in model.items():
        g = grad_buffer[k] # gradient
        rmsprop_cache[k] = decay_rate * rmsprop_cache[k] + (1 - decay_rate) * g**2
        model[k] += learning_rate * g / (np.sqrt(rmsprop_cache[k]) + 1e-5)
        grad_buffer[k] = np.zeros_like(v) # reset batch gradient buffer

    # boring book-keeping
    running_reward = reward_sum if running_reward is None else running_reward * 0.99 + reward_sum * 0.01
    hist.append((episode_number, reward_sum, running_reward))
    print ('episode %d, reward total was %f. running mean: %f' % (episode_number, reward_sum, running_reward))
    reward_sum = 0
    observation = env.reset() # reset env
    prev_x = None
    if episode_number >= total_episodes:
      return hist

   
    

  logger.warn(
  deprecation(
  deprecation(


In [2]:
# Train for 7000 episodes; train_model returns the per-episode history, kept here as hist1. %time reports how long the call took.
%time hist1 = train_model(env, model, total_episodes=7000)

  logger.deprecation(


episode 1.000000, reward total was -21.000000. running mean: -21.000000
episode 2.000000, reward total was -21.000000. running mean: -21.000000
episode 3.000000, reward total was -20.000000. running mean: -20.990000
episode 4.000000, reward total was -21.000000. running mean: -20.990100
episode 5.000000, reward total was -20.000000. running mean: -20.980199
episode 6.000000, reward total was -21.000000. running mean: -20.980397
episode 7.000000, reward total was -21.000000. running mean: -20.980593
episode 8.000000, reward total was -21.000000. running mean: -20.980787
episode 9.000000, reward total was -20.000000. running mean: -20.970979
episode 10.000000, reward total was -21.000000. running mean: -20.971269
episode 11.000000, reward total was -19.000000. running mean: -20.951557
episode 12.000000, reward total was -21.000000. running mean: -20.952041
episode 13.000000, reward total was -21.000000. running mean: -20.952521
episode 14.000000, reward total was -20.000000. running mean

episode 114.000000, reward total was -21.000000. running mean: -20.550743
episode 115.000000, reward total was -21.000000. running mean: -20.555236
episode 116.000000, reward total was -20.000000. running mean: -20.549684
episode 117.000000, reward total was -21.000000. running mean: -20.554187
episode 118.000000, reward total was -20.000000. running mean: -20.548645
episode 119.000000, reward total was -21.000000. running mean: -20.553158
episode 120.000000, reward total was -19.000000. running mean: -20.537627
episode 121.000000, reward total was -21.000000. running mean: -20.542251
episode 122.000000, reward total was -21.000000. running mean: -20.546828
episode 123.000000, reward total was -21.000000. running mean: -20.551360
episode 124.000000, reward total was -21.000000. running mean: -20.555846
episode 125.000000, reward total was -21.000000. running mean: -20.560288
episode 126.000000, reward total was -20.000000. running mean: -20.554685
episode 127.000000, reward total was -

episode 225.000000, reward total was -20.000000. running mean: -20.402876
episode 226.000000, reward total was -21.000000. running mean: -20.408847
episode 227.000000, reward total was -21.000000. running mean: -20.414758
episode 228.000000, reward total was -20.000000. running mean: -20.410611
episode 229.000000, reward total was -19.000000. running mean: -20.396505
episode 230.000000, reward total was -21.000000. running mean: -20.402540
episode 231.000000, reward total was -20.000000. running mean: -20.398514
episode 232.000000, reward total was -21.000000. running mean: -20.404529
episode 233.000000, reward total was -20.000000. running mean: -20.400484
episode 234.000000, reward total was -21.000000. running mean: -20.406479
episode 235.000000, reward total was -19.000000. running mean: -20.392414
episode 236.000000, reward total was -19.000000. running mean: -20.378490
episode 237.000000, reward total was -20.000000. running mean: -20.374705
episode 238.000000, reward total was -

episode 336.000000, reward total was -19.000000. running mean: -20.384421
episode 337.000000, reward total was -21.000000. running mean: -20.390577
episode 338.000000, reward total was -21.000000. running mean: -20.396671
episode 339.000000, reward total was -21.000000. running mean: -20.402705
episode 340.000000, reward total was -19.000000. running mean: -20.388678
episode 341.000000, reward total was -20.000000. running mean: -20.384791
episode 342.000000, reward total was -20.000000. running mean: -20.380943
episode 343.000000, reward total was -19.000000. running mean: -20.367134
episode 344.000000, reward total was -20.000000. running mean: -20.363462
episode 345.000000, reward total was -19.000000. running mean: -20.349828
episode 346.000000, reward total was -19.000000. running mean: -20.336329
episode 347.000000, reward total was -21.000000. running mean: -20.342966
episode 348.000000, reward total was -21.000000. running mean: -20.349536
episode 349.000000, reward total was -

episode 447.000000, reward total was -20.000000. running mean: -20.322165
episode 448.000000, reward total was -21.000000. running mean: -20.328943
episode 449.000000, reward total was -21.000000. running mean: -20.335654
episode 450.000000, reward total was -20.000000. running mean: -20.332297
episode 451.000000, reward total was -20.000000. running mean: -20.328974
episode 452.000000, reward total was -21.000000. running mean: -20.335685
episode 453.000000, reward total was -19.000000. running mean: -20.322328
episode 454.000000, reward total was -21.000000. running mean: -20.329105
episode 455.000000, reward total was -21.000000. running mean: -20.335814
episode 456.000000, reward total was -20.000000. running mean: -20.332455
episode 457.000000, reward total was -21.000000. running mean: -20.339131
episode 458.000000, reward total was -21.000000. running mean: -20.345740
episode 459.000000, reward total was -21.000000. running mean: -20.352282
episode 460.000000, reward total was -

episode 558.000000, reward total was -20.000000. running mean: -20.290663
episode 559.000000, reward total was -19.000000. running mean: -20.277757
episode 560.000000, reward total was -21.000000. running mean: -20.284979
episode 561.000000, reward total was -20.000000. running mean: -20.282129
episode 562.000000, reward total was -21.000000. running mean: -20.289308
episode 563.000000, reward total was -21.000000. running mean: -20.296415
episode 564.000000, reward total was -20.000000. running mean: -20.293451
episode 565.000000, reward total was -21.000000. running mean: -20.300516
episode 566.000000, reward total was -21.000000. running mean: -20.307511
episode 567.000000, reward total was -20.000000. running mean: -20.304436
episode 568.000000, reward total was -19.000000. running mean: -20.291392
episode 569.000000, reward total was -20.000000. running mean: -20.288478
episode 570.000000, reward total was -17.000000. running mean: -20.255593
episode 571.000000, reward total was -

episode 669.000000, reward total was -18.000000. running mean: -20.221858
episode 670.000000, reward total was -20.000000. running mean: -20.219639
episode 671.000000, reward total was -19.000000. running mean: -20.207443
episode 672.000000, reward total was -21.000000. running mean: -20.215369
episode 673.000000, reward total was -20.000000. running mean: -20.213215
episode 674.000000, reward total was -19.000000. running mean: -20.201083
episode 675.000000, reward total was -20.000000. running mean: -20.199072
episode 676.000000, reward total was -19.000000. running mean: -20.187081
episode 677.000000, reward total was -20.000000. running mean: -20.185210
episode 678.000000, reward total was -20.000000. running mean: -20.183358
episode 679.000000, reward total was -20.000000. running mean: -20.181525
episode 680.000000, reward total was -18.000000. running mean: -20.159709
episode 681.000000, reward total was -20.000000. running mean: -20.158112
episode 682.000000, reward total was -

episode 780.000000, reward total was -20.000000. running mean: -20.183010
episode 781.000000, reward total was -19.000000. running mean: -20.171180
episode 782.000000, reward total was -21.000000. running mean: -20.179468
episode 783.000000, reward total was -21.000000. running mean: -20.187673
episode 784.000000, reward total was -21.000000. running mean: -20.195797
episode 785.000000, reward total was -20.000000. running mean: -20.193839
episode 786.000000, reward total was -21.000000. running mean: -20.201900
episode 787.000000, reward total was -21.000000. running mean: -20.209881
episode 788.000000, reward total was -20.000000. running mean: -20.207783
episode 789.000000, reward total was -21.000000. running mean: -20.215705
episode 790.000000, reward total was -21.000000. running mean: -20.223548
episode 791.000000, reward total was -20.000000. running mean: -20.221312
episode 792.000000, reward total was -20.000000. running mean: -20.219099
episode 793.000000, reward total was -

episode 891.000000, reward total was -21.000000. running mean: -20.193336
episode 892.000000, reward total was -19.000000. running mean: -20.181403
episode 893.000000, reward total was -19.000000. running mean: -20.169589
episode 894.000000, reward total was -20.000000. running mean: -20.167893
episode 895.000000, reward total was -21.000000. running mean: -20.176214
episode 896.000000, reward total was -21.000000. running mean: -20.184452
episode 897.000000, reward total was -21.000000. running mean: -20.192607
episode 898.000000, reward total was -19.000000. running mean: -20.180681
episode 899.000000, reward total was -21.000000. running mean: -20.188874
episode 900.000000, reward total was -21.000000. running mean: -20.196986
episode 901.000000, reward total was -20.000000. running mean: -20.195016
episode 902.000000, reward total was -21.000000. running mean: -20.203066
episode 903.000000, reward total was -20.000000. running mean: -20.201035
episode 904.000000, reward total was -

episode 1002.000000, reward total was -21.000000. running mean: -20.106127
episode 1003.000000, reward total was -20.000000. running mean: -20.105066
episode 1004.000000, reward total was -21.000000. running mean: -20.114015
episode 1005.000000, reward total was -20.000000. running mean: -20.112875
episode 1006.000000, reward total was -21.000000. running mean: -20.121746
episode 1007.000000, reward total was -19.000000. running mean: -20.110529
episode 1008.000000, reward total was -20.000000. running mean: -20.109424
episode 1009.000000, reward total was -21.000000. running mean: -20.118329
episode 1010.000000, reward total was -21.000000. running mean: -20.127146
episode 1011.000000, reward total was -21.000000. running mean: -20.135875
episode 1012.000000, reward total was -21.000000. running mean: -20.144516
episode 1013.000000, reward total was -20.000000. running mean: -20.143071
episode 1014.000000, reward total was -20.000000. running mean: -20.141640
episode 1015.000000, rewa

episode 1112.000000, reward total was -19.000000. running mean: -19.877604
episode 1113.000000, reward total was -18.000000. running mean: -19.858828
episode 1114.000000, reward total was -19.000000. running mean: -19.850240
episode 1115.000000, reward total was -20.000000. running mean: -19.851738
episode 1116.000000, reward total was -21.000000. running mean: -19.863220
episode 1117.000000, reward total was -19.000000. running mean: -19.854588
episode 1118.000000, reward total was -21.000000. running mean: -19.866042
episode 1119.000000, reward total was -20.000000. running mean: -19.867382
episode 1120.000000, reward total was -21.000000. running mean: -19.878708
episode 1121.000000, reward total was -18.000000. running mean: -19.859921
episode 1122.000000, reward total was -21.000000. running mean: -19.871322
episode 1123.000000, reward total was -21.000000. running mean: -19.882609
episode 1124.000000, reward total was -20.000000. running mean: -19.883782
episode 1125.000000, rewa

episode 1222.000000, reward total was -17.000000. running mean: -19.764560
episode 1223.000000, reward total was -21.000000. running mean: -19.776915
episode 1224.000000, reward total was -20.000000. running mean: -19.779146
episode 1225.000000, reward total was -21.000000. running mean: -19.791354
episode 1226.000000, reward total was -21.000000. running mean: -19.803441
episode 1227.000000, reward total was -20.000000. running mean: -19.805406
episode 1228.000000, reward total was -20.000000. running mean: -19.807352
episode 1229.000000, reward total was -19.000000. running mean: -19.799279
episode 1230.000000, reward total was -21.000000. running mean: -19.811286
episode 1231.000000, reward total was -17.000000. running mean: -19.783173
episode 1232.000000, reward total was -21.000000. running mean: -19.795341
episode 1233.000000, reward total was -20.000000. running mean: -19.797388
episode 1234.000000, reward total was -19.000000. running mean: -19.789414
episode 1235.000000, rewa

episode 1332.000000, reward total was -20.000000. running mean: -19.817355
episode 1333.000000, reward total was -19.000000. running mean: -19.809181
episode 1334.000000, reward total was -19.000000. running mean: -19.801090
episode 1335.000000, reward total was -21.000000. running mean: -19.813079
episode 1336.000000, reward total was -20.000000. running mean: -19.814948
episode 1337.000000, reward total was -20.000000. running mean: -19.816798
episode 1338.000000, reward total was -18.000000. running mean: -19.798630
episode 1339.000000, reward total was -20.000000. running mean: -19.800644
episode 1340.000000, reward total was -21.000000. running mean: -19.812638
episode 1341.000000, reward total was -21.000000. running mean: -19.824511
episode 1342.000000, reward total was -20.000000. running mean: -19.826266
episode 1343.000000, reward total was -21.000000. running mean: -19.838004
episode 1344.000000, reward total was -20.000000. running mean: -19.839624
episode 1345.000000, rewa

episode 1442.000000, reward total was -21.000000. running mean: -19.846180
episode 1443.000000, reward total was -21.000000. running mean: -19.857718
episode 1444.000000, reward total was -19.000000. running mean: -19.849141
episode 1445.000000, reward total was -20.000000. running mean: -19.850650
episode 1446.000000, reward total was -21.000000. running mean: -19.862143
episode 1447.000000, reward total was -20.000000. running mean: -19.863522
episode 1448.000000, reward total was -20.000000. running mean: -19.864886
episode 1449.000000, reward total was -21.000000. running mean: -19.876238
episode 1450.000000, reward total was -21.000000. running mean: -19.887475
episode 1451.000000, reward total was -21.000000. running mean: -19.898600
episode 1452.000000, reward total was -19.000000. running mean: -19.889614
episode 1453.000000, reward total was -19.000000. running mean: -19.880718
episode 1454.000000, reward total was -18.000000. running mean: -19.861911
episode 1455.000000, rewa

episode 1552.000000, reward total was -21.000000. running mean: -19.719044
episode 1553.000000, reward total was -20.000000. running mean: -19.721854
episode 1554.000000, reward total was -21.000000. running mean: -19.734635
episode 1555.000000, reward total was -20.000000. running mean: -19.737289
episode 1556.000000, reward total was -19.000000. running mean: -19.729916
episode 1557.000000, reward total was -19.000000. running mean: -19.722617
episode 1558.000000, reward total was -21.000000. running mean: -19.735391
episode 1559.000000, reward total was -18.000000. running mean: -19.718037
episode 1560.000000, reward total was -20.000000. running mean: -19.720857
episode 1561.000000, reward total was -20.000000. running mean: -19.723648
episode 1562.000000, reward total was -21.000000. running mean: -19.736411
episode 1563.000000, reward total was -16.000000. running mean: -19.699047
episode 1564.000000, reward total was -20.000000. running mean: -19.702057
episode 1565.000000, rewa

episode 1662.000000, reward total was -20.000000. running mean: -19.817130
episode 1663.000000, reward total was -21.000000. running mean: -19.828959
episode 1664.000000, reward total was -16.000000. running mean: -19.790669
episode 1665.000000, reward total was -21.000000. running mean: -19.802763
episode 1666.000000, reward total was -21.000000. running mean: -19.814735
episode 1667.000000, reward total was -20.000000. running mean: -19.816588
episode 1668.000000, reward total was -19.000000. running mean: -19.808422
episode 1669.000000, reward total was -20.000000. running mean: -19.810338
episode 1670.000000, reward total was -19.000000. running mean: -19.802234
episode 1671.000000, reward total was -20.000000. running mean: -19.804212
episode 1672.000000, reward total was -19.000000. running mean: -19.796170
episode 1673.000000, reward total was -20.000000. running mean: -19.798208
episode 1674.000000, reward total was -19.000000. running mean: -19.790226
episode 1675.000000, rewa

episode 1772.000000, reward total was -20.000000. running mean: -19.687395
episode 1773.000000, reward total was -20.000000. running mean: -19.690521
episode 1774.000000, reward total was -20.000000. running mean: -19.693616
episode 1775.000000, reward total was -19.000000. running mean: -19.686680
episode 1776.000000, reward total was -20.000000. running mean: -19.689813
episode 1777.000000, reward total was -21.000000. running mean: -19.702915
episode 1778.000000, reward total was -20.000000. running mean: -19.705886
episode 1779.000000, reward total was -18.000000. running mean: -19.688827
episode 1780.000000, reward total was -21.000000. running mean: -19.701939
episode 1781.000000, reward total was -18.000000. running mean: -19.684919
episode 1782.000000, reward total was -18.000000. running mean: -19.668070
episode 1783.000000, reward total was -18.000000. running mean: -19.651389
episode 1784.000000, reward total was -20.000000. running mean: -19.654875
episode 1785.000000, rewa

episode 1882.000000, reward total was -20.000000. running mean: -19.546068
episode 1883.000000, reward total was -19.000000. running mean: -19.540608
episode 1884.000000, reward total was -17.000000. running mean: -19.515202
episode 1885.000000, reward total was -21.000000. running mean: -19.530050
episode 1886.000000, reward total was -21.000000. running mean: -19.544749
episode 1887.000000, reward total was -18.000000. running mean: -19.529302
episode 1888.000000, reward total was -21.000000. running mean: -19.544009
episode 1889.000000, reward total was -20.000000. running mean: -19.548568
episode 1890.000000, reward total was -19.000000. running mean: -19.543083
episode 1891.000000, reward total was -18.000000. running mean: -19.527652
episode 1892.000000, reward total was -21.000000. running mean: -19.542375
episode 1893.000000, reward total was -17.000000. running mean: -19.516952
episode 1894.000000, reward total was -18.000000. running mean: -19.501782
episode 1895.000000, rewa

episode 1992.000000, reward total was -17.000000. running mean: -19.352500
episode 1993.000000, reward total was -20.000000. running mean: -19.358975
episode 1994.000000, reward total was -21.000000. running mean: -19.375386
episode 1995.000000, reward total was -20.000000. running mean: -19.381632
episode 1996.000000, reward total was -19.000000. running mean: -19.377816
episode 1997.000000, reward total was -20.000000. running mean: -19.384037
episode 1998.000000, reward total was -20.000000. running mean: -19.390197
episode 1999.000000, reward total was -20.000000. running mean: -19.396295
episode 2000.000000, reward total was -21.000000. running mean: -19.412332
episode 2001.000000, reward total was -21.000000. running mean: -19.428209
episode 2002.000000, reward total was -20.000000. running mean: -19.433927
episode 2003.000000, reward total was -20.000000. running mean: -19.439587
episode 2004.000000, reward total was -20.000000. running mean: -19.445192
episode 2005.000000, rewa

episode 2102.000000, reward total was -20.000000. running mean: -19.215598
episode 2103.000000, reward total was -21.000000. running mean: -19.233442
episode 2104.000000, reward total was -19.000000. running mean: -19.231108
episode 2105.000000, reward total was -21.000000. running mean: -19.248796
episode 2106.000000, reward total was -19.000000. running mean: -19.246308
episode 2107.000000, reward total was -20.000000. running mean: -19.253845
episode 2108.000000, reward total was -20.000000. running mean: -19.261307
episode 2109.000000, reward total was -19.000000. running mean: -19.258694
episode 2110.000000, reward total was -19.000000. running mean: -19.256107
episode 2111.000000, reward total was -19.000000. running mean: -19.253546
episode 2112.000000, reward total was -21.000000. running mean: -19.271010
episode 2113.000000, reward total was -21.000000. running mean: -19.288300
episode 2114.000000, reward total was -20.000000. running mean: -19.295417
episode 2115.000000, rewa

episode 2212.000000, reward total was -19.000000. running mean: -19.379904
episode 2213.000000, reward total was -19.000000. running mean: -19.376105
episode 2214.000000, reward total was -19.000000. running mean: -19.372344
episode 2215.000000, reward total was -19.000000. running mean: -19.368621
episode 2216.000000, reward total was -19.000000. running mean: -19.364935
episode 2217.000000, reward total was -19.000000. running mean: -19.361285
episode 2218.000000, reward total was -19.000000. running mean: -19.357672
episode 2219.000000, reward total was -21.000000. running mean: -19.374096
episode 2220.000000, reward total was -21.000000. running mean: -19.390355
episode 2221.000000, reward total was -20.000000. running mean: -19.396451
episode 2222.000000, reward total was -19.000000. running mean: -19.392487
episode 2223.000000, reward total was -18.000000. running mean: -19.378562
episode 2224.000000, reward total was -18.000000. running mean: -19.364776
episode 2225.000000, rewa

episode 2322.000000, reward total was -21.000000. running mean: -19.308937
episode 2323.000000, reward total was -18.000000. running mean: -19.295847
episode 2324.000000, reward total was -18.000000. running mean: -19.282889
episode 2325.000000, reward total was -18.000000. running mean: -19.270060
episode 2326.000000, reward total was -18.000000. running mean: -19.257359
episode 2327.000000, reward total was -21.000000. running mean: -19.274786
episode 2328.000000, reward total was -19.000000. running mean: -19.272038
episode 2329.000000, reward total was -18.000000. running mean: -19.259318
episode 2330.000000, reward total was -19.000000. running mean: -19.256724
episode 2331.000000, reward total was -21.000000. running mean: -19.274157
episode 2332.000000, reward total was -18.000000. running mean: -19.261416
episode 2333.000000, reward total was -20.000000. running mean: -19.268801
episode 2334.000000, reward total was -20.000000. running mean: -19.276113
episode 2335.000000, rewa

episode 2432.000000, reward total was -16.000000. running mean: -19.100538
episode 2433.000000, reward total was -21.000000. running mean: -19.119533
episode 2434.000000, reward total was -20.000000. running mean: -19.128338
episode 2435.000000, reward total was -20.000000. running mean: -19.137054
episode 2436.000000, reward total was -20.000000. running mean: -19.145684
episode 2437.000000, reward total was -19.000000. running mean: -19.144227
episode 2438.000000, reward total was -18.000000. running mean: -19.132784
episode 2439.000000, reward total was -19.000000. running mean: -19.131457
episode 2440.000000, reward total was -20.000000. running mean: -19.140142
episode 2441.000000, reward total was -20.000000. running mean: -19.148741
episode 2442.000000, reward total was -20.000000. running mean: -19.157253
episode 2443.000000, reward total was -19.000000. running mean: -19.155681
episode 2444.000000, reward total was -19.000000. running mean: -19.154124
episode 2445.000000, rewa

episode 2542.000000, reward total was -19.000000. running mean: -19.110730
episode 2543.000000, reward total was -17.000000. running mean: -19.089623
episode 2544.000000, reward total was -21.000000. running mean: -19.108727
episode 2545.000000, reward total was -19.000000. running mean: -19.107640
episode 2546.000000, reward total was -20.000000. running mean: -19.116563
episode 2547.000000, reward total was -19.000000. running mean: -19.115398
episode 2548.000000, reward total was -20.000000. running mean: -19.124244
episode 2549.000000, reward total was -20.000000. running mean: -19.133001
episode 2550.000000, reward total was -18.000000. running mean: -19.121671
episode 2551.000000, reward total was -18.000000. running mean: -19.110454
episode 2552.000000, reward total was -21.000000. running mean: -19.129350
episode 2553.000000, reward total was -21.000000. running mean: -19.148056
episode 2554.000000, reward total was -19.000000. running mean: -19.146576
episode 2555.000000, rewa

episode 2652.000000, reward total was -18.000000. running mean: -18.986718
episode 2653.000000, reward total was -19.000000. running mean: -18.986850
episode 2654.000000, reward total was -17.000000. running mean: -18.966982
episode 2655.000000, reward total was -21.000000. running mean: -18.987312
episode 2656.000000, reward total was -18.000000. running mean: -18.977439
episode 2657.000000, reward total was -20.000000. running mean: -18.987665
episode 2658.000000, reward total was -21.000000. running mean: -19.007788
episode 2659.000000, reward total was -20.000000. running mean: -19.017710
episode 2660.000000, reward total was -21.000000. running mean: -19.037533
episode 2661.000000, reward total was -18.000000. running mean: -19.027158
episode 2662.000000, reward total was -21.000000. running mean: -19.046886
episode 2663.000000, reward total was -19.000000. running mean: -19.046417
episode 2664.000000, reward total was -20.000000. running mean: -19.055953
episode 2665.000000, rewa

episode 2762.000000, reward total was -21.000000. running mean: -18.985685
episode 2763.000000, reward total was -14.000000. running mean: -18.935828
episode 2764.000000, reward total was -18.000000. running mean: -18.926470
episode 2765.000000, reward total was -21.000000. running mean: -18.947205
episode 2766.000000, reward total was -20.000000. running mean: -18.957733
episode 2767.000000, reward total was -21.000000. running mean: -18.978156
episode 2768.000000, reward total was -18.000000. running mean: -18.968374
episode 2769.000000, reward total was -18.000000. running mean: -18.958690
episode 2770.000000, reward total was -20.000000. running mean: -18.969103
episode 2771.000000, reward total was -20.000000. running mean: -18.979412
episode 2772.000000, reward total was -17.000000. running mean: -18.959618
episode 2773.000000, reward total was -17.000000. running mean: -18.940022
episode 2774.000000, reward total was -21.000000. running mean: -18.960622
episode 2775.000000, rewa

episode 2872.000000, reward total was -21.000000. running mean: -18.994311
episode 2873.000000, reward total was -20.000000. running mean: -19.004368
episode 2874.000000, reward total was -20.000000. running mean: -19.014324
episode 2875.000000, reward total was -18.000000. running mean: -19.004181
episode 2876.000000, reward total was -17.000000. running mean: -18.984139
episode 2877.000000, reward total was -19.000000. running mean: -18.984297
episode 2878.000000, reward total was -19.000000. running mean: -18.984455
episode 2879.000000, reward total was -21.000000. running mean: -19.004610
episode 2880.000000, reward total was -21.000000. running mean: -19.024564
episode 2881.000000, reward total was -19.000000. running mean: -19.024318
episode 2882.000000, reward total was -18.000000. running mean: -19.014075
episode 2883.000000, reward total was -17.000000. running mean: -18.993934
episode 2884.000000, reward total was -20.000000. running mean: -19.003995
episode 2885.000000, rewa

episode 2982.000000, reward total was -19.000000. running mean: -18.676032
episode 2983.000000, reward total was -19.000000. running mean: -18.679272
episode 2984.000000, reward total was -18.000000. running mean: -18.672479
episode 2985.000000, reward total was -17.000000. running mean: -18.655754
episode 2986.000000, reward total was -21.000000. running mean: -18.679197
episode 2987.000000, reward total was -17.000000. running mean: -18.662405
episode 2988.000000, reward total was -17.000000. running mean: -18.645781
episode 2989.000000, reward total was -21.000000. running mean: -18.669323
episode 2990.000000, reward total was -18.000000. running mean: -18.662630
episode 2991.000000, reward total was -18.000000. running mean: -18.656003
episode 2992.000000, reward total was -20.000000. running mean: -18.669443
episode 2993.000000, reward total was -20.000000. running mean: -18.682749
episode 2994.000000, reward total was -17.000000. running mean: -18.665921
episode 2995.000000, rewa

episode 3092.000000, reward total was -21.000000. running mean: -18.574252
episode 3093.000000, reward total was -18.000000. running mean: -18.568509
episode 3094.000000, reward total was -20.000000. running mean: -18.582824
episode 3095.000000, reward total was -21.000000. running mean: -18.606996
episode 3096.000000, reward total was -19.000000. running mean: -18.610926
episode 3097.000000, reward total was -19.000000. running mean: -18.614817
episode 3098.000000, reward total was -20.000000. running mean: -18.628668
episode 3099.000000, reward total was -18.000000. running mean: -18.622382
episode 3100.000000, reward total was -20.000000. running mean: -18.636158
episode 3101.000000, reward total was -19.000000. running mean: -18.639796
episode 3102.000000, reward total was -17.000000. running mean: -18.623398
episode 3103.000000, reward total was -21.000000. running mean: -18.647164
episode 3104.000000, reward total was -18.000000. running mean: -18.640693
episode 3105.000000, rewa

episode 3202.000000, reward total was -15.000000. running mean: -18.528531
episode 3203.000000, reward total was -15.000000. running mean: -18.493245
episode 3204.000000, reward total was -17.000000. running mean: -18.478313
episode 3205.000000, reward total was -21.000000. running mean: -18.503530
episode 3206.000000, reward total was -20.000000. running mean: -18.518495
episode 3207.000000, reward total was -16.000000. running mean: -18.493310
episode 3208.000000, reward total was -15.000000. running mean: -18.458376
episode 3209.000000, reward total was -20.000000. running mean: -18.473793
episode 3210.000000, reward total was -19.000000. running mean: -18.479055
episode 3211.000000, reward total was -20.000000. running mean: -18.494264
episode 3212.000000, reward total was -19.000000. running mean: -18.499322
episode 3213.000000, reward total was -18.000000. running mean: -18.494328
episode 3214.000000, reward total was -19.000000. running mean: -18.499385
episode 3215.000000, rewa

episode 3312.000000, reward total was -16.000000. running mean: -18.329350
episode 3313.000000, reward total was -16.000000. running mean: -18.306056
episode 3314.000000, reward total was -21.000000. running mean: -18.332996
episode 3315.000000, reward total was -19.000000. running mean: -18.339666
episode 3316.000000, reward total was -21.000000. running mean: -18.366269
episode 3317.000000, reward total was -17.000000. running mean: -18.352606
episode 3318.000000, reward total was -18.000000. running mean: -18.349080
episode 3319.000000, reward total was -21.000000. running mean: -18.375589
episode 3320.000000, reward total was -15.000000. running mean: -18.341833
episode 3321.000000, reward total was -19.000000. running mean: -18.348415
episode 3322.000000, reward total was -16.000000. running mean: -18.324931
episode 3323.000000, reward total was -19.000000. running mean: -18.331682
episode 3324.000000, reward total was -20.000000. running mean: -18.348365
episode 3325.000000, rewa

episode 3422.000000, reward total was -17.000000. running mean: -18.369305
episode 3423.000000, reward total was -19.000000. running mean: -18.375612
episode 3424.000000, reward total was -17.000000. running mean: -18.361856
episode 3425.000000, reward total was -19.000000. running mean: -18.368238
episode 3426.000000, reward total was -17.000000. running mean: -18.354555
episode 3427.000000, reward total was -20.000000. running mean: -18.371010
episode 3428.000000, reward total was -20.000000. running mean: -18.387300
episode 3429.000000, reward total was -17.000000. running mean: -18.373427
episode 3430.000000, reward total was -19.000000. running mean: -18.379692
episode 3431.000000, reward total was -18.000000. running mean: -18.375895
episode 3432.000000, reward total was -20.000000. running mean: -18.392136
episode 3433.000000, reward total was -17.000000. running mean: -18.378215
episode 3434.000000, reward total was -16.000000. running mean: -18.354433
episode 3435.000000, rewa

episode 3532.000000, reward total was -17.000000. running mean: -18.160479
episode 3533.000000, reward total was -13.000000. running mean: -18.108874
episode 3534.000000, reward total was -18.000000. running mean: -18.107785
episode 3535.000000, reward total was -18.000000. running mean: -18.106708
episode 3536.000000, reward total was -14.000000. running mean: -18.065640
episode 3537.000000, reward total was -17.000000. running mean: -18.054984
episode 3538.000000, reward total was -19.000000. running mean: -18.064434
episode 3539.000000, reward total was -17.000000. running mean: -18.053790
episode 3540.000000, reward total was -21.000000. running mean: -18.083252
episode 3541.000000, reward total was -18.000000. running mean: -18.082419
episode 3542.000000, reward total was -20.000000. running mean: -18.101595
episode 3543.000000, reward total was -18.000000. running mean: -18.100579
episode 3544.000000, reward total was -17.000000. running mean: -18.089574
episode 3545.000000, rewa

episode 3642.000000, reward total was -12.000000. running mean: -17.816898
episode 3643.000000, reward total was -15.000000. running mean: -17.788729
episode 3644.000000, reward total was -19.000000. running mean: -17.800842
episode 3645.000000, reward total was -19.000000. running mean: -17.812833
episode 3646.000000, reward total was -17.000000. running mean: -17.804705
episode 3647.000000, reward total was -12.000000. running mean: -17.746658
episode 3648.000000, reward total was -17.000000. running mean: -17.739191
episode 3649.000000, reward total was -17.000000. running mean: -17.731800
episode 3650.000000, reward total was -17.000000. running mean: -17.724482
episode 3651.000000, reward total was -18.000000. running mean: -17.727237
episode 3652.000000, reward total was -14.000000. running mean: -17.689964
episode 3653.000000, reward total was -19.000000. running mean: -17.703065
episode 3654.000000, reward total was -20.000000. running mean: -17.726034
episode 3655.000000, rewa

episode 3752.000000, reward total was -16.000000. running mean: -17.839373
episode 3753.000000, reward total was -19.000000. running mean: -17.850980
episode 3754.000000, reward total was -16.000000. running mean: -17.832470
episode 3755.000000, reward total was -15.000000. running mean: -17.804145
episode 3756.000000, reward total was -16.000000. running mean: -17.786104
episode 3757.000000, reward total was -14.000000. running mean: -17.748243
episode 3758.000000, reward total was -17.000000. running mean: -17.740760
episode 3759.000000, reward total was -19.000000. running mean: -17.753353
episode 3760.000000, reward total was -19.000000. running mean: -17.765819
episode 3761.000000, reward total was -16.000000. running mean: -17.748161
episode 3762.000000, reward total was -18.000000. running mean: -17.750679
episode 3763.000000, reward total was -16.000000. running mean: -17.733173
episode 3764.000000, reward total was -21.000000. running mean: -17.765841
episode 3765.000000, rewa

episode 3862.000000, reward total was -14.000000. running mean: -17.899683
episode 3863.000000, reward total was -16.000000. running mean: -17.880686
episode 3864.000000, reward total was -16.000000. running mean: -17.861879
episode 3865.000000, reward total was -14.000000. running mean: -17.823260
episode 3866.000000, reward total was -16.000000. running mean: -17.805028
episode 3867.000000, reward total was -19.000000. running mean: -17.816978
episode 3868.000000, reward total was -19.000000. running mean: -17.828808
episode 3869.000000, reward total was -16.000000. running mean: -17.810520
episode 3870.000000, reward total was -21.000000. running mean: -17.842415
episode 3871.000000, reward total was -18.000000. running mean: -17.843990
episode 3872.000000, reward total was -18.000000. running mean: -17.845550
episode 3873.000000, reward total was -13.000000. running mean: -17.797095
episode 3874.000000, reward total was -16.000000. running mean: -17.779124
episode 3875.000000, rewa

episode 3972.000000, reward total was -15.000000. running mean: -17.616246
episode 3973.000000, reward total was -17.000000. running mean: -17.610084
episode 3974.000000, reward total was -21.000000. running mean: -17.643983
episode 3975.000000, reward total was -17.000000. running mean: -17.637543
episode 3976.000000, reward total was -14.000000. running mean: -17.601168
episode 3977.000000, reward total was -19.000000. running mean: -17.615156
episode 3978.000000, reward total was -16.000000. running mean: -17.599004
episode 3979.000000, reward total was -15.000000. running mean: -17.573014
episode 3980.000000, reward total was -18.000000. running mean: -17.577284
episode 3981.000000, reward total was -16.000000. running mean: -17.561511
episode 3982.000000, reward total was -11.000000. running mean: -17.495896
episode 3983.000000, reward total was -15.000000. running mean: -17.470937
episode 3984.000000, reward total was -18.000000. running mean: -17.476228
episode 3985.000000, rewa

episode 4082.000000, reward total was -19.000000. running mean: -17.459534
episode 4083.000000, reward total was -15.000000. running mean: -17.434938
episode 4084.000000, reward total was -19.000000. running mean: -17.450589
episode 4085.000000, reward total was -20.000000. running mean: -17.476083
episode 4086.000000, reward total was -17.000000. running mean: -17.471322
episode 4087.000000, reward total was -19.000000. running mean: -17.486609
episode 4088.000000, reward total was -16.000000. running mean: -17.471743
episode 4089.000000, reward total was -17.000000. running mean: -17.467026
episode 4090.000000, reward total was -16.000000. running mean: -17.452355
episode 4091.000000, reward total was -20.000000. running mean: -17.477832
episode 4092.000000, reward total was -20.000000. running mean: -17.503053
episode 4093.000000, reward total was -16.000000. running mean: -17.488023
episode 4094.000000, reward total was -15.000000. running mean: -17.463143
episode 4095.000000, rewa

episode 4192.000000, reward total was -18.000000. running mean: -17.387306
episode 4193.000000, reward total was -17.000000. running mean: -17.383433
episode 4194.000000, reward total was -19.000000. running mean: -17.399599
episode 4195.000000, reward total was -17.000000. running mean: -17.395603
episode 4196.000000, reward total was -19.000000. running mean: -17.411647
episode 4197.000000, reward total was -16.000000. running mean: -17.397531
episode 4198.000000, reward total was -18.000000. running mean: -17.403555
episode 4199.000000, reward total was -18.000000. running mean: -17.409520
episode 4200.000000, reward total was -17.000000. running mean: -17.405424
episode 4201.000000, reward total was -20.000000. running mean: -17.431370
episode 4202.000000, reward total was -15.000000. running mean: -17.407057
episode 4203.000000, reward total was -17.000000. running mean: -17.402986
episode 4204.000000, reward total was -19.000000. running mean: -17.418956
episode 4205.000000, rewa

episode 4302.000000, reward total was -15.000000. running mean: -17.350249
episode 4303.000000, reward total was -18.000000. running mean: -17.356746
episode 4304.000000, reward total was -16.000000. running mean: -17.343179
episode 4305.000000, reward total was -18.000000. running mean: -17.349747
episode 4306.000000, reward total was -18.000000. running mean: -17.356250
episode 4307.000000, reward total was -19.000000. running mean: -17.372687
episode 4308.000000, reward total was -14.000000. running mean: -17.338960
episode 4309.000000, reward total was -15.000000. running mean: -17.315571
episode 4310.000000, reward total was -20.000000. running mean: -17.342415
episode 4311.000000, reward total was -19.000000. running mean: -17.358991
episode 4312.000000, reward total was -19.000000. running mean: -17.375401
episode 4313.000000, reward total was -16.000000. running mean: -17.361647
episode 4314.000000, reward total was -17.000000. running mean: -17.358030
episode 4315.000000, rewa

episode 4412.000000, reward total was -12.000000. running mean: -16.994199
episode 4413.000000, reward total was -17.000000. running mean: -16.994257
episode 4414.000000, reward total was -15.000000. running mean: -16.974315
episode 4415.000000, reward total was -21.000000. running mean: -17.014572
episode 4416.000000, reward total was -18.000000. running mean: -17.024426
episode 4417.000000, reward total was -21.000000. running mean: -17.064182
episode 4418.000000, reward total was -21.000000. running mean: -17.103540
episode 4419.000000, reward total was -21.000000. running mean: -17.142504
episode 4420.000000, reward total was -16.000000. running mean: -17.131079
episode 4421.000000, reward total was -16.000000. running mean: -17.119769
episode 4422.000000, reward total was -13.000000. running mean: -17.078571
episode 4423.000000, reward total was -11.000000. running mean: -17.017785
episode 4424.000000, reward total was -17.000000. running mean: -17.017607
episode 4425.000000, rewa

episode 4522.000000, reward total was -16.000000. running mean: -16.790628
episode 4523.000000, reward total was -19.000000. running mean: -16.812722
episode 4524.000000, reward total was -14.000000. running mean: -16.784594
episode 4525.000000, reward total was -15.000000. running mean: -16.766748
episode 4526.000000, reward total was -19.000000. running mean: -16.789081
episode 4527.000000, reward total was -14.000000. running mean: -16.761190
episode 4528.000000, reward total was -16.000000. running mean: -16.753578
episode 4529.000000, reward total was -13.000000. running mean: -16.716042
episode 4530.000000, reward total was -16.000000. running mean: -16.708882
episode 4531.000000, reward total was -18.000000. running mean: -16.721793
episode 4532.000000, reward total was -15.000000. running mean: -16.704575
episode 4533.000000, reward total was -19.000000. running mean: -16.727529
episode 4534.000000, reward total was -17.000000. running mean: -16.730254
episode 4535.000000, rewa

episode 4632.000000, reward total was -15.000000. running mean: -16.337319
episode 4633.000000, reward total was -16.000000. running mean: -16.333945
episode 4634.000000, reward total was -17.000000. running mean: -16.340606
episode 4635.000000, reward total was -15.000000. running mean: -16.327200
episode 4636.000000, reward total was -15.000000. running mean: -16.313928
episode 4637.000000, reward total was -17.000000. running mean: -16.320789
episode 4638.000000, reward total was -15.000000. running mean: -16.307581
episode 4639.000000, reward total was -15.000000. running mean: -16.294505
episode 4640.000000, reward total was -18.000000. running mean: -16.311560
episode 4641.000000, reward total was -12.000000. running mean: -16.268444
episode 4642.000000, reward total was -18.000000. running mean: -16.285760
episode 4643.000000, reward total was -20.000000. running mean: -16.322902
episode 4644.000000, reward total was -18.000000. running mean: -16.339673
episode 4645.000000, rewa

episode 4742.000000, reward total was -19.000000. running mean: -16.250769
episode 4743.000000, reward total was -13.000000. running mean: -16.218261
episode 4744.000000, reward total was -17.000000. running mean: -16.226078
episode 4745.000000, reward total was -18.000000. running mean: -16.243818
episode 4746.000000, reward total was -15.000000. running mean: -16.231379
episode 4747.000000, reward total was -15.000000. running mean: -16.219066
episode 4748.000000, reward total was -19.000000. running mean: -16.246875
episode 4749.000000, reward total was -17.000000. running mean: -16.254406
episode 4750.000000, reward total was -21.000000. running mean: -16.301862
episode 4751.000000, reward total was -17.000000. running mean: -16.308844
episode 4752.000000, reward total was -18.000000. running mean: -16.325755
episode 4753.000000, reward total was -17.000000. running mean: -16.332498
episode 4754.000000, reward total was -19.000000. running mean: -16.359173
episode 4755.000000, rewa

episode 4852.000000, reward total was -19.000000. running mean: -16.307842
episode 4853.000000, reward total was -15.000000. running mean: -16.294764
episode 4854.000000, reward total was -15.000000. running mean: -16.281816
episode 4855.000000, reward total was -13.000000. running mean: -16.248998
episode 4856.000000, reward total was -13.000000. running mean: -16.216508
episode 4857.000000, reward total was -20.000000. running mean: -16.254343
episode 4858.000000, reward total was -14.000000. running mean: -16.231799
episode 4859.000000, reward total was -17.000000. running mean: -16.239481
episode 4860.000000, reward total was -17.000000. running mean: -16.247087
episode 4861.000000, reward total was -16.000000. running mean: -16.244616
episode 4862.000000, reward total was -17.000000. running mean: -16.252170
episode 4863.000000, reward total was -11.000000. running mean: -16.199648
episode 4864.000000, reward total was -12.000000. running mean: -16.157651
episode 4865.000000, rewa

episode 4962.000000, reward total was -13.000000. running mean: -15.725877
episode 4963.000000, reward total was -13.000000. running mean: -15.698618
episode 4964.000000, reward total was -18.000000. running mean: -15.721632
episode 4965.000000, reward total was -13.000000. running mean: -15.694415
episode 4966.000000, reward total was -15.000000. running mean: -15.687471
episode 4967.000000, reward total was -19.000000. running mean: -15.720596
episode 4968.000000, reward total was -13.000000. running mean: -15.693391
episode 4969.000000, reward total was -16.000000. running mean: -15.696457
episode 4970.000000, reward total was -17.000000. running mean: -15.709492
episode 4971.000000, reward total was -15.000000. running mean: -15.702397
episode 4972.000000, reward total was -13.000000. running mean: -15.675373
episode 4973.000000, reward total was -17.000000. running mean: -15.688619
episode 4974.000000, reward total was -17.000000. running mean: -15.701733
episode 4975.000000, rewa

episode 5072.000000, reward total was -13.000000. running mean: -15.665063
episode 5073.000000, reward total was -11.000000. running mean: -15.618412
episode 5074.000000, reward total was -16.000000. running mean: -15.622228
episode 5075.000000, reward total was -18.000000. running mean: -15.646005
episode 5076.000000, reward total was -9.000000. running mean: -15.579545
episode 5077.000000, reward total was -13.000000. running mean: -15.553750
episode 5078.000000, reward total was -13.000000. running mean: -15.528212
episode 5079.000000, reward total was -19.000000. running mean: -15.562930
episode 5080.000000, reward total was -15.000000. running mean: -15.557301
episode 5081.000000, reward total was -16.000000. running mean: -15.561728
episode 5082.000000, reward total was -16.000000. running mean: -15.566111
episode 5083.000000, reward total was -14.000000. running mean: -15.550450
episode 5084.000000, reward total was -12.000000. running mean: -15.514945
episode 5085.000000, rewar

episode 5182.000000, reward total was -13.000000. running mean: -15.348294
episode 5183.000000, reward total was -16.000000. running mean: -15.354811
episode 5184.000000, reward total was -13.000000. running mean: -15.331263
episode 5185.000000, reward total was -13.000000. running mean: -15.307950
episode 5186.000000, reward total was -12.000000. running mean: -15.274871
episode 5187.000000, reward total was -15.000000. running mean: -15.272122
episode 5188.000000, reward total was -14.000000. running mean: -15.259401
episode 5189.000000, reward total was -15.000000. running mean: -15.256807
episode 5190.000000, reward total was -19.000000. running mean: -15.294239
episode 5191.000000, reward total was -12.000000. running mean: -15.261297
episode 5192.000000, reward total was -3.000000. running mean: -15.138684
episode 5193.000000, reward total was -16.000000. running mean: -15.147297
episode 5194.000000, reward total was -14.000000. running mean: -15.135824
episode 5195.000000, rewar

episode 5292.000000, reward total was -18.000000. running mean: -15.068205
episode 5293.000000, reward total was -21.000000. running mean: -15.127523
episode 5294.000000, reward total was -14.000000. running mean: -15.116248
episode 5295.000000, reward total was -16.000000. running mean: -15.125085
episode 5296.000000, reward total was -12.000000. running mean: -15.093834
episode 5297.000000, reward total was -17.000000. running mean: -15.112896
episode 5298.000000, reward total was -17.000000. running mean: -15.131767
episode 5299.000000, reward total was -14.000000. running mean: -15.120449
episode 5300.000000, reward total was -20.000000. running mean: -15.169245
episode 5301.000000, reward total was -12.000000. running mean: -15.137552
episode 5302.000000, reward total was -13.000000. running mean: -15.116177
episode 5303.000000, reward total was -9.000000. running mean: -15.055015
episode 5304.000000, reward total was -17.000000. running mean: -15.074465
episode 5305.000000, rewar

episode 5402.000000, reward total was -17.000000. running mean: -15.249025
episode 5403.000000, reward total was -11.000000. running mean: -15.206535
episode 5404.000000, reward total was -9.000000. running mean: -15.144470
episode 5405.000000, reward total was -10.000000. running mean: -15.093025
episode 5406.000000, reward total was -9.000000. running mean: -15.032095
episode 5407.000000, reward total was -16.000000. running mean: -15.041774
episode 5408.000000, reward total was -19.000000. running mean: -15.081356
episode 5409.000000, reward total was -18.000000. running mean: -15.110542
episode 5410.000000, reward total was -19.000000. running mean: -15.149437
episode 5411.000000, reward total was -16.000000. running mean: -15.157943
episode 5412.000000, reward total was -12.000000. running mean: -15.126363
episode 5413.000000, reward total was -18.000000. running mean: -15.155099
episode 5414.000000, reward total was -12.000000. running mean: -15.123548
episode 5415.000000, reward

episode 5512.000000, reward total was -16.000000. running mean: -15.013258
episode 5513.000000, reward total was -11.000000. running mean: -14.973125
episode 5514.000000, reward total was -16.000000. running mean: -14.983394
episode 5515.000000, reward total was -11.000000. running mean: -14.943560
episode 5516.000000, reward total was -16.000000. running mean: -14.954124
episode 5517.000000, reward total was -17.000000. running mean: -14.974583
episode 5518.000000, reward total was -17.000000. running mean: -14.994837
episode 5519.000000, reward total was -17.000000. running mean: -15.014889
episode 5520.000000, reward total was -17.000000. running mean: -15.034740
episode 5521.000000, reward total was -6.000000. running mean: -14.944393
episode 5522.000000, reward total was -15.000000. running mean: -14.944949
episode 5523.000000, reward total was -19.000000. running mean: -14.985499
episode 5524.000000, reward total was -13.000000. running mean: -14.965644
episode 5525.000000, rewar

episode 5622.000000, reward total was -13.000000. running mean: -15.285844
episode 5623.000000, reward total was -13.000000. running mean: -15.262986
episode 5624.000000, reward total was -15.000000. running mean: -15.260356
episode 5625.000000, reward total was -10.000000. running mean: -15.207753
episode 5626.000000, reward total was -14.000000. running mean: -15.195675
episode 5627.000000, reward total was -18.000000. running mean: -15.223718
episode 5628.000000, reward total was -13.000000. running mean: -15.201481
episode 5629.000000, reward total was -14.000000. running mean: -15.189466
episode 5630.000000, reward total was -13.000000. running mean: -15.167572
episode 5631.000000, reward total was -17.000000. running mean: -15.185896
episode 5632.000000, reward total was -17.000000. running mean: -15.204037
episode 5633.000000, reward total was -16.000000. running mean: -15.211997
episode 5634.000000, reward total was -17.000000. running mean: -15.229877
episode 5635.000000, rewa

episode 5732.000000, reward total was -16.000000. running mean: -14.590539
episode 5733.000000, reward total was -17.000000. running mean: -14.614633
episode 5734.000000, reward total was -15.000000. running mean: -14.618487
episode 5735.000000, reward total was -11.000000. running mean: -14.582302
episode 5736.000000, reward total was -8.000000. running mean: -14.516479
episode 5737.000000, reward total was -12.000000. running mean: -14.491314
episode 5738.000000, reward total was -16.000000. running mean: -14.506401
episode 5739.000000, reward total was -14.000000. running mean: -14.501337
episode 5740.000000, reward total was -13.000000. running mean: -14.486324
episode 5741.000000, reward total was -11.000000. running mean: -14.451461
episode 5742.000000, reward total was -16.000000. running mean: -14.466946
episode 5743.000000, reward total was -16.000000. running mean: -14.482276
episode 5744.000000, reward total was -13.000000. running mean: -14.467454
episode 5745.000000, rewar

episode 5842.000000, reward total was -9.000000. running mean: -14.152025
episode 5843.000000, reward total was -12.000000. running mean: -14.130504
episode 5844.000000, reward total was -13.000000. running mean: -14.119199
episode 5845.000000, reward total was -16.000000. running mean: -14.138007
episode 5846.000000, reward total was -13.000000. running mean: -14.126627
episode 5847.000000, reward total was -17.000000. running mean: -14.155361
episode 5848.000000, reward total was -18.000000. running mean: -14.193807
episode 5849.000000, reward total was -11.000000. running mean: -14.161869
episode 5850.000000, reward total was -18.000000. running mean: -14.200251
episode 5851.000000, reward total was -15.000000. running mean: -14.208248
episode 5852.000000, reward total was -12.000000. running mean: -14.186166
episode 5853.000000, reward total was -12.000000. running mean: -14.164304
episode 5854.000000, reward total was -13.000000. running mean: -14.152661
episode 5855.000000, rewar

episode 5952.000000, reward total was -11.000000. running mean: -14.215673
episode 5953.000000, reward total was -14.000000. running mean: -14.213516
episode 5954.000000, reward total was -20.000000. running mean: -14.271381
episode 5955.000000, reward total was -6.000000. running mean: -14.188667
episode 5956.000000, reward total was -17.000000. running mean: -14.216780
episode 5957.000000, reward total was -19.000000. running mean: -14.264613
episode 5958.000000, reward total was -15.000000. running mean: -14.271966
episode 5959.000000, reward total was -18.000000. running mean: -14.309247
episode 5960.000000, reward total was -11.000000. running mean: -14.276154
episode 5961.000000, reward total was -9.000000. running mean: -14.223393
episode 5962.000000, reward total was -18.000000. running mean: -14.261159
episode 5963.000000, reward total was -13.000000. running mean: -14.248547
episode 5964.000000, reward total was -17.000000. running mean: -14.276062
episode 5965.000000, reward

episode 6062.000000, reward total was -15.000000. running mean: -14.175995
episode 6063.000000, reward total was -15.000000. running mean: -14.184235
episode 6064.000000, reward total was -16.000000. running mean: -14.202392
episode 6065.000000, reward total was -12.000000. running mean: -14.180368
episode 6066.000000, reward total was -19.000000. running mean: -14.228565
episode 6067.000000, reward total was -13.000000. running mean: -14.216279
episode 6068.000000, reward total was -11.000000. running mean: -14.184116
episode 6069.000000, reward total was -15.000000. running mean: -14.192275
episode 6070.000000, reward total was -15.000000. running mean: -14.200352
episode 6071.000000, reward total was -18.000000. running mean: -14.238349
episode 6072.000000, reward total was -9.000000. running mean: -14.185965
episode 6073.000000, reward total was -18.000000. running mean: -14.224106
episode 6074.000000, reward total was -17.000000. running mean: -14.251865
episode 6075.000000, rewar

episode 6172.000000, reward total was -10.000000. running mean: -13.790552
episode 6173.000000, reward total was -16.000000. running mean: -13.812647
episode 6174.000000, reward total was -9.000000. running mean: -13.764520
episode 6175.000000, reward total was -15.000000. running mean: -13.776875
episode 6176.000000, reward total was -12.000000. running mean: -13.759107
episode 6177.000000, reward total was -12.000000. running mean: -13.741515
episode 6178.000000, reward total was -15.000000. running mean: -13.754100
episode 6179.000000, reward total was -15.000000. running mean: -13.766559
episode 6180.000000, reward total was -11.000000. running mean: -13.738894
episode 6181.000000, reward total was -13.000000. running mean: -13.731505
episode 6182.000000, reward total was -9.000000. running mean: -13.684190
episode 6183.000000, reward total was -5.000000. running mean: -13.597348
episode 6184.000000, reward total was -17.000000. running mean: -13.631374
episode 6185.000000, reward 

episode 6282.000000, reward total was -14.000000. running mean: -13.529707
episode 6283.000000, reward total was -15.000000. running mean: -13.544410
episode 6284.000000, reward total was -17.000000. running mean: -13.578966
episode 6285.000000, reward total was -12.000000. running mean: -13.563176
episode 6286.000000, reward total was -14.000000. running mean: -13.567544
episode 6287.000000, reward total was -11.000000. running mean: -13.541869
episode 6288.000000, reward total was -16.000000. running mean: -13.566450
episode 6289.000000, reward total was -13.000000. running mean: -13.560785
episode 6290.000000, reward total was -13.000000. running mean: -13.555178
episode 6291.000000, reward total was -16.000000. running mean: -13.579626
episode 6292.000000, reward total was -8.000000. running mean: -13.523830
episode 6293.000000, reward total was -13.000000. running mean: -13.518591
episode 6294.000000, reward total was -12.000000. running mean: -13.503405
episode 6295.000000, rewar

episode 6392.000000, reward total was -18.000000. running mean: -13.559499
episode 6393.000000, reward total was -15.000000. running mean: -13.573904
episode 6394.000000, reward total was -13.000000. running mean: -13.568165
episode 6395.000000, reward total was -12.000000. running mean: -13.552484
episode 6396.000000, reward total was -15.000000. running mean: -13.566959
episode 6397.000000, reward total was -19.000000. running mean: -13.621289
episode 6398.000000, reward total was -14.000000. running mean: -13.625076
episode 6399.000000, reward total was -14.000000. running mean: -13.628825
episode 6400.000000, reward total was -8.000000. running mean: -13.572537
episode 6401.000000, reward total was -11.000000. running mean: -13.546812
episode 6402.000000, reward total was -15.000000. running mean: -13.561344
episode 6403.000000, reward total was -17.000000. running mean: -13.595730
episode 6404.000000, reward total was -11.000000. running mean: -13.569773
episode 6405.000000, rewar

episode 6502.000000, reward total was -12.000000. running mean: -12.864112
episode 6503.000000, reward total was -17.000000. running mean: -12.905471
episode 6504.000000, reward total was -6.000000. running mean: -12.836416
episode 6505.000000, reward total was -17.000000. running mean: -12.878052
episode 6506.000000, reward total was -15.000000. running mean: -12.899271
episode 6507.000000, reward total was -17.000000. running mean: -12.940278
episode 6508.000000, reward total was -15.000000. running mean: -12.960876
episode 6509.000000, reward total was -16.000000. running mean: -12.991267
episode 6510.000000, reward total was -15.000000. running mean: -13.011354
episode 6511.000000, reward total was -14.000000. running mean: -13.021241
episode 6512.000000, reward total was -5.000000. running mean: -12.941028
episode 6513.000000, reward total was -14.000000. running mean: -12.951618
episode 6514.000000, reward total was -13.000000. running mean: -12.952102
episode 6515.000000, reward

episode 6612.000000, reward total was -18.000000. running mean: -13.007449
episode 6613.000000, reward total was -17.000000. running mean: -13.047374
episode 6614.000000, reward total was -15.000000. running mean: -13.066901
episode 6615.000000, reward total was -14.000000. running mean: -13.076232
episode 6616.000000, reward total was -14.000000. running mean: -13.085469
episode 6617.000000, reward total was -15.000000. running mean: -13.104615
episode 6618.000000, reward total was -10.000000. running mean: -13.073568
episode 6619.000000, reward total was -10.000000. running mean: -13.042833
episode 6620.000000, reward total was -13.000000. running mean: -13.042404
episode 6621.000000, reward total was -14.000000. running mean: -13.051980
episode 6622.000000, reward total was -5.000000. running mean: -12.971461
episode 6623.000000, reward total was -8.000000. running mean: -12.921746
episode 6624.000000, reward total was -13.000000. running mean: -12.922529
episode 6625.000000, reward

episode 6722.000000, reward total was -9.000000. running mean: -12.547861
episode 6723.000000, reward total was -9.000000. running mean: -12.512382
episode 6724.000000, reward total was -12.000000. running mean: -12.507258
episode 6725.000000, reward total was -11.000000. running mean: -12.492186
episode 6726.000000, reward total was -16.000000. running mean: -12.527264
episode 6727.000000, reward total was -14.000000. running mean: -12.541991
episode 6728.000000, reward total was -17.000000. running mean: -12.586571
episode 6729.000000, reward total was -15.000000. running mean: -12.610706
episode 6730.000000, reward total was -13.000000. running mean: -12.614599
episode 6731.000000, reward total was -9.000000. running mean: -12.578453
episode 6732.000000, reward total was -9.000000. running mean: -12.542668
episode 6733.000000, reward total was -2.000000. running mean: -12.437241
episode 6734.000000, reward total was -15.000000. running mean: -12.462869
episode 6735.000000, reward to

episode 6832.000000, reward total was -14.000000. running mean: -12.505032
episode 6833.000000, reward total was -1.000000. running mean: -12.389982
episode 6834.000000, reward total was -16.000000. running mean: -12.426082
episode 6835.000000, reward total was -16.000000. running mean: -12.461821
episode 6836.000000, reward total was -18.000000. running mean: -12.517203
episode 6837.000000, reward total was -9.000000. running mean: -12.482031
episode 6838.000000, reward total was -17.000000. running mean: -12.527210
episode 6839.000000, reward total was -17.000000. running mean: -12.571938
episode 6840.000000, reward total was -19.000000. running mean: -12.636219
episode 6841.000000, reward total was -14.000000. running mean: -12.649857
episode 6842.000000, reward total was -6.000000. running mean: -12.583358
episode 6843.000000, reward total was -13.000000. running mean: -12.587525
episode 6844.000000, reward total was -16.000000. running mean: -12.621649
episode 6845.000000, reward 

episode 6942.000000, reward total was -12.000000. running mean: -12.381538
episode 6943.000000, reward total was -13.000000. running mean: -12.387723
episode 6944.000000, reward total was -16.000000. running mean: -12.423845
episode 6945.000000, reward total was -15.000000. running mean: -12.449607
episode 6946.000000, reward total was -12.000000. running mean: -12.445111
episode 6947.000000, reward total was -13.000000. running mean: -12.450660
episode 6948.000000, reward total was -9.000000. running mean: -12.416153
episode 6949.000000, reward total was -17.000000. running mean: -12.461992
episode 6950.000000, reward total was -7.000000. running mean: -12.407372
episode 6951.000000, reward total was -16.000000. running mean: -12.443298
episode 6952.000000, reward total was -13.000000. running mean: -12.448865
episode 6953.000000, reward total was -9.000000. running mean: -12.414376
episode 6954.000000, reward total was -11.000000. running mean: -12.400233
episode 6955.000000, reward 