In [1]:
import gym
import numpy as np

from gym.wrappers import AtariPreprocessing
# Environment, network weights, hyperparameters and optimizer state shared by
# every function defined below (they read these names as module-level globals).
# NOTE(review): this only sets an attribute on the gym module; nothing visible
# here shows gym reading it, and the code below unpacks 4 values from
# env.step(), i.e. the old step API — TODO confirm the line is needed at all.
gym.new_step_api=True
env = gym.make('Pong-v0')

H = 200 # number of hidden layer neurons
D = 80 * 80 # input dimensionality: 80x80 grid
# Two-layer policy network: W1 has shape (H, D) (input -> hidden) and W2 has
# shape (H,) (hidden -> single output score).
model = {}
model['W1'] = np.random.randn(H,D) / np.sqrt(D) # "Xavier" initialization
model['W2'] = np.random.randn(H) / np.sqrt(H)
# hyperparameters
batch_size = 10 # every how many episodes to do a param update?
learning_rate = 1e-4
gamma = 0.99 # discount factor for reward
decay_rate = 0.99 # decay factor for RMSProp leaky sum of grad^2
# Both dicts mirror the keys and shapes of `model` and start at zero.
grad_buffer = { k : np.zeros_like(v) for k,v in model.items() } # update buffers that add up gradients over a batch
rmsprop_cache = { k : np.zeros_like(v) for k,v in model.items() } # rmsprop memory

def sigmoid(x):
  """Logistic "squashing" function mapping real inputs to the interval [0, 1].

  Numerically stable: only exp(-|x|) is ever evaluated, so very negative
  inputs no longer overflow (the naive 1 / (1 + exp(-x)) computes exp(+big)
  and emits an overflow RuntimeWarning).

  Args:
    x: scalar or array-like of real numbers.

  Returns:
    A NumPy float for scalar input, otherwise an ndarray of the same shape.
  """
  x = np.asarray(x, dtype=float)
  decay = np.exp(-np.abs(x)) # always in [0, 1], cannot overflow
  # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- same function, safe form
  result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
  return result[()] # unwrap 0-d arrays so scalar input yields a scalar

def prepro(I):
  """Turn a raw game frame into a flat binary float vector.

  Rows 35..194 are kept, every second row and column is taken, and only
  channel 0 is used. Pixels equal to 0, 144 or 109 (the two background
  colours) become 0.0; every other pixel (paddles, ball) becomes 1.0.

  Unlike the previous version, the input frame is NOT modified: the old code
  assigned into slices that were views of the caller's array.

  Args:
    I: array-like image indexed as [row, column, channel].
       NOTE(review): presumably a 210x160x3 Atari frame, which yields the
       80*80 = 6400 values expected by D — confirm against the environment.

  Returns:
    1-D float64 ndarray of zeros and ones.
  """
  # crop + downsample by factor of 2 in a single (read-only) view
  frame = np.asarray(I)[35:195:2, ::2, 0]
  # foreground = anything that is neither already 0 nor a background colour
  foreground = (frame != 0) & (frame != 144) & (frame != 109)
  return foreground.astype(float).ravel()

def discount_rewards(r, discount=None):
  """Compute discounted returns, walking backwards through the rewards.

  The running sum is reset whenever a non-zero reward is met, because in Pong
  a non-zero reward marks a game boundary (pong specific!).

  The result is always floating point. The previous version allocated the
  output with np.zeros_like(r), so integer rewards silently truncated every
  discounted value.

  Args:
    r: array-like of rewards in time order; any shape is accepted and is
       traversed in flattened (C) order, e.g. (T,) or (T, 1).
    discount: discount factor; when None the module-level `gamma` is used.

  Returns:
    ndarray with the same shape as `r` holding the discounted returns.
  """
  if discount is None:
    discount = gamma
  r = np.asarray(r)
  # keep a float input's own dtype, promote everything else to float64
  out_dtype = r.dtype if np.issubdtype(r.dtype, np.floating) else float
  discounted_r = np.zeros(r.shape, dtype=out_dtype)
  flat_r = r.reshape(-1)
  flat_out = discounted_r.reshape(-1) # view: writes land in discounted_r
  running_add = 0.0
  for t in reversed(range(flat_r.size)):
    if flat_r[t] != 0: running_add = 0.0 # reset the sum at a game boundary
    running_add = running_add * discount + flat_r[t]
    flat_out[t] = running_add
  return discounted_r

def policy_forward(x):
  """Forward pass of the two-layer policy network stored in the global `model`.

  Args:
    x: input vector multiplied by model['W1'].

  Returns:
    (p, h): p is the probability of taking action 2, h is the hidden
    activation vector after the ReLU.
  """
  pre_activation = np.dot(model['W1'], x)
  # ReLU nonlinearity: negative entries are clamped to zero
  hidden = np.where(pre_activation < 0, 0, pre_activation)
  score = np.dot(model['W2'], hidden)
  return sigmoid(score), hidden

def policy_backward(epx, eph, epdlogp):
  """Backward pass over one episode; returns gradients for both weight sets.

  Args:
    epx: stacked inputs of the episode, one row per step.
    eph: stacked hidden states of the episode, one row per step.
    epdlogp: per-step gradient signal at the network output, one row per step.

  Returns:
    dict with keys 'W1' and 'W2' holding the gradients for the matching
    entries of the global `model`.
  """
  grad_w2 = np.dot(eph.T, epdlogp).ravel()
  # push the output signal back through W2 to every hidden unit
  grad_hidden = np.outer(epdlogp, model['W2'])
  # backprop through the ReLU: units that were not active pass no gradient
  grad_hidden = np.where(eph <= 0, 0, grad_hidden)
  grad_w1 = np.dot(grad_hidden.T, epx)
  return {'W1':grad_w1, 'W2':grad_w2}

def model_step(model, observation, prev_x):
  """Pick the next action for one frame using the current policy.

  The choice is deterministic: action 2 when the policy's probability is at
  least 0.5, otherwise action 3 (no random sampling happens here).

  Args:
    model: accepted for symmetry with the callers but not used in this body;
           policy_forward reads the module-level `model`.
    observation: raw frame, passed to prepro.
    prev_x: preprocessed previous frame, or None on the first step.

  Returns:
    (action, prev_x): the chosen action and the preprocessed current frame,
    to be passed back in as `prev_x` on the next call.
  """
  frame = prepro(observation)
  # the network sees the difference between consecutive frames;
  # with no previous frame the input is all zeros
  if prev_x is None:
    diff = np.zeros(D)
  else:
    diff = frame - prev_x

  prob_action2, _hidden = policy_forward(diff)
  if prob_action2 >= 0.5:
    chosen = 2
  else:
    chosen = 3

  return chosen, frame

def play_game(env, model):
  """Play one episode (at most 1000 steps) with the policy and show it as a GIF.

  Each step renders a frame, asks model_step for an action and advances the
  environment. Exactly one summary line is printed: the "finished after N
  timesteps" message when the environment reports `done`, or the "finished
  without success" message when the step budget runs out first. (Previously
  the second message was printed in both cases.)

  The environment is closed even if a step raises.

  Args:
    env: environment exposing reset(), render(mode=...), step(action), close().
    model: forwarded to model_step.

  NOTE(review): display_frames_as_gif is not defined in the visible part of
  the notebook; it must be provided elsewhere before this function is called.
  """
  observation = env.reset()

  frames = []
  cumulated_reward = 0

  prev_x = None # used in computing the difference frame

  try:
    for t in range(1000):
      frames.append(env.render(mode = 'rgb_array'))
      action, prev_x = model_step(model, observation, prev_x)
      observation, reward, done, info = env.step(action)
      cumulated_reward += reward
      if done:
        print("Episode finished after {} timesteps, accumulated reward = {}".format(t+1, cumulated_reward))
        break
    else:
      # reached only when the loop was NOT left through `break`
      print("Episode finished without success, accumulated reward = {}".format(cumulated_reward))
  finally:
    env.close()
  display_frames_as_gif(frames)

def train_model(env, model, total_episodes = 100):
  """Train the policy with policy gradients and RMSProp for a number of episodes.

  Every step samples an action from the policy, records the input, hidden
  state, output gradient and reward. At the end of each episode the rewards
  are discounted and standardized, used to weight the recorded gradients, and
  the resulting gradient is added to the global `grad_buffer`. Every
  `batch_size` episodes an RMSProp step updates `model` in place (using the
  global `rmsprop_cache`, `learning_rate` and `decay_rate`).

  Changes compared with the previous version:
    * the standardization skips the division when the standard deviation is
      zero instead of filling the gradient (and then the weights) with NaN;
    * the loop stops as soon as `total_episodes` episodes have been played, so
      zero, negative or non-integer values can no longer loop forever;
    * the episode counter is printed as an integer rather than `1.000000`.

  Args:
    env: environment exposing reset() and step(action) returning
         (observation, reward, done, info).
    model: dict of weight arrays updated in place. NOTE(review): the forward
           and backward passes read the module-level `model`, so passing a
           different dict here would update weights the policy does not use.
    total_episodes: number of episodes to play before returning.

  Returns:
    list of (episode_number, reward_sum, running_reward) tuples, one per
    finished episode.
  """
  hist = []
  observation = env.reset()

  prev_x = None # used in computing the difference frame
  xs,hs,dlogps,drs = [],[],[],[]
  running_reward = None
  reward_sum = 0
  episode_number = 0

  while episode_number < total_episodes:

    cur_x = prepro(observation)
    x = cur_x - prev_x if prev_x is not None else np.zeros(D)
    prev_x = cur_x

    # forward the policy network and sample an action from the returned probability
    aprob, h = policy_forward(x)
    action = 2 if np.random.uniform() < aprob else 3 # roll the dice!

    # record various intermediates (needed later for backprop)
    xs.append(x) # observation
    hs.append(h) # hidden state
    y = 1 if action == 2 else 0 # a "fake label"
    dlogps.append(y - aprob) # grad that encourages the action that was taken to be taken (see http://cs231n.github.io/neural-networks-2/#losses if confused)

    # step the environment and get new measurements
    observation, reward, done, info = env.step(action)
    reward_sum += reward

    drs.append(reward) # record reward (has to be done after we call step() to get reward for previous action)

    if done: # an episode finished
      episode_number += 1

      # stack together all inputs, hidden states, action gradients, and rewards for this episode
      epx = np.vstack(xs)
      eph = np.vstack(hs)
      epdlogp = np.vstack(dlogps)
      epr = np.vstack(drs)
      xs,hs,dlogps,drs = [],[],[],[] # reset array memory

      # compute the discounted reward backwards through time
      discounted_epr = discount_rewards(epr)
      # standardize the rewards to be unit normal (helps control the gradient estimator variance)
      discounted_epr -= np.mean(discounted_epr)
      reward_std = np.std(discounted_epr)
      if reward_std > 0: # all-equal returns: leave the (all-zero) values alone instead of producing NaN
        discounted_epr /= reward_std

      epdlogp *= discounted_epr # modulate the gradient with advantage (PG magic happens right here.)
      grad = policy_backward(epx, eph, epdlogp)
      for k in model: grad_buffer[k] += grad[k] # accumulate grad over batch

      # perform rmsprop parameter update every batch_size episodes
      if episode_number % batch_size == 0:
        for k,v in model.items():
          g = grad_buffer[k] # gradient
          rmsprop_cache[k] = decay_rate * rmsprop_cache[k] + (1 - decay_rate) * g**2
          model[k] += learning_rate * g / (np.sqrt(rmsprop_cache[k]) + 1e-5)
          grad_buffer[k] = np.zeros_like(v) # reset batch gradient buffer

      # boring book-keeping
      running_reward = reward_sum if running_reward is None else running_reward * 0.99 + reward_sum * 0.01
      hist.append((episode_number, reward_sum, running_reward))
      print('episode %d, reward total was %f. running mean: %f' % (episode_number, reward_sum, running_reward))
      reward_sum = 0
      observation = env.reset() # reset env
      prev_x = None

  return hist

   
    

  logger.warn(
  deprecation(
  deprecation(


In [2]:
# Train for 7000 episodes and time the whole call; hist1 receives the list of
# (episode_number, reward_sum, running_reward) tuples returned by train_model.
%time hist1 = train_model(env, model, total_episodes=7000)

  logger.deprecation(


episode 1.000000, reward total was -20.000000. running mean: -20.000000
episode 2.000000, reward total was -21.000000. running mean: -20.010000
episode 3.000000, reward total was -21.000000. running mean: -20.019900
episode 4.000000, reward total was -20.000000. running mean: -20.019701
episode 5.000000, reward total was -20.000000. running mean: -20.019504
episode 6.000000, reward total was -21.000000. running mean: -20.029309
episode 7.000000, reward total was -21.000000. running mean: -20.039016
episode 8.000000, reward total was -21.000000. running mean: -20.048626
episode 9.000000, reward total was -21.000000. running mean: -20.058139
episode 10.000000, reward total was -21.000000. running mean: -20.067558
episode 11.000000, reward total was -17.000000. running mean: -20.036882
episode 12.000000, reward total was -21.000000. running mean: -20.046514
episode 13.000000, reward total was -21.000000. running mean: -20.056049
episode 14.000000, reward total was -20.000000. running mean

episode 114.000000, reward total was -21.000000. running mean: -20.306171
episode 115.000000, reward total was -21.000000. running mean: -20.313109
episode 116.000000, reward total was -21.000000. running mean: -20.319978
episode 117.000000, reward total was -19.000000. running mean: -20.306778
episode 118.000000, reward total was -20.000000. running mean: -20.303710
episode 119.000000, reward total was -21.000000. running mean: -20.310673
episode 120.000000, reward total was -20.000000. running mean: -20.307567
episode 121.000000, reward total was -21.000000. running mean: -20.314491
episode 122.000000, reward total was -19.000000. running mean: -20.301346
episode 123.000000, reward total was -20.000000. running mean: -20.298332
episode 124.000000, reward total was -21.000000. running mean: -20.305349
episode 125.000000, reward total was -19.000000. running mean: -20.292296
episode 126.000000, reward total was -20.000000. running mean: -20.289373
episode 127.000000, reward total was -

episode 225.000000, reward total was -21.000000. running mean: -20.414238
episode 226.000000, reward total was -21.000000. running mean: -20.420096
episode 227.000000, reward total was -21.000000. running mean: -20.425895
episode 228.000000, reward total was -21.000000. running mean: -20.431636
episode 229.000000, reward total was -20.000000. running mean: -20.427319
episode 230.000000, reward total was -21.000000. running mean: -20.433046
episode 231.000000, reward total was -21.000000. running mean: -20.438716
episode 232.000000, reward total was -21.000000. running mean: -20.444329
episode 233.000000, reward total was -21.000000. running mean: -20.449885
episode 234.000000, reward total was -20.000000. running mean: -20.445386
episode 235.000000, reward total was -21.000000. running mean: -20.450933
episode 236.000000, reward total was -20.000000. running mean: -20.446423
episode 237.000000, reward total was -19.000000. running mean: -20.431959
episode 238.000000, reward total was -

episode 336.000000, reward total was -19.000000. running mean: -20.468822
episode 337.000000, reward total was -21.000000. running mean: -20.474134
episode 338.000000, reward total was -20.000000. running mean: -20.469392
episode 339.000000, reward total was -21.000000. running mean: -20.474698
episode 340.000000, reward total was -21.000000. running mean: -20.479951
episode 341.000000, reward total was -20.000000. running mean: -20.475152
episode 342.000000, reward total was -20.000000. running mean: -20.470400
episode 343.000000, reward total was -21.000000. running mean: -20.475696
episode 344.000000, reward total was -21.000000. running mean: -20.480939
episode 345.000000, reward total was -21.000000. running mean: -20.486130
episode 346.000000, reward total was -20.000000. running mean: -20.481269
episode 347.000000, reward total was -21.000000. running mean: -20.486456
episode 348.000000, reward total was -20.000000. running mean: -20.481591
episode 349.000000, reward total was -

episode 447.000000, reward total was -21.000000. running mean: -20.367165
episode 448.000000, reward total was -20.000000. running mean: -20.363494
episode 449.000000, reward total was -21.000000. running mean: -20.369859
episode 450.000000, reward total was -20.000000. running mean: -20.366160
episode 451.000000, reward total was -21.000000. running mean: -20.372499
episode 452.000000, reward total was -20.000000. running mean: -20.368774
episode 453.000000, reward total was -20.000000. running mean: -20.365086
episode 454.000000, reward total was -21.000000. running mean: -20.371435
episode 455.000000, reward total was -21.000000. running mean: -20.377721
episode 456.000000, reward total was -20.000000. running mean: -20.373943
episode 457.000000, reward total was -21.000000. running mean: -20.380204
episode 458.000000, reward total was -20.000000. running mean: -20.376402
episode 459.000000, reward total was -18.000000. running mean: -20.352638
episode 460.000000, reward total was -

episode 558.000000, reward total was -21.000000. running mean: -20.356225
episode 559.000000, reward total was -21.000000. running mean: -20.362662
episode 560.000000, reward total was -18.000000. running mean: -20.339036
episode 561.000000, reward total was -21.000000. running mean: -20.345645
episode 562.000000, reward total was -21.000000. running mean: -20.352189
episode 563.000000, reward total was -19.000000. running mean: -20.338667
episode 564.000000, reward total was -21.000000. running mean: -20.345280
episode 565.000000, reward total was -21.000000. running mean: -20.351828
episode 566.000000, reward total was -21.000000. running mean: -20.358309
episode 567.000000, reward total was -21.000000. running mean: -20.364726
episode 568.000000, reward total was -21.000000. running mean: -20.371079
episode 569.000000, reward total was -21.000000. running mean: -20.377368
episode 570.000000, reward total was -20.000000. running mean: -20.373595
episode 571.000000, reward total was -

episode 669.000000, reward total was -20.000000. running mean: -20.392238
episode 670.000000, reward total was -19.000000. running mean: -20.378316
episode 671.000000, reward total was -19.000000. running mean: -20.364533
episode 672.000000, reward total was -20.000000. running mean: -20.360887
episode 673.000000, reward total was -21.000000. running mean: -20.367278
episode 674.000000, reward total was -20.000000. running mean: -20.363606
episode 675.000000, reward total was -21.000000. running mean: -20.369970
episode 676.000000, reward total was -21.000000. running mean: -20.376270
episode 677.000000, reward total was -21.000000. running mean: -20.382507
episode 678.000000, reward total was -20.000000. running mean: -20.378682
episode 679.000000, reward total was -21.000000. running mean: -20.384895
episode 680.000000, reward total was -21.000000. running mean: -20.391046
episode 681.000000, reward total was -21.000000. running mean: -20.397136
episode 682.000000, reward total was -

episode 780.000000, reward total was -21.000000. running mean: -20.323799
episode 781.000000, reward total was -21.000000. running mean: -20.330561
episode 782.000000, reward total was -19.000000. running mean: -20.317255
episode 783.000000, reward total was -21.000000. running mean: -20.324083
episode 784.000000, reward total was -21.000000. running mean: -20.330842
episode 785.000000, reward total was -21.000000. running mean: -20.337533
episode 786.000000, reward total was -21.000000. running mean: -20.344158
episode 787.000000, reward total was -21.000000. running mean: -20.350717
episode 788.000000, reward total was -21.000000. running mean: -20.357209
episode 789.000000, reward total was -20.000000. running mean: -20.353637
episode 790.000000, reward total was -20.000000. running mean: -20.350101
episode 791.000000, reward total was -21.000000. running mean: -20.356600
episode 792.000000, reward total was -21.000000. running mean: -20.363034
episode 793.000000, reward total was -

episode 891.000000, reward total was -20.000000. running mean: -20.357660
episode 892.000000, reward total was -20.000000. running mean: -20.354083
episode 893.000000, reward total was -21.000000. running mean: -20.360543
episode 894.000000, reward total was -21.000000. running mean: -20.366937
episode 895.000000, reward total was -21.000000. running mean: -20.373268
episode 896.000000, reward total was -19.000000. running mean: -20.359535
episode 897.000000, reward total was -20.000000. running mean: -20.355940
episode 898.000000, reward total was -21.000000. running mean: -20.362380
episode 899.000000, reward total was -21.000000. running mean: -20.368756
episode 900.000000, reward total was -21.000000. running mean: -20.375069
episode 901.000000, reward total was -20.000000. running mean: -20.371318
episode 902.000000, reward total was -19.000000. running mean: -20.357605
episode 903.000000, reward total was -21.000000. running mean: -20.364029
episode 904.000000, reward total was -

episode 1002.000000, reward total was -21.000000. running mean: -20.457981
episode 1003.000000, reward total was -19.000000. running mean: -20.443401
episode 1004.000000, reward total was -21.000000. running mean: -20.448967
episode 1005.000000, reward total was -21.000000. running mean: -20.454477
episode 1006.000000, reward total was -21.000000. running mean: -20.459932
episode 1007.000000, reward total was -19.000000. running mean: -20.445333
episode 1008.000000, reward total was -19.000000. running mean: -20.430880
episode 1009.000000, reward total was -20.000000. running mean: -20.426571
episode 1010.000000, reward total was -21.000000. running mean: -20.432305
episode 1011.000000, reward total was -21.000000. running mean: -20.437982
episode 1012.000000, reward total was -20.000000. running mean: -20.433602
episode 1013.000000, reward total was -21.000000. running mean: -20.439266
episode 1014.000000, reward total was -21.000000. running mean: -20.444874
episode 1015.000000, rewa

episode 1112.000000, reward total was -21.000000. running mean: -20.296009
episode 1113.000000, reward total was -21.000000. running mean: -20.303048
episode 1114.000000, reward total was -21.000000. running mean: -20.310018
episode 1115.000000, reward total was -21.000000. running mean: -20.316918
episode 1116.000000, reward total was -20.000000. running mean: -20.313749
episode 1117.000000, reward total was -21.000000. running mean: -20.320611
episode 1118.000000, reward total was -21.000000. running mean: -20.327405
episode 1119.000000, reward total was -21.000000. running mean: -20.334131
episode 1120.000000, reward total was -19.000000. running mean: -20.320790
episode 1121.000000, reward total was -21.000000. running mean: -20.327582
episode 1122.000000, reward total was -20.000000. running mean: -20.324306
episode 1123.000000, reward total was -21.000000. running mean: -20.331063
episode 1124.000000, reward total was -18.000000. running mean: -20.307752
episode 1125.000000, rewa

episode 1222.000000, reward total was -21.000000. running mean: -20.288100
episode 1223.000000, reward total was -20.000000. running mean: -20.285219
episode 1224.000000, reward total was -20.000000. running mean: -20.282367
episode 1225.000000, reward total was -20.000000. running mean: -20.279544
episode 1226.000000, reward total was -21.000000. running mean: -20.286748
episode 1227.000000, reward total was -19.000000. running mean: -20.273881
episode 1228.000000, reward total was -21.000000. running mean: -20.281142
episode 1229.000000, reward total was -21.000000. running mean: -20.288330
episode 1230.000000, reward total was -20.000000. running mean: -20.285447
episode 1231.000000, reward total was -21.000000. running mean: -20.292593
episode 1232.000000, reward total was -20.000000. running mean: -20.289667
episode 1233.000000, reward total was -17.000000. running mean: -20.256770
episode 1234.000000, reward total was -19.000000. running mean: -20.244202
episode 1235.000000, rewa

episode 1332.000000, reward total was -21.000000. running mean: -20.370866
episode 1333.000000, reward total was -21.000000. running mean: -20.377157
episode 1334.000000, reward total was -21.000000. running mean: -20.383386
episode 1335.000000, reward total was -21.000000. running mean: -20.389552
episode 1336.000000, reward total was -20.000000. running mean: -20.385656
episode 1337.000000, reward total was -20.000000. running mean: -20.381800
episode 1338.000000, reward total was -19.000000. running mean: -20.367982
episode 1339.000000, reward total was -20.000000. running mean: -20.364302
episode 1340.000000, reward total was -21.000000. running mean: -20.370659
episode 1341.000000, reward total was -20.000000. running mean: -20.366952
episode 1342.000000, reward total was -19.000000. running mean: -20.353283
episode 1343.000000, reward total was -19.000000. running mean: -20.339750
episode 1344.000000, reward total was -21.000000. running mean: -20.346353
episode 1345.000000, rewa

episode 1442.000000, reward total was -20.000000. running mean: -20.230324
episode 1443.000000, reward total was -20.000000. running mean: -20.228021
episode 1444.000000, reward total was -21.000000. running mean: -20.235741
episode 1445.000000, reward total was -21.000000. running mean: -20.243383
episode 1446.000000, reward total was -21.000000. running mean: -20.250949
episode 1447.000000, reward total was -20.000000. running mean: -20.248440
episode 1448.000000, reward total was -18.000000. running mean: -20.225956
episode 1449.000000, reward total was -21.000000. running mean: -20.233696
episode 1450.000000, reward total was -20.000000. running mean: -20.231359
episode 1451.000000, reward total was -20.000000. running mean: -20.229045
episode 1452.000000, reward total was -21.000000. running mean: -20.236755
episode 1453.000000, reward total was -19.000000. running mean: -20.224387
episode 1454.000000, reward total was -21.000000. running mean: -20.232144
episode 1455.000000, rewa

episode 1552.000000, reward total was -21.000000. running mean: -20.243385
episode 1553.000000, reward total was -21.000000. running mean: -20.250951
episode 1554.000000, reward total was -20.000000. running mean: -20.248442
episode 1555.000000, reward total was -21.000000. running mean: -20.255957
episode 1556.000000, reward total was -19.000000. running mean: -20.243398
episode 1557.000000, reward total was -20.000000. running mean: -20.240964
episode 1558.000000, reward total was -21.000000. running mean: -20.248554
episode 1559.000000, reward total was -21.000000. running mean: -20.256068
episode 1560.000000, reward total was -20.000000. running mean: -20.253508
episode 1561.000000, reward total was -20.000000. running mean: -20.250973
episode 1562.000000, reward total was -20.000000. running mean: -20.248463
episode 1563.000000, reward total was -18.000000. running mean: -20.225978
episode 1564.000000, reward total was -21.000000. running mean: -20.233718
episode 1565.000000, rewa

episode 1662.000000, reward total was -21.000000. running mean: -20.329181
episode 1663.000000, reward total was -17.000000. running mean: -20.295889
episode 1664.000000, reward total was -21.000000. running mean: -20.302930
episode 1665.000000, reward total was -19.000000. running mean: -20.289901
episode 1666.000000, reward total was -20.000000. running mean: -20.287002
episode 1667.000000, reward total was -18.000000. running mean: -20.264132
episode 1668.000000, reward total was -21.000000. running mean: -20.271490
episode 1669.000000, reward total was -21.000000. running mean: -20.278775
episode 1670.000000, reward total was -21.000000. running mean: -20.285988
episode 1671.000000, reward total was -21.000000. running mean: -20.293128
episode 1672.000000, reward total was -21.000000. running mean: -20.300196
episode 1673.000000, reward total was -21.000000. running mean: -20.307195
episode 1674.000000, reward total was -20.000000. running mean: -20.304123
episode 1675.000000, rewa

episode 1772.000000, reward total was -21.000000. running mean: -20.320847
episode 1773.000000, reward total was -20.000000. running mean: -20.317639
episode 1774.000000, reward total was -19.000000. running mean: -20.304462
episode 1775.000000, reward total was -21.000000. running mean: -20.311418
episode 1776.000000, reward total was -20.000000. running mean: -20.308304
episode 1777.000000, reward total was -19.000000. running mean: -20.295221
episode 1778.000000, reward total was -21.000000. running mean: -20.302268
episode 1779.000000, reward total was -21.000000. running mean: -20.309246
episode 1780.000000, reward total was -20.000000. running mean: -20.306153
episode 1781.000000, reward total was -21.000000. running mean: -20.313092
episode 1782.000000, reward total was -18.000000. running mean: -20.289961
episode 1783.000000, reward total was -18.000000. running mean: -20.267061
episode 1784.000000, reward total was -20.000000. running mean: -20.264391
episode 1785.000000, rewa

episode 1882.000000, reward total was -17.000000. running mean: -20.211931
episode 1883.000000, reward total was -21.000000. running mean: -20.219811
episode 1884.000000, reward total was -21.000000. running mean: -20.227613
episode 1885.000000, reward total was -20.000000. running mean: -20.225337
episode 1886.000000, reward total was -21.000000. running mean: -20.233084
episode 1887.000000, reward total was -21.000000. running mean: -20.240753
episode 1888.000000, reward total was -21.000000. running mean: -20.248345
episode 1889.000000, reward total was -20.000000. running mean: -20.245862
episode 1890.000000, reward total was -18.000000. running mean: -20.223403
episode 1891.000000, reward total was -19.000000. running mean: -20.211169
episode 1892.000000, reward total was -20.000000. running mean: -20.209058
episode 1893.000000, reward total was -19.000000. running mean: -20.196967
episode 1894.000000, reward total was -21.000000. running mean: -20.204997
episode 1895.000000, rewa

episode 1992.000000, reward total was -21.000000. running mean: -20.227384
episode 1993.000000, reward total was -20.000000. running mean: -20.225110
episode 1994.000000, reward total was -20.000000. running mean: -20.222859
episode 1995.000000, reward total was -21.000000. running mean: -20.230630
episode 1996.000000, reward total was -21.000000. running mean: -20.238324
episode 1997.000000, reward total was -21.000000. running mean: -20.245941
episode 1998.000000, reward total was -21.000000. running mean: -20.253481
episode 1999.000000, reward total was -21.000000. running mean: -20.260946
episode 2000.000000, reward total was -20.000000. running mean: -20.258337
episode 2001.000000, reward total was -17.000000. running mean: -20.225754
episode 2002.000000, reward total was -21.000000. running mean: -20.233496
episode 2003.000000, reward total was -21.000000. running mean: -20.241161
episode 2004.000000, reward total was -21.000000. running mean: -20.248749
episode 2005.000000, rewa

episode 2102.000000, reward total was -21.000000. running mean: -20.168593
episode 2103.000000, reward total was -19.000000. running mean: -20.156907
episode 2104.000000, reward total was -21.000000. running mean: -20.165338
episode 2105.000000, reward total was -20.000000. running mean: -20.163685
episode 2106.000000, reward total was -20.000000. running mean: -20.162048
episode 2107.000000, reward total was -20.000000. running mean: -20.160427
episode 2108.000000, reward total was -19.000000. running mean: -20.148823
episode 2109.000000, reward total was -20.000000. running mean: -20.147335
episode 2110.000000, reward total was -20.000000. running mean: -20.145862
episode 2111.000000, reward total was -20.000000. running mean: -20.144403
episode 2112.000000, reward total was -20.000000. running mean: -20.142959
episode 2113.000000, reward total was -19.000000. running mean: -20.131529
episode 2114.000000, reward total was -21.000000. running mean: -20.140214
episode 2115.000000, rewa

episode 2212.000000, reward total was -21.000000. running mean: -20.152172
episode 2213.000000, reward total was -19.000000. running mean: -20.140650
episode 2214.000000, reward total was -21.000000. running mean: -20.149244
episode 2215.000000, reward total was -19.000000. running mean: -20.137751
episode 2216.000000, reward total was -21.000000. running mean: -20.146374
episode 2217.000000, reward total was -21.000000. running mean: -20.154910
episode 2218.000000, reward total was -20.000000. running mean: -20.153361
episode 2219.000000, reward total was -21.000000. running mean: -20.161827
episode 2220.000000, reward total was -19.000000. running mean: -20.150209
episode 2221.000000, reward total was -21.000000. running mean: -20.158707
episode 2222.000000, reward total was -21.000000. running mean: -20.167120
episode 2223.000000, reward total was -19.000000. running mean: -20.155449
episode 2224.000000, reward total was -19.000000. running mean: -20.143894
episode 2225.000000, rewa

episode 2322.000000, reward total was -21.000000. running mean: -20.036426
episode 2323.000000, reward total was -20.000000. running mean: -20.036061
episode 2324.000000, reward total was -20.000000. running mean: -20.035701
episode 2325.000000, reward total was -21.000000. running mean: -20.045344
episode 2326.000000, reward total was -21.000000. running mean: -20.054890
episode 2327.000000, reward total was -19.000000. running mean: -20.044341
episode 2328.000000, reward total was -20.000000. running mean: -20.043898
episode 2329.000000, reward total was -21.000000. running mean: -20.053459
episode 2330.000000, reward total was -21.000000. running mean: -20.062924
episode 2331.000000, reward total was -20.000000. running mean: -20.062295
episode 2332.000000, reward total was -19.000000. running mean: -20.051672
episode 2333.000000, reward total was -20.000000. running mean: -20.051155
episode 2334.000000, reward total was -20.000000. running mean: -20.050644
episode 2335.000000, rewa

episode 2432.000000, reward total was -21.000000. running mean: -20.108374
episode 2433.000000, reward total was -20.000000. running mean: -20.107290
episode 2434.000000, reward total was -19.000000. running mean: -20.096217
episode 2435.000000, reward total was -18.000000. running mean: -20.075255
episode 2436.000000, reward total was -20.000000. running mean: -20.074502
episode 2437.000000, reward total was -20.000000. running mean: -20.073757
episode 2438.000000, reward total was -21.000000. running mean: -20.083020
episode 2439.000000, reward total was -21.000000. running mean: -20.092190
episode 2440.000000, reward total was -20.000000. running mean: -20.091268
episode 2441.000000, reward total was -18.000000. running mean: -20.070355
episode 2442.000000, reward total was -21.000000. running mean: -20.079651
episode 2443.000000, reward total was -21.000000. running mean: -20.088855
episode 2444.000000, reward total was -21.000000. running mean: -20.097966
episode 2445.000000, rewa

episode 2542.000000, reward total was -19.000000. running mean: -20.079266
episode 2543.000000, reward total was -21.000000. running mean: -20.088473
episode 2544.000000, reward total was -19.000000. running mean: -20.077588
episode 2545.000000, reward total was -20.000000. running mean: -20.076812
episode 2546.000000, reward total was -21.000000. running mean: -20.086044
episode 2547.000000, reward total was -21.000000. running mean: -20.095184
episode 2548.000000, reward total was -21.000000. running mean: -20.104232
episode 2549.000000, reward total was -19.000000. running mean: -20.093190
episode 2550.000000, reward total was -21.000000. running mean: -20.102258
episode 2551.000000, reward total was -20.000000. running mean: -20.101235
episode 2552.000000, reward total was -20.000000. running mean: -20.100223
episode 2553.000000, reward total was -20.000000. running mean: -20.099221
episode 2554.000000, reward total was -21.000000. running mean: -20.108228
episode 2555.000000, rewa

episode 2652.000000, reward total was -20.000000. running mean: -20.129643
episode 2653.000000, reward total was -19.000000. running mean: -20.118347
episode 2654.000000, reward total was -20.000000. running mean: -20.117163
episode 2655.000000, reward total was -19.000000. running mean: -20.105991
episode 2656.000000, reward total was -21.000000. running mean: -20.114932
episode 2657.000000, reward total was -21.000000. running mean: -20.123782
episode 2658.000000, reward total was -21.000000. running mean: -20.132544
episode 2659.000000, reward total was -21.000000. running mean: -20.141219
episode 2660.000000, reward total was -21.000000. running mean: -20.149807
episode 2661.000000, reward total was -21.000000. running mean: -20.158309
episode 2662.000000, reward total was -21.000000. running mean: -20.166726
episode 2663.000000, reward total was -19.000000. running mean: -20.155058
episode 2664.000000, reward total was -21.000000. running mean: -20.163508
episode 2665.000000, rewa

episode 2762.000000, reward total was -21.000000. running mean: -20.105886
episode 2763.000000, reward total was -19.000000. running mean: -20.094827
episode 2764.000000, reward total was -20.000000. running mean: -20.093879
episode 2765.000000, reward total was -19.000000. running mean: -20.082940
episode 2766.000000, reward total was -21.000000. running mean: -20.092111
episode 2767.000000, reward total was -21.000000. running mean: -20.101190
episode 2768.000000, reward total was -20.000000. running mean: -20.100178
episode 2769.000000, reward total was -21.000000. running mean: -20.109176
episode 2770.000000, reward total was -19.000000. running mean: -20.098084
episode 2771.000000, reward total was -18.000000. running mean: -20.077104
episode 2772.000000, reward total was -20.000000. running mean: -20.076333
episode 2773.000000, reward total was -20.000000. running mean: -20.075569
episode 2774.000000, reward total was -20.000000. running mean: -20.074813
episode 2775.000000, rewa

episode 2872.000000, reward total was -20.000000. running mean: -20.215558
episode 2873.000000, reward total was -19.000000. running mean: -20.203403
episode 2874.000000, reward total was -18.000000. running mean: -20.181369
episode 2875.000000, reward total was -17.000000. running mean: -20.149555
episode 2876.000000, reward total was -21.000000. running mean: -20.158059
episode 2877.000000, reward total was -19.000000. running mean: -20.146479
episode 2878.000000, reward total was -21.000000. running mean: -20.155014
episode 2879.000000, reward total was -21.000000. running mean: -20.163464
episode 2880.000000, reward total was -21.000000. running mean: -20.171829
episode 2881.000000, reward total was -15.000000. running mean: -20.120111
episode 2882.000000, reward total was -21.000000. running mean: -20.128910
episode 2883.000000, reward total was -18.000000. running mean: -20.107621
episode 2884.000000, reward total was -20.000000. running mean: -20.106544
episode 2885.000000, rewa

episode 2982.000000, reward total was -20.000000. running mean: -20.023405
episode 2983.000000, reward total was -19.000000. running mean: -20.013171
episode 2984.000000, reward total was -21.000000. running mean: -20.023040
episode 2985.000000, reward total was -19.000000. running mean: -20.012809
episode 2986.000000, reward total was -17.000000. running mean: -19.982681
episode 2987.000000, reward total was -20.000000. running mean: -19.982854
episode 2988.000000, reward total was -21.000000. running mean: -19.993026
episode 2989.000000, reward total was -20.000000. running mean: -19.993095
episode 2990.000000, reward total was -19.000000. running mean: -19.983164
episode 2991.000000, reward total was -20.000000. running mean: -19.983333
episode 2992.000000, reward total was -21.000000. running mean: -19.993499
episode 2993.000000, reward total was -21.000000. running mean: -20.003564
episode 2994.000000, reward total was -21.000000. running mean: -20.013529
episode 2995.000000, rewa

episode 3092.000000, reward total was -18.000000. running mean: -20.021712
episode 3093.000000, reward total was -19.000000. running mean: -20.011495
episode 3094.000000, reward total was -19.000000. running mean: -20.001380
episode 3095.000000, reward total was -21.000000. running mean: -20.011366
episode 3096.000000, reward total was -20.000000. running mean: -20.011252
episode 3097.000000, reward total was -21.000000. running mean: -20.021140
episode 3098.000000, reward total was -20.000000. running mean: -20.020928
episode 3099.000000, reward total was -15.000000. running mean: -19.970719
episode 3100.000000, reward total was -19.000000. running mean: -19.961012
episode 3101.000000, reward total was -20.000000. running mean: -19.961402
episode 3102.000000, reward total was -20.000000. running mean: -19.961788
episode 3103.000000, reward total was -20.000000. running mean: -19.962170
episode 3104.000000, reward total was -20.000000. running mean: -19.962548
episode 3105.000000, rewa

episode 3202.000000, reward total was -20.000000. running mean: -19.894926
episode 3203.000000, reward total was -21.000000. running mean: -19.905976
episode 3204.000000, reward total was -21.000000. running mean: -19.916917
episode 3205.000000, reward total was -21.000000. running mean: -19.927748
episode 3206.000000, reward total was -20.000000. running mean: -19.928470
episode 3207.000000, reward total was -20.000000. running mean: -19.929185
episode 3208.000000, reward total was -20.000000. running mean: -19.929893
episode 3209.000000, reward total was -18.000000. running mean: -19.910595
episode 3210.000000, reward total was -21.000000. running mean: -19.921489
episode 3211.000000, reward total was -19.000000. running mean: -19.912274
episode 3212.000000, reward total was -21.000000. running mean: -19.923151
episode 3213.000000, reward total was -21.000000. running mean: -19.933919
episode 3214.000000, reward total was -20.000000. running mean: -19.934580
episode 3215.000000, rewa

episode 3312.000000, reward total was -21.000000. running mean: -19.883420
episode 3313.000000, reward total was -18.000000. running mean: -19.864586
episode 3314.000000, reward total was -20.000000. running mean: -19.865940
episode 3315.000000, reward total was -21.000000. running mean: -19.877281
episode 3316.000000, reward total was -19.000000. running mean: -19.868508
episode 3317.000000, reward total was -20.000000. running mean: -19.869823
episode 3318.000000, reward total was -21.000000. running mean: -19.881125
episode 3319.000000, reward total was -20.000000. running mean: -19.882314
episode 3320.000000, reward total was -20.000000. running mean: -19.883490
episode 3321.000000, reward total was -20.000000. running mean: -19.884656
episode 3322.000000, reward total was -21.000000. running mean: -19.895809
episode 3323.000000, reward total was -19.000000. running mean: -19.886851
episode 3324.000000, reward total was -20.000000. running mean: -19.887982
episode 3325.000000, rewa

episode 3422.000000, reward total was -20.000000. running mean: -19.864829
episode 3423.000000, reward total was -21.000000. running mean: -19.876180
episode 3424.000000, reward total was -20.000000. running mean: -19.877418
episode 3425.000000, reward total was -21.000000. running mean: -19.888644
episode 3426.000000, reward total was -18.000000. running mean: -19.869758
episode 3427.000000, reward total was -20.000000. running mean: -19.871060
episode 3428.000000, reward total was -20.000000. running mean: -19.872350
episode 3429.000000, reward total was -20.000000. running mean: -19.873626
episode 3430.000000, reward total was -21.000000. running mean: -19.884890
episode 3431.000000, reward total was -21.000000. running mean: -19.896041
episode 3432.000000, reward total was -20.000000. running mean: -19.897081
episode 3433.000000, reward total was -20.000000. running mean: -19.898110
episode 3434.000000, reward total was -21.000000. running mean: -19.909129
episode 3435.000000, rewa

episode 3532.000000, reward total was -21.000000. running mean: -19.893902
episode 3533.000000, reward total was -19.000000. running mean: -19.884963
episode 3534.000000, reward total was -19.000000. running mean: -19.876114
episode 3535.000000, reward total was -20.000000. running mean: -19.877352
episode 3536.000000, reward total was -19.000000. running mean: -19.868579
episode 3537.000000, reward total was -21.000000. running mean: -19.879893
episode 3538.000000, reward total was -20.000000. running mean: -19.881094
episode 3539.000000, reward total was -20.000000. running mean: -19.882283
episode 3540.000000, reward total was -19.000000. running mean: -19.873460
episode 3541.000000, reward total was -20.000000. running mean: -19.874726
episode 3542.000000, reward total was -21.000000. running mean: -19.885979
episode 3543.000000, reward total was -20.000000. running mean: -19.887119
episode 3544.000000, reward total was -21.000000. running mean: -19.898248
episode 3545.000000, rewa

episode 3642.000000, reward total was -21.000000. running mean: -19.917032
episode 3643.000000, reward total was -19.000000. running mean: -19.907862
episode 3644.000000, reward total was -21.000000. running mean: -19.918783
episode 3645.000000, reward total was -20.000000. running mean: -19.919596
episode 3646.000000, reward total was -21.000000. running mean: -19.930400
episode 3647.000000, reward total was -20.000000. running mean: -19.931096
episode 3648.000000, reward total was -19.000000. running mean: -19.921785
episode 3649.000000, reward total was -21.000000. running mean: -19.932567
episode 3650.000000, reward total was -21.000000. running mean: -19.943241
episode 3651.000000, reward total was -18.000000. running mean: -19.923809
episode 3652.000000, reward total was -20.000000. running mean: -19.924571
episode 3653.000000, reward total was -21.000000. running mean: -19.935325
episode 3654.000000, reward total was -19.000000. running mean: -19.925972
episode 3655.000000, rewa

episode 3752.000000, reward total was -20.000000. running mean: -19.846898
episode 3753.000000, reward total was -20.000000. running mean: -19.848429
episode 3754.000000, reward total was -20.000000. running mean: -19.849945
episode 3755.000000, reward total was -20.000000. running mean: -19.851446
episode 3756.000000, reward total was -20.000000. running mean: -19.852931
episode 3757.000000, reward total was -18.000000. running mean: -19.834402
episode 3758.000000, reward total was -19.000000. running mean: -19.826058
episode 3759.000000, reward total was -19.000000. running mean: -19.817797
episode 3760.000000, reward total was -17.000000. running mean: -19.789619
episode 3761.000000, reward total was -18.000000. running mean: -19.771723
episode 3762.000000, reward total was -21.000000. running mean: -19.784006
episode 3763.000000, reward total was -21.000000. running mean: -19.796166
episode 3764.000000, reward total was -20.000000. running mean: -19.798204
episode 3765.000000, rewa

episode 3862.000000, reward total was -18.000000. running mean: -19.668783
episode 3863.000000, reward total was -19.000000. running mean: -19.662095
episode 3864.000000, reward total was -21.000000. running mean: -19.675474
episode 3865.000000, reward total was -19.000000. running mean: -19.668719
episode 3866.000000, reward total was -21.000000. running mean: -19.682032
episode 3867.000000, reward total was -20.000000. running mean: -19.685212
episode 3868.000000, reward total was -15.000000. running mean: -19.638360
episode 3869.000000, reward total was -21.000000. running mean: -19.651976
episode 3870.000000, reward total was -21.000000. running mean: -19.665456
episode 3871.000000, reward total was -20.000000. running mean: -19.668802
episode 3872.000000, reward total was -21.000000. running mean: -19.682114
episode 3873.000000, reward total was -21.000000. running mean: -19.695293
episode 3874.000000, reward total was -21.000000. running mean: -19.708340
episode 3875.000000, rewa

episode 3972.000000, reward total was -20.000000. running mean: -19.940053
episode 3973.000000, reward total was -18.000000. running mean: -19.920653
episode 3974.000000, reward total was -16.000000. running mean: -19.881446
episode 3975.000000, reward total was -16.000000. running mean: -19.842632
episode 3976.000000, reward total was -19.000000. running mean: -19.834205
episode 3977.000000, reward total was -19.000000. running mean: -19.825863
episode 3978.000000, reward total was -20.000000. running mean: -19.827605
episode 3979.000000, reward total was -21.000000. running mean: -19.839329
episode 3980.000000, reward total was -21.000000. running mean: -19.850935
episode 3981.000000, reward total was -19.000000. running mean: -19.842426
episode 3982.000000, reward total was -20.000000. running mean: -19.844002
episode 3983.000000, reward total was -19.000000. running mean: -19.835562
episode 3984.000000, reward total was -20.000000. running mean: -19.837206
episode 3985.000000, rewa

episode 4082.000000, reward total was -21.000000. running mean: -19.896248
episode 4083.000000, reward total was -21.000000. running mean: -19.907285
episode 4084.000000, reward total was -20.000000. running mean: -19.908212
episode 4085.000000, reward total was -18.000000. running mean: -19.889130
episode 4086.000000, reward total was -19.000000. running mean: -19.880239
episode 4087.000000, reward total was -19.000000. running mean: -19.871436
episode 4088.000000, reward total was -21.000000. running mean: -19.882722
episode 4089.000000, reward total was -20.000000. running mean: -19.883895
episode 4090.000000, reward total was -18.000000. running mean: -19.865056
episode 4091.000000, reward total was -20.000000. running mean: -19.866405
episode 4092.000000, reward total was -21.000000. running mean: -19.877741
episode 4093.000000, reward total was -20.000000. running mean: -19.878964
episode 4094.000000, reward total was -19.000000. running mean: -19.870174
episode 4095.000000, rewa

episode 4192.000000, reward total was -19.000000. running mean: -19.914437
episode 4193.000000, reward total was -20.000000. running mean: -19.915293
episode 4194.000000, reward total was -18.000000. running mean: -19.896140
episode 4195.000000, reward total was -21.000000. running mean: -19.907179
episode 4196.000000, reward total was -21.000000. running mean: -19.918107
episode 4197.000000, reward total was -21.000000. running mean: -19.928926
episode 4198.000000, reward total was -19.000000. running mean: -19.919637
episode 4199.000000, reward total was -21.000000. running mean: -19.930440
episode 4200.000000, reward total was -19.000000. running mean: -19.921136
episode 4201.000000, reward total was -19.000000. running mean: -19.911924
episode 4202.000000, reward total was -21.000000. running mean: -19.922805
episode 4203.000000, reward total was -19.000000. running mean: -19.913577
episode 4204.000000, reward total was -19.000000. running mean: -19.904441
episode 4205.000000, rewa

episode 4302.000000, reward total was -19.000000. running mean: -19.879475
episode 4303.000000, reward total was -19.000000. running mean: -19.870680
episode 4304.000000, reward total was -21.000000. running mean: -19.881973
episode 4305.000000, reward total was -16.000000. running mean: -19.843153
episode 4306.000000, reward total was -20.000000. running mean: -19.844722
episode 4307.000000, reward total was -21.000000. running mean: -19.856275
episode 4308.000000, reward total was -21.000000. running mean: -19.867712
episode 4309.000000, reward total was -19.000000. running mean: -19.859035
episode 4310.000000, reward total was -20.000000. running mean: -19.860444
episode 4311.000000, reward total was -19.000000. running mean: -19.851840
episode 4312.000000, reward total was -21.000000. running mean: -19.863322
episode 4313.000000, reward total was -21.000000. running mean: -19.874688
episode 4314.000000, reward total was -21.000000. running mean: -19.885941
episode 4315.000000, rewa

episode 4412.000000, reward total was -21.000000. running mean: -19.865566
episode 4413.000000, reward total was -18.000000. running mean: -19.846910
episode 4414.000000, reward total was -20.000000. running mean: -19.848441
episode 4415.000000, reward total was -21.000000. running mean: -19.859957
episode 4416.000000, reward total was -20.000000. running mean: -19.861357
episode 4417.000000, reward total was -20.000000. running mean: -19.862743
episode 4418.000000, reward total was -18.000000. running mean: -19.844116
episode 4419.000000, reward total was -17.000000. running mean: -19.815675
episode 4420.000000, reward total was -21.000000. running mean: -19.827518
episode 4421.000000, reward total was -21.000000. running mean: -19.839243
episode 4422.000000, reward total was -21.000000. running mean: -19.850851
episode 4423.000000, reward total was -21.000000. running mean: -19.862342
episode 4424.000000, reward total was -19.000000. running mean: -19.853719
episode 4425.000000, rewa

episode 4522.000000, reward total was -20.000000. running mean: -19.814645
episode 4523.000000, reward total was -20.000000. running mean: -19.816499
episode 4524.000000, reward total was -20.000000. running mean: -19.818334
episode 4525.000000, reward total was -21.000000. running mean: -19.830150
episode 4526.000000, reward total was -20.000000. running mean: -19.831849
episode 4527.000000, reward total was -17.000000. running mean: -19.803530
episode 4528.000000, reward total was -21.000000. running mean: -19.815495
episode 4529.000000, reward total was -20.000000. running mean: -19.817340
episode 4530.000000, reward total was -20.000000. running mean: -19.819167
episode 4531.000000, reward total was -21.000000. running mean: -19.830975
episode 4532.000000, reward total was -20.000000. running mean: -19.832665
episode 4533.000000, reward total was -21.000000. running mean: -19.844339
episode 4534.000000, reward total was -21.000000. running mean: -19.855895
episode 4535.000000, rewa

episode 4632.000000, reward total was -19.000000. running mean: -19.701039
episode 4633.000000, reward total was -20.000000. running mean: -19.704029
episode 4634.000000, reward total was -21.000000. running mean: -19.716989
episode 4635.000000, reward total was -19.000000. running mean: -19.709819
episode 4636.000000, reward total was -20.000000. running mean: -19.712721
episode 4637.000000, reward total was -20.000000. running mean: -19.715593
episode 4638.000000, reward total was -16.000000. running mean: -19.678437
episode 4639.000000, reward total was -19.000000. running mean: -19.671653
episode 4640.000000, reward total was -20.000000. running mean: -19.674937
episode 4641.000000, reward total was -17.000000. running mean: -19.648187
episode 4642.000000, reward total was -20.000000. running mean: -19.651705
episode 4643.000000, reward total was -18.000000. running mean: -19.635188
episode 4644.000000, reward total was -19.000000. running mean: -19.628836
episode 4645.000000, rewa

episode 4742.000000, reward total was -17.000000. running mean: -19.663677
episode 4743.000000, reward total was -19.000000. running mean: -19.657040
episode 4744.000000, reward total was -20.000000. running mean: -19.660470
episode 4745.000000, reward total was -19.000000. running mean: -19.653865
episode 4746.000000, reward total was -21.000000. running mean: -19.667326
episode 4747.000000, reward total was -19.000000. running mean: -19.660653
episode 4748.000000, reward total was -20.000000. running mean: -19.664047
episode 4749.000000, reward total was -21.000000. running mean: -19.677406
episode 4750.000000, reward total was -21.000000. running mean: -19.690632
episode 4751.000000, reward total was -20.000000. running mean: -19.693726
episode 4752.000000, reward total was -20.000000. running mean: -19.696788
episode 4753.000000, reward total was -19.000000. running mean: -19.689821
episode 4754.000000, reward total was -21.000000. running mean: -19.702922
episode 4755.000000, rewa

episode 4852.000000, reward total was -18.000000. running mean: -19.605758
episode 4853.000000, reward total was -19.000000. running mean: -19.599700
episode 4854.000000, reward total was -20.000000. running mean: -19.603703
episode 4855.000000, reward total was -19.000000. running mean: -19.597666
episode 4856.000000, reward total was -19.000000. running mean: -19.591689
episode 4857.000000, reward total was -18.000000. running mean: -19.575773
episode 4858.000000, reward total was -20.000000. running mean: -19.580015
episode 4859.000000, reward total was -20.000000. running mean: -19.584215
episode 4860.000000, reward total was -21.000000. running mean: -19.598373
episode 4861.000000, reward total was -18.000000. running mean: -19.582389
episode 4862.000000, reward total was -20.000000. running mean: -19.586565
episode 4863.000000, reward total was -21.000000. running mean: -19.600699
episode 4864.000000, reward total was -18.000000. running mean: -19.584692
episode 4865.000000, rewa

episode 4962.000000, reward total was -21.000000. running mean: -19.594713
episode 4963.000000, reward total was -20.000000. running mean: -19.598766
episode 4964.000000, reward total was -21.000000. running mean: -19.612778
episode 4965.000000, reward total was -20.000000. running mean: -19.616651
episode 4966.000000, reward total was -18.000000. running mean: -19.600484
episode 4967.000000, reward total was -19.000000. running mean: -19.594479
episode 4968.000000, reward total was -21.000000. running mean: -19.608534
episode 4969.000000, reward total was -21.000000. running mean: -19.622449
episode 4970.000000, reward total was -15.000000. running mean: -19.576225
episode 4971.000000, reward total was -20.000000. running mean: -19.580462
episode 4972.000000, reward total was -17.000000. running mean: -19.554658
episode 4973.000000, reward total was -21.000000. running mean: -19.569111
episode 4974.000000, reward total was -21.000000. running mean: -19.583420
episode 4975.000000, rewa

episode 5072.000000, reward total was -17.000000. running mean: -19.429430
episode 5073.000000, reward total was -19.000000. running mean: -19.425135
episode 5074.000000, reward total was -21.000000. running mean: -19.440884
episode 5075.000000, reward total was -20.000000. running mean: -19.446475
episode 5076.000000, reward total was -21.000000. running mean: -19.462010
episode 5077.000000, reward total was -19.000000. running mean: -19.457390
episode 5078.000000, reward total was -20.000000. running mean: -19.462816
episode 5079.000000, reward total was -19.000000. running mean: -19.458188
episode 5080.000000, reward total was -18.000000. running mean: -19.443606
episode 5081.000000, reward total was -18.000000. running mean: -19.429170
episode 5082.000000, reward total was -19.000000. running mean: -19.424879
episode 5083.000000, reward total was -21.000000. running mean: -19.440630
episode 5084.000000, reward total was -21.000000. running mean: -19.456224
episode 5085.000000, rewa

episode 5182.000000, reward total was -20.000000. running mean: -19.495765
episode 5183.000000, reward total was -19.000000. running mean: -19.490808
episode 5184.000000, reward total was -18.000000. running mean: -19.475899
episode 5185.000000, reward total was -20.000000. running mean: -19.481140
episode 5186.000000, reward total was -21.000000. running mean: -19.496329
episode 5187.000000, reward total was -19.000000. running mean: -19.491366
episode 5188.000000, reward total was -21.000000. running mean: -19.506452
episode 5189.000000, reward total was -19.000000. running mean: -19.501388
episode 5190.000000, reward total was -19.000000. running mean: -19.496374
episode 5191.000000, reward total was -21.000000. running mean: -19.511410
episode 5192.000000, reward total was -19.000000. running mean: -19.506296
episode 5193.000000, reward total was -21.000000. running mean: -19.521233
episode 5194.000000, reward total was -21.000000. running mean: -19.536021
episode 5195.000000, rewa

episode 5292.000000, reward total was -19.000000. running mean: -19.546010
episode 5293.000000, reward total was -20.000000. running mean: -19.550550
episode 5294.000000, reward total was -16.000000. running mean: -19.515044
episode 5295.000000, reward total was -19.000000. running mean: -19.509894
episode 5296.000000, reward total was -21.000000. running mean: -19.524795
episode 5297.000000, reward total was -20.000000. running mean: -19.529547
episode 5298.000000, reward total was -20.000000. running mean: -19.534251
episode 5299.000000, reward total was -17.000000. running mean: -19.508909
episode 5300.000000, reward total was -19.000000. running mean: -19.503820
episode 5301.000000, reward total was -19.000000. running mean: -19.498782
episode 5302.000000, reward total was -19.000000. running mean: -19.493794
episode 5303.000000, reward total was -17.000000. running mean: -19.468856
episode 5304.000000, reward total was -20.000000. running mean: -19.474167
episode 5305.000000, rewa

episode 5402.000000, reward total was -21.000000. running mean: -19.474057
episode 5403.000000, reward total was -17.000000. running mean: -19.449316
episode 5404.000000, reward total was -19.000000. running mean: -19.444823
episode 5405.000000, reward total was -20.000000. running mean: -19.450375
episode 5406.000000, reward total was -21.000000. running mean: -19.465871
episode 5407.000000, reward total was -19.000000. running mean: -19.461212
episode 5408.000000, reward total was -21.000000. running mean: -19.476600
episode 5409.000000, reward total was -15.000000. running mean: -19.431834
episode 5410.000000, reward total was -18.000000. running mean: -19.417516
episode 5411.000000, reward total was -21.000000. running mean: -19.433341
episode 5412.000000, reward total was -21.000000. running mean: -19.449007
episode 5413.000000, reward total was -21.000000. running mean: -19.464517
episode 5414.000000, reward total was -19.000000. running mean: -19.459872
episode 5415.000000, rewa

episode 5512.000000, reward total was -19.000000. running mean: -19.486661
episode 5513.000000, reward total was -19.000000. running mean: -19.481794
episode 5514.000000, reward total was -18.000000. running mean: -19.466976
episode 5515.000000, reward total was -21.000000. running mean: -19.482306
episode 5516.000000, reward total was -19.000000. running mean: -19.477483
episode 5517.000000, reward total was -21.000000. running mean: -19.492708
episode 5518.000000, reward total was -18.000000. running mean: -19.477781
episode 5519.000000, reward total was -20.000000. running mean: -19.483004
episode 5520.000000, reward total was -20.000000. running mean: -19.488174
episode 5521.000000, reward total was -21.000000. running mean: -19.503292
episode 5522.000000, reward total was -17.000000. running mean: -19.478259
episode 5523.000000, reward total was -20.000000. running mean: -19.483476
episode 5524.000000, reward total was -20.000000. running mean: -19.488642
episode 5525.000000, rewa

episode 5622.000000, reward total was -20.000000. running mean: -19.531607
episode 5623.000000, reward total was -20.000000. running mean: -19.536291
episode 5624.000000, reward total was -21.000000. running mean: -19.550928
episode 5625.000000, reward total was -19.000000. running mean: -19.545418
episode 5626.000000, reward total was -20.000000. running mean: -19.549964
episode 5627.000000, reward total was -17.000000. running mean: -19.524465
episode 5628.000000, reward total was -20.000000. running mean: -19.529220
episode 5629.000000, reward total was -20.000000. running mean: -19.533928
episode 5630.000000, reward total was -21.000000. running mean: -19.548588
episode 5631.000000, reward total was -20.000000. running mean: -19.553103
episode 5632.000000, reward total was -21.000000. running mean: -19.567572
episode 5633.000000, reward total was -21.000000. running mean: -19.581896
episode 5634.000000, reward total was -20.000000. running mean: -19.586077
episode 5635.000000, rewa

episode 5732.000000, reward total was -17.000000. running mean: -19.489237
episode 5733.000000, reward total was -20.000000. running mean: -19.494344
episode 5734.000000, reward total was -19.000000. running mean: -19.489401
episode 5735.000000, reward total was -20.000000. running mean: -19.494507
episode 5736.000000, reward total was -17.000000. running mean: -19.469562
episode 5737.000000, reward total was -19.000000. running mean: -19.464866
episode 5738.000000, reward total was -19.000000. running mean: -19.460218
episode 5739.000000, reward total was -21.000000. running mean: -19.475615
episode 5740.000000, reward total was -21.000000. running mean: -19.490859
episode 5741.000000, reward total was -20.000000. running mean: -19.495951
episode 5742.000000, reward total was -16.000000. running mean: -19.460991
episode 5743.000000, reward total was -20.000000. running mean: -19.466381
episode 5744.000000, reward total was -20.000000. running mean: -19.471717
episode 5745.000000, rewa

episode 5842.000000, reward total was -21.000000. running mean: -19.506252
episode 5843.000000, reward total was -19.000000. running mean: -19.501190
episode 5844.000000, reward total was -21.000000. running mean: -19.516178
episode 5845.000000, reward total was -17.000000. running mean: -19.491016
episode 5846.000000, reward total was -18.000000. running mean: -19.476106
episode 5847.000000, reward total was -19.000000. running mean: -19.471345
episode 5848.000000, reward total was -19.000000. running mean: -19.466631
episode 5849.000000, reward total was -15.000000. running mean: -19.421965
episode 5850.000000, reward total was -19.000000. running mean: -19.417746
episode 5851.000000, reward total was -18.000000. running mean: -19.403568
episode 5852.000000, reward total was -19.000000. running mean: -19.399532
episode 5853.000000, reward total was -19.000000. running mean: -19.395537
episode 5854.000000, reward total was -19.000000. running mean: -19.391582
episode 5855.000000, rewa

episode 5952.000000, reward total was -17.000000. running mean: -19.421064
episode 5953.000000, reward total was -21.000000. running mean: -19.436854
episode 5954.000000, reward total was -21.000000. running mean: -19.452485
episode 5955.000000, reward total was -18.000000. running mean: -19.437960
episode 5956.000000, reward total was -19.000000. running mean: -19.433581
episode 5957.000000, reward total was -20.000000. running mean: -19.439245
episode 5958.000000, reward total was -21.000000. running mean: -19.454852
episode 5959.000000, reward total was -21.000000. running mean: -19.470304
episode 5960.000000, reward total was -18.000000. running mean: -19.455601
episode 5961.000000, reward total was -21.000000. running mean: -19.471045
episode 5962.000000, reward total was -19.000000. running mean: -19.466334
episode 5963.000000, reward total was -21.000000. running mean: -19.481671
episode 5964.000000, reward total was -20.000000. running mean: -19.486854
episode 5965.000000, rewa

episode 6062.000000, reward total was -19.000000. running mean: -19.390045
episode 6063.000000, reward total was -20.000000. running mean: -19.396145
episode 6064.000000, reward total was -21.000000. running mean: -19.412183
episode 6065.000000, reward total was -17.000000. running mean: -19.388061
episode 6066.000000, reward total was -18.000000. running mean: -19.374181
episode 6067.000000, reward total was -20.000000. running mean: -19.380439
episode 6068.000000, reward total was -19.000000. running mean: -19.376634
episode 6069.000000, reward total was -19.000000. running mean: -19.372868
episode 6070.000000, reward total was -17.000000. running mean: -19.349139
episode 6071.000000, reward total was -18.000000. running mean: -19.335648
episode 6072.000000, reward total was -17.000000. running mean: -19.312292
episode 6073.000000, reward total was -19.000000. running mean: -19.309169
episode 6074.000000, reward total was -21.000000. running mean: -19.326077
episode 6075.000000, rewa

episode 6172.000000, reward total was -21.000000. running mean: -19.267597
episode 6173.000000, reward total was -19.000000. running mean: -19.264921
episode 6174.000000, reward total was -14.000000. running mean: -19.212272
episode 6175.000000, reward total was -17.000000. running mean: -19.190150
episode 6176.000000, reward total was -19.000000. running mean: -19.188248
episode 6177.000000, reward total was -21.000000. running mean: -19.206366
episode 6178.000000, reward total was -20.000000. running mean: -19.214302
episode 6179.000000, reward total was -20.000000. running mean: -19.222159
episode 6180.000000, reward total was -20.000000. running mean: -19.229937
episode 6181.000000, reward total was -19.000000. running mean: -19.227638
episode 6182.000000, reward total was -18.000000. running mean: -19.215362
episode 6183.000000, reward total was -17.000000. running mean: -19.193208
episode 6184.000000, reward total was -21.000000. running mean: -19.211276
episode 6185.000000, rewa

episode 6282.000000, reward total was -19.000000. running mean: -19.491021
episode 6283.000000, reward total was -21.000000. running mean: -19.506111
episode 6284.000000, reward total was -19.000000. running mean: -19.501050
episode 6285.000000, reward total was -19.000000. running mean: -19.496039
episode 6286.000000, reward total was -19.000000. running mean: -19.491079
episode 6287.000000, reward total was -16.000000. running mean: -19.456168
episode 6288.000000, reward total was -19.000000. running mean: -19.451606
episode 6289.000000, reward total was -20.000000. running mean: -19.457090
episode 6290.000000, reward total was -17.000000. running mean: -19.432519
episode 6291.000000, reward total was -19.000000. running mean: -19.428194
episode 6292.000000, reward total was -19.000000. running mean: -19.423912
episode 6293.000000, reward total was -21.000000. running mean: -19.439673
episode 6294.000000, reward total was -20.000000. running mean: -19.445276
episode 6295.000000, rewa

episode 6392.000000, reward total was -19.000000. running mean: -19.312383
episode 6393.000000, reward total was -17.000000. running mean: -19.289259
episode 6394.000000, reward total was -18.000000. running mean: -19.276366
episode 6395.000000, reward total was -19.000000. running mean: -19.273603
episode 6396.000000, reward total was -21.000000. running mean: -19.290866
episode 6397.000000, reward total was -19.000000. running mean: -19.287958
episode 6398.000000, reward total was -21.000000. running mean: -19.305078
episode 6399.000000, reward total was -16.000000. running mean: -19.272027
episode 6400.000000, reward total was -18.000000. running mean: -19.259307
episode 6401.000000, reward total was -19.000000. running mean: -19.256714
episode 6402.000000, reward total was -21.000000. running mean: -19.274147
episode 6403.000000, reward total was -20.000000. running mean: -19.281406
episode 6404.000000, reward total was -17.000000. running mean: -19.258591
episode 6405.000000, rewa

episode 6502.000000, reward total was -17.000000. running mean: -19.074223
episode 6503.000000, reward total was -21.000000. running mean: -19.093481
episode 6504.000000, reward total was -21.000000. running mean: -19.112546
episode 6505.000000, reward total was -17.000000. running mean: -19.091421
episode 6506.000000, reward total was -20.000000. running mean: -19.100506
episode 6507.000000, reward total was -19.000000. running mean: -19.099501
episode 6508.000000, reward total was -18.000000. running mean: -19.088506
episode 6509.000000, reward total was -20.000000. running mean: -19.097621
episode 6510.000000, reward total was -21.000000. running mean: -19.116645
episode 6511.000000, reward total was -18.000000. running mean: -19.105479
episode 6512.000000, reward total was -18.000000. running mean: -19.094424
episode 6513.000000, reward total was -21.000000. running mean: -19.113480
episode 6514.000000, reward total was -19.000000. running mean: -19.112345
episode 6515.000000, rewa

episode 6612.000000, reward total was -19.000000. running mean: -19.265953
episode 6613.000000, reward total was -19.000000. running mean: -19.263294
episode 6614.000000, reward total was -20.000000. running mean: -19.270661
episode 6615.000000, reward total was -18.000000. running mean: -19.257954
episode 6616.000000, reward total was -18.000000. running mean: -19.245375
episode 6617.000000, reward total was -19.000000. running mean: -19.242921
episode 6618.000000, reward total was -20.000000. running mean: -19.250492
episode 6619.000000, reward total was -15.000000. running mean: -19.207987
episode 6620.000000, reward total was -19.000000. running mean: -19.205907
episode 6621.000000, reward total was -20.000000. running mean: -19.213848
episode 6622.000000, reward total was -20.000000. running mean: -19.221709
episode 6623.000000, reward total was -19.000000. running mean: -19.219492
episode 6624.000000, reward total was -20.000000. running mean: -19.227297
episode 6625.000000, rewa

episode 6722.000000, reward total was -21.000000. running mean: -19.304957
episode 6723.000000, reward total was -21.000000. running mean: -19.321907
episode 6724.000000, reward total was -19.000000. running mean: -19.318688
episode 6725.000000, reward total was -20.000000. running mean: -19.325501
episode 6726.000000, reward total was -21.000000. running mean: -19.342246
episode 6727.000000, reward total was -18.000000. running mean: -19.328824
episode 6728.000000, reward total was -21.000000. running mean: -19.345536
episode 6729.000000, reward total was -15.000000. running mean: -19.302080
episode 6730.000000, reward total was -19.000000. running mean: -19.299060
episode 6731.000000, reward total was -19.000000. running mean: -19.296069
episode 6732.000000, reward total was -18.000000. running mean: -19.283108
episode 6733.000000, reward total was -20.000000. running mean: -19.290277
episode 6734.000000, reward total was -20.000000. running mean: -19.297374
episode 6735.000000, rewa

episode 6832.000000, reward total was -20.000000. running mean: -19.157850
episode 6833.000000, reward total was -17.000000. running mean: -19.136271
episode 6834.000000, reward total was -21.000000. running mean: -19.154908
episode 6835.000000, reward total was -19.000000. running mean: -19.153359
episode 6836.000000, reward total was -19.000000. running mean: -19.151826
episode 6837.000000, reward total was -19.000000. running mean: -19.150307
episode 6838.000000, reward total was -19.000000. running mean: -19.148804
episode 6839.000000, reward total was -20.000000. running mean: -19.157316
episode 6840.000000, reward total was -19.000000. running mean: -19.155743
episode 6841.000000, reward total was -20.000000. running mean: -19.164186
episode 6842.000000, reward total was -20.000000. running mean: -19.172544
episode 6843.000000, reward total was -19.000000. running mean: -19.170818
episode 6844.000000, reward total was -18.000000. running mean: -19.159110
episode 6845.000000, rewa

episode 6942.000000, reward total was -20.000000. running mean: -18.985767
episode 6943.000000, reward total was -21.000000. running mean: -19.005909
episode 6944.000000, reward total was -17.000000. running mean: -18.985850
episode 6945.000000, reward total was -21.000000. running mean: -19.005992
episode 6946.000000, reward total was -20.000000. running mean: -19.015932
episode 6947.000000, reward total was -19.000000. running mean: -19.015773
episode 6948.000000, reward total was -21.000000. running mean: -19.035615
episode 6949.000000, reward total was -17.000000. running mean: -19.015259
episode 6950.000000, reward total was -20.000000. running mean: -19.025106
episode 6951.000000, reward total was -21.000000. running mean: -19.044855
episode 6952.000000, reward total was -19.000000. running mean: -19.044407
episode 6953.000000, reward total was -18.000000. running mean: -19.033963
episode 6954.000000, reward total was -20.000000. running mean: -19.043623
episode 6955.000000, rewa