In [None]:
from __future__ import print_function
import gym
import os
import numpy as np
import tensorflow as tf
import pickle
import gym.spaces

# ========================================
# Hyperparameters and helper functions
# ========================================
# Side length of the square preprocessed frame; the network input has
# size*size values.
size = 80
# Per-step discount factor used by discountRewards.
gamma = 0.99
# Decay factor of the running average of squared gradients kept in
# rmspropCache1 / rmspropCache2.
decayRate = 0.99
# Non-trainable step counter handed to tf.train.exponential_decay to
# schedule learning_rate.
global_step = tf.Variable(0, trainable=False)
# Learning rate before any exponential decay is applied.
start_learning_rate = 5e-3

def preProcess(obs):
    """Convert a raw game frame into a binary float column vector.

    The frame is cropped to rows 35..194, every second row and column is
    kept, and only channel 0 is used. Pixels whose value is 0, 144 or 109
    (presumably the background colours -- confirm against the emulator
    palette) map to 0.0; every other pixel maps to 1.0.

    Args:
        obs: array indexed as (row, column, channel). A 210x160x3 frame
            yields 80*80 = 6400 output values.

    Returns:
        A float64 array of shape (n_pixels, 1), flattened in row-major
        order. The input array is not modified.
    """
    # Crop and downsample in one slice; this is a view, so it must not be
    # written to (the previous implementation overwrote the caller's frame).
    frame = np.asarray(obs)[35:195:2, ::2, 0]
    foreground = (frame != 0) & (frame != 144) & (frame != 109)
    # np.float was removed in NumPy 1.24; float64 is the dtype it aliased.
    return foreground.astype(np.float64).reshape(-1, 1)

def discountRewards(r, discount=None):
    """Compute discounted rewards, restarting the sum at every nonzero reward.

    Walking backwards through ``r``, a running sum is multiplied by the
    discount factor and increased by the current reward. Whenever the
    current reward is nonzero the running sum is reset first, because in
    Pong a nonzero reward marks a game boundary.

    Args:
        r: rewards in time order, as a 1D sequence/array or an (N, 1)
            column. Integer input is accepted.
        discount: per-step discount factor. Defaults to the module-level
            ``gamma`` when omitted.

    Returns:
        A new floating-point array with the same shape as ``r``. Floating
        input dtypes are preserved; any other dtype yields float64 so the
        discounted values are not truncated.
    """
    if discount is None:
        discount = gamma
    rewards = np.asarray(r)
    if np.issubdtype(rewards.dtype, np.floating):
        out_dtype = rewards.dtype
    else:
        out_dtype = np.float64
    flat = rewards.reshape(-1)
    discounted = np.zeros(flat.shape, dtype=out_dtype)
    running_add = 0.0
    for t in reversed(range(flat.size)):
        if flat[t] != 0:
            running_add = 0.0  # game boundary: do not leak reward across it
        running_add = running_add * discount + flat[t]
        discounted[t] = running_add
    return discounted.reshape(rewards.shape)

# ----------------------------------------------------------------------
# Environment, training state and TensorFlow graph of the policy network
# (one ReLU hidden layer, one sigmoid output).
# ----------------------------------------------------------------------
hiddenSize = 200  # units in the hidden layer
env = gym.make("Pong-v0")
obs = env.reset()
lastObs = np.zeros((size*size, 1))  # previous preprocessed frame
prevObs = tf.placeholder(tf.float32, shape=(size**2, 1))
episodeCount = 0
curReward = 0
rewardSum = 0
batchSize = 10  # weights are updated every batchSize episodes
render = False
runningReward = None

# Gradient accumulators and RMSProp caches. The first pair is laid out as
# the transpose of w1, the second pair like w2.
gradBuffer1 = np.zeros([hiddenSize, size**2])
gradBuffer2 = np.zeros([hiddenSize, 1])
rmspropCache1 = np.zeros([hiddenSize, size**2])
rmspropCache2 = np.zeros([hiddenSize, 1])

curObs = tf.placeholder(tf.float32, shape=(size**2, 1))

# Per-episode histories, filled one entry per environment step.
episodicObs, episodicHidden = [], []
errorHist, rewardHist = [], []

w1 = tf.Variable(tf.truncated_normal(shape=[size**2, hiddenSize], mean=0.0, stddev=0.02))
w2 = tf.Variable(tf.truncated_normal(shape=[hiddenSize, 1], mean=0.0, stddev=0.02))

# Inputs of the gradient and weight-update ops; one row per step for the
# first three, full-matrix values for the rest.
Hiddens = tf.placeholder(tf.float32, shape=[None, hiddenSize])
Gradients = tf.placeholder(tf.float32, shape=[None, 1])
Observations = tf.placeholder(tf.float32, shape=[None, size**2])
grad1 = tf.placeholder(tf.float32, shape=[hiddenSize, size**2])
Cache1 = tf.placeholder(tf.float32, shape=[hiddenSize, size**2])
grad2 = tf.placeholder(tf.float32, shape=[hiddenSize, 1])
Cache2 = tf.placeholder(tf.float32, shape=[hiddenSize, 1])

# NOTE(review): no op in this section advances global_step, so unless it is
# incremented elsewhere learning_rate stays at start_learning_rate.
learning_rate = tf.train.exponential_decay(start_learning_rate, global_step,
                                           100000, 0.96, staircase=True)

# Forward pass on the frame difference: (size**2, 1) -> (hiddenSize, 1) -> (1, 1).
modifiedObs = tf.subtract(curObs, prevObs)
curHidden = tf.transpose(tf.matmul(tf.transpose(modifiedObs), w1))
layer1 = tf.nn.relu(curHidden)
out = tf.sigmoid(tf.matmul(tf.transpose(layer1), w2))

# Backward pass over a whole episode.
deltaWeights2 = tf.matmul(tf.transpose(Hiddens), Gradients)
# The derivative of ReLU is 1 where the unit was active and 0 elsewhere, so
# the back-propagated signal is masked by the recorded activations.
# (Applying relu to the gradient itself would discard every negative
# gradient instead.)
reluMask = tf.cast(tf.greater(Hiddens, 0.0), tf.float32)
deltaH = tf.matmul(Gradients, tf.transpose(w2)) * reluMask
deltaWeights1 = tf.transpose(tf.matmul(tf.transpose(Observations), deltaH))

# RMSProp-style step: each weight moves along its fed gradient, scaled by
# the square root of the fed cache.
weights1Update = tf.assign_add(w1, tf.transpose(learning_rate * grad1 / (tf.sqrt(Cache1) + 1e-5)))
weights2Update = tf.assign_add(w2, learning_rate * grad2 / (tf.sqrt(Cache2) + 1e-5))

saver = tf.train.Saver({'w1':w1, 'w2':w2})

sess = tf.InteractiveSession()
# tf.initialize_all_variables is deprecated in favour of this initializer.
sess.run(tf.global_variables_initializer())

# A V2 Saver writes '<prefix>.index' / '<prefix>.data-*' rather than a file
# named exactly like the prefix, so look for the index file as well.
checkpointPath = './pongWeights.ckpt'
if os.path.exists(checkpointPath + '.index') or os.path.exists(checkpointPath):
    saver.restore(sess, checkpointPath)
    print("Weights file found. Restoring weights from file...")

# Main loop: play one environment step per iteration, and at the end of
# every episode turn the recorded history into gradients. It never exits.
while True:
    if render:
        env.render()

    # Forward pass on the difference between this frame and the previous one.
    obs = preProcess(obs)
    outProb, hid, modObs = sess.run(
        [out, layer1, modifiedObs],
        feed_dict={curObs: obs, prevObs: lastObs})
    lastObs = obs

    # outProb is the probability of picking action 2; otherwise action 3.
    action = 2 if np.random.uniform() < outProb else 3
    y = 1 if action == 2 else 0

    # Keep what the end-of-episode gradient computation needs.
    episodicObs.append(modObs)
    episodicHidden.append(hid)
    errorHist.append(y - outProb)

    obs, reward, done, info = env.step(action)
    rewardSum += reward
    rewardHist.append(reward)

    # Report nonzero rewards before the end-of-episode bookkeeping, so the
    # last one of an episode is logged under its own episode number.
    if reward != 0:
        print('ep %d: game finished, reward: %f' % (episodeCount, reward))

    if done:
        episodeCount += 1
        print("episode over:", episodeCount)
        print(len(errorHist), len(errorHist[0]))

        # Stack the per-step records into one row per step.
        allObs = np.reshape(episodicObs, (len(episodicObs), size**2))
        allHidden = np.reshape(episodicHidden, (len(episodicHidden), hiddenSize))
        allErrors = np.vstack(errorHist)
        allRewards = np.vstack(rewardHist)

        # Standardise the discounted rewards; skip the division when they
        # are all equal to avoid dividing by zero.
        discountedRewards = discountRewards(allRewards)
        discountedRewards -= np.mean(discountedRewards)
        rewardStd = np.std(discountedRewards)
        if rewardStd > 0:
            discountedRewards /= rewardStd

        # Weight each step's error by its standardised discounted reward.
        allErrors *= discountedRewards
        delWeights1, delWeights2 = sess.run(
            [deltaWeights1, deltaWeights2],
            feed_dict={Hiddens: allHidden,
                       Gradients: allErrors,
                       Observations: allObs})
        gradBuffer1 += delWeights1
        gradBuffer2 += delWeights2

        episodicObs, episodicHidden, errorHist, rewardHist = [], [], [], []

        # Apply the accumulated gradients once per batch of episodes.
        if episodeCount % batchSize == 0:
            rmspropCache1 = decayRate * rmspropCache1 + (1 - decayRate) * gradBuffer1 ** 2
            rmspropCache2 = decayRate * rmspropCache2 + (1 - decayRate) * gradBuffer2 ** 2
            w1_upd, w2_upd = sess.run(
                [weights1Update, weights2Update],
                feed_dict={grad1: gradBuffer1, grad2: gradBuffer2,
                           Cache1: rmspropCache1, Cache2: rmspropCache2})
            gradBuffer1 = np.zeros_like(gradBuffer1)
            gradBuffer2 = np.zeros_like(gradBuffer2)
            saver.save(sess, './pongWeights.ckpt')

        # Exponential moving average of the episode reward totals.
        runningReward = rewardSum if runningReward is None else runningReward * 0.99 + rewardSum * 0.01
        print('resetting env. episode reward total was %f. running mean: %f' % (rewardSum, runningReward))
        rewardSum = 0
        obs = env.reset()
        lastObs = np.zeros([size**2, 1])



Instructions for updating:
Use `tf.global_variables_initializer` instead.
ep 0: game finished, reward: -1.000000
ep 0: game finished, reward: -1.000000
ep 0: game finished, reward: -1.000000
ep 0: game finished, reward: -1.000000
ep 0: game finished, reward: -1.000000
ep 0: game finished, reward: -1.000000
ep 0: game finished, reward: -1.000000
ep 0: game finished, reward: -1.000000
ep 0: game finished, reward: -1.000000
ep 0: game finished, reward: -1.000000
ep 0: game finished, reward: -1.000000
ep 0: game finished, reward: -1.000000
ep 0: game finished, reward: -1.000000
ep 0: game finished, reward: -1.000000
ep 0: game finished, reward: -1.000000
ep 0: game finished, reward: -1.000000
ep 0: game finished, reward: -1.000000
ep 0: game finished, reward: -1.000000
ep 0: game finished, reward: -1.000000
ep 0: game finished, reward: -1.000000
episode over: 1
1252 1
resetting env. episode reward total was -21.000000. running mean: -21.000000
ep 1: game finished, reward: -1.000000
ep 1: g

ep 8: game finished, reward: -1.000000
ep 8: game finished, reward: -1.000000
ep 8: game finished, reward: -1.000000
ep 8: game finished, reward: -1.000000
ep 8: game finished, reward: -1.000000
ep 8: game finished, reward: -1.000000
ep 8: game finished, reward: -1.000000
ep 8: game finished, reward: -1.000000
ep 8: game finished, reward: -1.000000
ep 8: game finished, reward: -1.000000
ep 8: game finished, reward: -1.000000
ep 8: game finished, reward: 1.000000
ep 8: game finished, reward: -1.000000
ep 8: game finished, reward: -1.000000
episode over: 9
1220 1
resetting env. episode reward total was -20.000000. running mean: -20.932841
ep 9: game finished, reward: -1.000000
ep 9: game finished, reward: -1.000000
ep 9: game finished, reward: -1.000000
ep 9: game finished, reward: -1.000000
ep 9: game finished, reward: -1.000000
ep 9: game finished, reward: -1.000000
ep 9: game finished, reward: -1.000000
ep 9: game finished, reward: -1.000000
ep 9: game finished, reward: -1.000000
ep 9

ep 17: game finished, reward: -1.000000
ep 17: game finished, reward: -1.000000
ep 17: game finished, reward: -1.000000
ep 17: game finished, reward: -1.000000
ep 17: game finished, reward: -1.000000
ep 17: game finished, reward: -1.000000
ep 17: game finished, reward: -1.000000
ep 17: game finished, reward: -1.000000
ep 17: game finished, reward: -1.000000
ep 17: game finished, reward: -1.000000
ep 17: game finished, reward: -1.000000
ep 17: game finished, reward: -1.000000
ep 17: game finished, reward: -1.000000
ep 17: game finished, reward: -1.000000
ep 17: game finished, reward: -1.000000
ep 17: game finished, reward: -1.000000
ep 17: game finished, reward: -1.000000
ep 17: game finished, reward: -1.000000
ep 17: game finished, reward: -1.000000
ep 17: game finished, reward: -1.000000
episode over: 18
1108 1
resetting env. episode reward total was -21.000000. running mean: -20.920194
ep 18: game finished, reward: -1.000000
ep 18: game finished, reward: -1.000000
ep 18: game finishe

ep 25: game finished, reward: -1.000000
ep 25: game finished, reward: -1.000000
ep 25: game finished, reward: -1.000000
episode over: 26
1096 1
resetting env. episode reward total was -21.000000. running mean: -20.926359
ep 26: game finished, reward: -1.000000
ep 26: game finished, reward: -1.000000
ep 26: game finished, reward: -1.000000
ep 26: game finished, reward: -1.000000
ep 26: game finished, reward: -1.000000
ep 26: game finished, reward: -1.000000
ep 26: game finished, reward: -1.000000
ep 26: game finished, reward: -1.000000
ep 26: game finished, reward: -1.000000
ep 26: game finished, reward: -1.000000
ep 26: game finished, reward: -1.000000
ep 26: game finished, reward: -1.000000
ep 26: game finished, reward: -1.000000
ep 26: game finished, reward: -1.000000
ep 26: game finished, reward: -1.000000
ep 26: game finished, reward: -1.000000
ep 26: game finished, reward: -1.000000
ep 26: game finished, reward: -1.000000
ep 26: game finished, reward: -1.000000
ep 26: game finishe

ep 34: game finished, reward: -1.000000
ep 34: game finished, reward: -1.000000
ep 34: game finished, reward: -1.000000
ep 34: game finished, reward: -1.000000
ep 34: game finished, reward: -1.000000
ep 34: game finished, reward: -1.000000
ep 34: game finished, reward: -1.000000
ep 34: game finished, reward: -1.000000
episode over: 35
1164 1
resetting env. episode reward total was -21.000000. running mean: -20.932728
ep 35: game finished, reward: -1.000000
ep 35: game finished, reward: -1.000000
ep 35: game finished, reward: -1.000000
ep 35: game finished, reward: -1.000000
ep 35: game finished, reward: -1.000000
ep 35: game finished, reward: -1.000000
ep 35: game finished, reward: -1.000000
ep 35: game finished, reward: -1.000000
ep 35: game finished, reward: -1.000000
ep 35: game finished, reward: -1.000000
ep 35: game finished, reward: -1.000000
ep 35: game finished, reward: -1.000000
ep 35: game finished, reward: -1.000000
ep 35: game finished, reward: -1.000000
ep 35: game finishe

ep 43: game finished, reward: -1.000000
ep 43: game finished, reward: -1.000000
ep 43: game finished, reward: -1.000000
ep 43: game finished, reward: -1.000000
ep 43: game finished, reward: -1.000000
ep 43: game finished, reward: -1.000000
ep 43: game finished, reward: -1.000000
ep 43: game finished, reward: -1.000000
ep 43: game finished, reward: -1.000000
ep 43: game finished, reward: -1.000000
ep 43: game finished, reward: -1.000000
ep 43: game finished, reward: -1.000000
ep 43: game finished, reward: -1.000000
ep 43: game finished, reward: -1.000000
episode over: 44
1098 1
resetting env. episode reward total was -21.000000. running mean: -20.938546
ep 44: game finished, reward: -1.000000
ep 44: game finished, reward: -1.000000
ep 44: game finished, reward: -1.000000
ep 44: game finished, reward: -1.000000
ep 44: game finished, reward: -1.000000
ep 44: game finished, reward: -1.000000
ep 44: game finished, reward: -1.000000
ep 44: game finished, reward: -1.000000
ep 44: game finishe

ep 52: game finished, reward: -1.000000
ep 52: game finished, reward: -1.000000
ep 52: game finished, reward: -1.000000
ep 52: game finished, reward: -1.000000
ep 52: game finished, reward: -1.000000
ep 52: game finished, reward: -1.000000
ep 52: game finished, reward: -1.000000
ep 52: game finished, reward: -1.000000
ep 52: game finished, reward: -1.000000
ep 52: game finished, reward: -1.000000
ep 52: game finished, reward: -1.000000
ep 52: game finished, reward: -1.000000
ep 52: game finished, reward: -1.000000
ep 52: game finished, reward: -1.000000
ep 52: game finished, reward: -1.000000
ep 52: game finished, reward: -1.000000
ep 52: game finished, reward: -1.000000
ep 52: game finished, reward: -1.000000
ep 52: game finished, reward: -1.000000
episode over: 53
1104 1
resetting env. episode reward total was -21.000000. running mean: -20.943861
ep 53: game finished, reward: -1.000000
ep 53: game finished, reward: -1.000000
ep 53: game finished, reward: -1.000000
ep 53: game finishe

ep 60: game finished, reward: -1.000000
ep 60: game finished, reward: -1.000000
episode over: 61
1086 1
resetting env. episode reward total was -21.000000. running mean: -20.948198
ep 61: game finished, reward: -1.000000
ep 61: game finished, reward: -1.000000
ep 61: game finished, reward: -1.000000
ep 61: game finished, reward: -1.000000
ep 61: game finished, reward: -1.000000
ep 61: game finished, reward: -1.000000
ep 61: game finished, reward: -1.000000
ep 61: game finished, reward: -1.000000
ep 61: game finished, reward: -1.000000
ep 61: game finished, reward: -1.000000
ep 61: game finished, reward: -1.000000
ep 61: game finished, reward: -1.000000
ep 61: game finished, reward: -1.000000
ep 61: game finished, reward: -1.000000
ep 61: game finished, reward: -1.000000
ep 61: game finished, reward: -1.000000
ep 61: game finished, reward: -1.000000
ep 61: game finished, reward: -1.000000
ep 61: game finished, reward: -1.000000
ep 61: game finished, reward: -1.000000
ep 61: game finishe

ep 69: game finished, reward: -1.000000
ep 69: game finished, reward: -1.000000
ep 69: game finished, reward: -1.000000
ep 69: game finished, reward: -1.000000
ep 69: game finished, reward: -1.000000
ep 69: game finished, reward: -1.000000
ep 69: game finished, reward: -1.000000
ep 69: game finished, reward: -1.000000
ep 69: game finished, reward: -1.000000
episode over: 70
1338 1
resetting env. episode reward total was -21.000000. running mean: -20.933657
ep 70: game finished, reward: -1.000000
ep 70: game finished, reward: -1.000000
ep 70: game finished, reward: -1.000000
ep 70: game finished, reward: -1.000000
ep 70: game finished, reward: -1.000000
ep 70: game finished, reward: -1.000000
ep 70: game finished, reward: -1.000000
ep 70: game finished, reward: -1.000000
ep 70: game finished, reward: -1.000000
ep 70: game finished, reward: -1.000000
ep 70: game finished, reward: -1.000000
ep 70: game finished, reward: -1.000000
ep 70: game finished, reward: -1.000000
ep 70: game finishe

ep 78: game finished, reward: -1.000000
ep 78: game finished, reward: -1.000000
ep 78: game finished, reward: -1.000000
ep 78: game finished, reward: -1.000000
ep 78: game finished, reward: -1.000000
ep 78: game finished, reward: -1.000000
ep 78: game finished, reward: -1.000000
ep 78: game finished, reward: -1.000000
ep 78: game finished, reward: -1.000000
ep 78: game finished, reward: -1.000000
ep 78: game finished, reward: -1.000000
ep 78: game finished, reward: -1.000000
ep 78: game finished, reward: -1.000000
ep 78: game finished, reward: -1.000000
ep 78: game finished, reward: -1.000000
episode over: 79
1115 1
resetting env. episode reward total was -21.000000. running mean: -20.939394
ep 79: game finished, reward: -1.000000
ep 79: game finished, reward: -1.000000
ep 79: game finished, reward: -1.000000
ep 79: game finished, reward: -1.000000
ep 79: game finished, reward: -1.000000
ep 79: game finished, reward: -1.000000
ep 79: game finished, reward: -1.000000
ep 79: game finishe

ep 87: game finished, reward: -1.000000
ep 87: game finished, reward: -1.000000
ep 87: game finished, reward: -1.000000
ep 87: game finished, reward: -1.000000
ep 87: game finished, reward: -1.000000
ep 87: game finished, reward: -1.000000
ep 87: game finished, reward: -1.000000
ep 87: game finished, reward: -1.000000
ep 87: game finished, reward: -1.000000
ep 87: game finished, reward: -1.000000
ep 87: game finished, reward: -1.000000
ep 87: game finished, reward: -1.000000
ep 87: game finished, reward: -1.000000
ep 87: game finished, reward: -1.000000
ep 87: game finished, reward: -1.000000
ep 87: game finished, reward: -1.000000
ep 87: game finished, reward: -1.000000
ep 87: game finished, reward: -1.000000
ep 87: game finished, reward: -1.000000
ep 87: game finished, reward: -1.000000
episode over: 88
1278 1
resetting env. episode reward total was -21.000000. running mean: -20.944636
ep 88: game finished, reward: -1.000000
ep 88: game finished, reward: -1.000000
ep 88: game finishe

ep 95: game finished, reward: -1.000000
ep 95: game finished, reward: -1.000000
episode over: 96
1183 1
resetting env. episode reward total was -21.000000. running mean: -20.948913
ep 96: game finished, reward: -1.000000
ep 96: game finished, reward: -1.000000
ep 96: game finished, reward: -1.000000
ep 96: game finished, reward: -1.000000
ep 96: game finished, reward: -1.000000
ep 96: game finished, reward: -1.000000
ep 96: game finished, reward: -1.000000
ep 96: game finished, reward: -1.000000
ep 96: game finished, reward: -1.000000
ep 96: game finished, reward: -1.000000
ep 96: game finished, reward: -1.000000
ep 96: game finished, reward: -1.000000
ep 96: game finished, reward: -1.000000
ep 96: game finished, reward: -1.000000
ep 96: game finished, reward: -1.000000
ep 96: game finished, reward: -1.000000
ep 96: game finished, reward: -1.000000
ep 96: game finished, reward: -1.000000
ep 96: game finished, reward: -1.000000
ep 96: game finished, reward: -1.000000
ep 96: game finishe

ep 104: game finished, reward: -1.000000
ep 104: game finished, reward: -1.000000
ep 104: game finished, reward: -1.000000
ep 104: game finished, reward: -1.000000
ep 104: game finished, reward: -1.000000
ep 104: game finished, reward: -1.000000
ep 104: game finished, reward: -1.000000
ep 104: game finished, reward: 1.000000
ep 104: game finished, reward: -1.000000
ep 104: game finished, reward: -1.000000
ep 104: game finished, reward: -1.000000
ep 104: game finished, reward: -1.000000
ep 104: game finished, reward: -1.000000
episode over: 105
1295 1
resetting env. episode reward total was -20.000000. running mean: -20.933431
ep 105: game finished, reward: -1.000000
ep 105: game finished, reward: -1.000000
ep 105: game finished, reward: -1.000000
ep 105: game finished, reward: -1.000000
ep 105: game finished, reward: -1.000000
ep 105: game finished, reward: -1.000000
ep 105: game finished, reward: -1.000000
ep 105: game finished, reward: -1.000000
ep 105: game finished, reward: -1.0000

ep 112: game finished, reward: -1.000000
ep 112: game finished, reward: -1.000000
episode over: 113
1334 1
resetting env. episode reward total was -21.000000. running mean: -20.928871
ep 113: game finished, reward: -1.000000
ep 113: game finished, reward: -1.000000
ep 113: game finished, reward: -1.000000
ep 113: game finished, reward: -1.000000
ep 113: game finished, reward: -1.000000
ep 113: game finished, reward: -1.000000
ep 113: game finished, reward: -1.000000
ep 113: game finished, reward: -1.000000
ep 113: game finished, reward: -1.000000
ep 113: game finished, reward: -1.000000
ep 113: game finished, reward: -1.000000
ep 113: game finished, reward: -1.000000
ep 113: game finished, reward: -1.000000
ep 113: game finished, reward: -1.000000
ep 113: game finished, reward: -1.000000
ep 113: game finished, reward: -1.000000
ep 113: game finished, reward: -1.000000
ep 113: game finished, reward: -1.000000
ep 113: game finished, reward: -1.000000
ep 113: game finished, reward: -1.000

ep 121: game finished, reward: -1.000000
ep 121: game finished, reward: 1.000000
ep 121: game finished, reward: -1.000000
ep 121: game finished, reward: -1.000000
ep 121: game finished, reward: -1.000000
ep 121: game finished, reward: 1.000000
ep 121: game finished, reward: -1.000000
ep 121: game finished, reward: -1.000000
ep 121: game finished, reward: -1.000000
ep 121: game finished, reward: -1.000000
ep 121: game finished, reward: -1.000000
ep 121: game finished, reward: -1.000000
ep 121: game finished, reward: -1.000000
ep 121: game finished, reward: -1.000000
ep 121: game finished, reward: -1.000000
ep 121: game finished, reward: -1.000000
ep 121: game finished, reward: -1.000000
episode over: 122
1924 1
resetting env. episode reward total was -19.000000. running mean: -20.895905
ep 122: game finished, reward: -1.000000
ep 122: game finished, reward: -1.000000
ep 122: game finished, reward: -1.000000
ep 122: game finished, reward: -1.000000
ep 122: game finished, reward: -1.00000

ep 129: game finished, reward: -1.000000
ep 129: game finished, reward: -1.000000
ep 129: game finished, reward: -1.000000
ep 129: game finished, reward: -1.000000
ep 129: game finished, reward: -1.000000
ep 129: game finished, reward: 1.000000
ep 129: game finished, reward: -1.000000
ep 129: game finished, reward: -1.000000
ep 129: game finished, reward: -1.000000
episode over: 130
1314 1
resetting env. episode reward total was -20.000000. running mean: -20.855324
ep 130: game finished, reward: -1.000000
ep 130: game finished, reward: -1.000000
ep 130: game finished, reward: -1.000000
ep 130: game finished, reward: -1.000000
ep 130: game finished, reward: -1.000000
ep 130: game finished, reward: -1.000000
ep 130: game finished, reward: -1.000000
ep 130: game finished, reward: -1.000000
ep 130: game finished, reward: -1.000000
ep 130: game finished, reward: -1.000000
ep 130: game finished, reward: -1.000000
ep 130: game finished, reward: -1.000000
ep 130: game finished, reward: -1.0000

resetting env. episode reward total was -21.000000. running mean: -20.836900
ep 138: game finished, reward: -1.000000
ep 138: game finished, reward: -1.000000
ep 138: game finished, reward: -1.000000
ep 138: game finished, reward: -1.000000
ep 138: game finished, reward: -1.000000
ep 138: game finished, reward: -1.000000
ep 138: game finished, reward: -1.000000
ep 138: game finished, reward: -1.000000
ep 138: game finished, reward: -1.000000
ep 138: game finished, reward: -1.000000
ep 138: game finished, reward: -1.000000
ep 138: game finished, reward: -1.000000
ep 138: game finished, reward: -1.000000
ep 138: game finished, reward: -1.000000
ep 138: game finished, reward: -1.000000
ep 138: game finished, reward: -1.000000
ep 138: game finished, reward: -1.000000
ep 138: game finished, reward: -1.000000
ep 138: game finished, reward: -1.000000
ep 138: game finished, reward: -1.000000
ep 138: game finished, reward: -1.000000
episode over: 139
1424 1
resetting env. episode reward total w

ep 146: game finished, reward: -1.000000
ep 146: game finished, reward: -1.000000
ep 146: game finished, reward: -1.000000
ep 146: game finished, reward: -1.000000
ep 146: game finished, reward: -1.000000
ep 146: game finished, reward: -1.000000
ep 146: game finished, reward: -1.000000
ep 146: game finished, reward: -1.000000
ep 146: game finished, reward: -1.000000
ep 146: game finished, reward: -1.000000
ep 146: game finished, reward: -1.000000
ep 146: game finished, reward: -1.000000
ep 146: game finished, reward: -1.000000
ep 146: game finished, reward: -1.000000
episode over: 147
1385 1
resetting env. episode reward total was -20.000000. running mean: -20.812854
ep 147: game finished, reward: -1.000000
ep 147: game finished, reward: -1.000000
ep 147: game finished, reward: -1.000000
ep 147: game finished, reward: -1.000000
ep 147: game finished, reward: -1.000000
ep 147: game finished, reward: -1.000000
ep 147: game finished, reward: -1.000000
ep 147: game finished, reward: -1.000

ep 154: game finished, reward: -1.000000
ep 154: game finished, reward: -1.000000
ep 154: game finished, reward: -1.000000
ep 154: game finished, reward: -1.000000
episode over: 155
1430 1
resetting env. episode reward total was -21.000000. running mean: -20.808481
ep 155: game finished, reward: -1.000000
ep 155: game finished, reward: -1.000000
ep 155: game finished, reward: -1.000000
ep 155: game finished, reward: -1.000000
ep 155: game finished, reward: -1.000000
ep 155: game finished, reward: -1.000000
ep 155: game finished, reward: -1.000000
ep 155: game finished, reward: -1.000000
ep 155: game finished, reward: -1.000000
ep 155: game finished, reward: -1.000000
ep 155: game finished, reward: -1.000000
ep 155: game finished, reward: -1.000000
ep 155: game finished, reward: -1.000000
ep 155: game finished, reward: -1.000000
ep 155: game finished, reward: -1.000000
ep 155: game finished, reward: -1.000000
ep 155: game finished, reward: -1.000000
ep 155: game finished, reward: -1.000

ep 163: game finished, reward: -1.000000
ep 163: game finished, reward: -1.000000
ep 163: game finished, reward: -1.000000
ep 163: game finished, reward: -1.000000
ep 163: game finished, reward: -1.000000
ep 163: game finished, reward: -1.000000
ep 163: game finished, reward: -1.000000
ep 163: game finished, reward: -1.000000
ep 163: game finished, reward: -1.000000
ep 163: game finished, reward: -1.000000
ep 163: game finished, reward: -1.000000
ep 163: game finished, reward: -1.000000
ep 163: game finished, reward: -1.000000
ep 163: game finished, reward: -1.000000
ep 163: game finished, reward: -1.000000
episode over: 164
1179 1
resetting env. episode reward total was -21.000000. running mean: -20.825044
ep 164: game finished, reward: -1.000000
ep 164: game finished, reward: -1.000000
ep 164: game finished, reward: -1.000000
ep 164: game finished, reward: -1.000000
ep 164: game finished, reward: -1.000000
ep 164: game finished, reward: -1.000000
ep 164: game finished, reward: -1.000

ep 171: game finished, reward: -1.000000
ep 171: game finished, reward: -1.000000
ep 171: game finished, reward: -1.000000
ep 171: game finished, reward: -1.000000
episode over: 172
1340 1
resetting env. episode reward total was -21.000000. running mean: -20.828661
ep 172: game finished, reward: -1.000000
ep 172: game finished, reward: -1.000000
ep 172: game finished, reward: -1.000000
ep 172: game finished, reward: -1.000000
ep 172: game finished, reward: -1.000000
ep 172: game finished, reward: -1.000000
ep 172: game finished, reward: -1.000000
ep 172: game finished, reward: -1.000000
ep 172: game finished, reward: -1.000000
ep 172: game finished, reward: -1.000000
ep 172: game finished, reward: -1.000000
ep 172: game finished, reward: -1.000000
ep 172: game finished, reward: -1.000000
ep 172: game finished, reward: -1.000000
ep 172: game finished, reward: -1.000000
ep 172: game finished, reward: -1.000000
ep 172: game finished, reward: 1.000000
ep 172: game finished, reward: -1.0000

ep 180: game finished, reward: -1.000000
ep 180: game finished, reward: -1.000000
ep 180: game finished, reward: -1.000000
ep 180: game finished, reward: -1.000000
ep 180: game finished, reward: -1.000000
ep 180: game finished, reward: -1.000000
ep 180: game finished, reward: -1.000000
ep 180: game finished, reward: -1.000000
ep 180: game finished, reward: -1.000000
ep 180: game finished, reward: -1.000000
ep 180: game finished, reward: -1.000000
ep 180: game finished, reward: -1.000000
ep 180: game finished, reward: -1.000000
ep 180: game finished, reward: -1.000000
ep 180: game finished, reward: -1.000000
ep 180: game finished, reward: -1.000000
ep 180: game finished, reward: -1.000000
ep 180: game finished, reward: -1.000000
ep 180: game finished, reward: -1.000000
ep 180: game finished, reward: -1.000000
episode over: 181
1162 1
resetting env. episode reward total was -21.000000. running mean: -20.786792
ep 181: game finished, reward: -1.000000
ep 181: game finished, reward: 1.0000

ep 188: game finished, reward: -1.000000
ep 188: game finished, reward: -1.000000
ep 188: game finished, reward: -1.000000
ep 188: game finished, reward: -1.000000
ep 188: game finished, reward: -1.000000
ep 188: game finished, reward: -1.000000
ep 188: game finished, reward: -1.000000
ep 188: game finished, reward: -1.000000
ep 188: game finished, reward: -1.000000
ep 188: game finished, reward: -1.000000
episode over: 189
1752 1
resetting env. episode reward total was -21.000000. running mean: -20.784622
ep 189: game finished, reward: -1.000000
ep 189: game finished, reward: 1.000000
ep 189: game finished, reward: -1.000000
ep 189: game finished, reward: -1.000000
ep 189: game finished, reward: -1.000000
ep 189: game finished, reward: -1.000000
ep 189: game finished, reward: -1.000000
ep 189: game finished, reward: -1.000000
ep 189: game finished, reward: -1.000000
ep 189: game finished, reward: -1.000000
ep 189: game finished, reward: -1.000000
ep 189: game finished, reward: -1.0000

ep 196: game finished, reward: -1.000000
ep 196: game finished, reward: -1.000000
episode over: 197
1505 1
resetting env. episode reward total was -21.000000. running mean: -20.753319
ep 197: game finished, reward: -1.000000
ep 197: game finished, reward: -1.000000
ep 197: game finished, reward: -1.000000
ep 197: game finished, reward: -1.000000
ep 197: game finished, reward: -1.000000
ep 197: game finished, reward: -1.000000
ep 197: game finished, reward: -1.000000
ep 197: game finished, reward: -1.000000
ep 197: game finished, reward: -1.000000
ep 197: game finished, reward: -1.000000
ep 197: game finished, reward: -1.000000
ep 197: game finished, reward: -1.000000
ep 197: game finished, reward: -1.000000
ep 197: game finished, reward: -1.000000
ep 197: game finished, reward: -1.000000
ep 197: game finished, reward: -1.000000
ep 197: game finished, reward: -1.000000
ep 197: game finished, reward: -1.000000
ep 197: game finished, reward: 1.000000
ep 197: game finished, reward: -1.0000

ep 205: game finished, reward: -1.000000
ep 205: game finished, reward: -1.000000
ep 205: game finished, reward: -1.000000
ep 205: game finished, reward: -1.000000
ep 205: game finished, reward: -1.000000
ep 205: game finished, reward: -1.000000
ep 205: game finished, reward: -1.000000
ep 205: game finished, reward: -1.000000
ep 205: game finished, reward: -1.000000
ep 205: game finished, reward: -1.000000
ep 205: game finished, reward: -1.000000
ep 205: game finished, reward: -1.000000
ep 205: game finished, reward: -1.000000
ep 205: game finished, reward: -1.000000
ep 205: game finished, reward: -1.000000
ep 205: game finished, reward: -1.000000
episode over: 206
1495 1
resetting env. episode reward total was -21.000000. running mean: -20.746594
ep 206: game finished, reward: -1.000000
ep 206: game finished, reward: -1.000000
ep 206: game finished, reward: -1.000000
ep 206: game finished, reward: -1.000000
ep 206: game finished, reward: -1.000000
ep 206: game finished, reward: -1.000

ep 213: game finished, reward: -1.000000
ep 213: game finished, reward: -1.000000
ep 213: game finished, reward: -1.000000
ep 213: game finished, reward: -1.000000
ep 213: game finished, reward: -1.000000
ep 213: game finished, reward: -1.000000
episode over: 214
1472 1
resetting env. episode reward total was -20.000000. running mean: -20.746468
ep 214: game finished, reward: -1.000000
ep 214: game finished, reward: -1.000000
ep 214: game finished, reward: -1.000000
ep 214: game finished, reward: -1.000000
ep 214: game finished, reward: -1.000000
ep 214: game finished, reward: -1.000000
ep 214: game finished, reward: -1.000000
ep 214: game finished, reward: -1.000000
ep 214: game finished, reward: -1.000000
ep 214: game finished, reward: -1.000000
ep 214: game finished, reward: -1.000000
ep 214: game finished, reward: -1.000000
ep 214: game finished, reward: -1.000000
ep 214: game finished, reward: -1.000000
ep 214: game finished, reward: -1.000000
ep 214: game finished, reward: -1.000

ep 222: game finished, reward: -1.000000
ep 222: game finished, reward: -1.000000
ep 222: game finished, reward: -1.000000
ep 222: game finished, reward: -1.000000
ep 222: game finished, reward: 1.000000
ep 222: game finished, reward: -1.000000
ep 222: game finished, reward: -1.000000
ep 222: game finished, reward: -1.000000
ep 222: game finished, reward: -1.000000
ep 222: game finished, reward: -1.000000
ep 222: game finished, reward: -1.000000
ep 222: game finished, reward: 1.000000
ep 222: game finished, reward: -1.000000
ep 222: game finished, reward: -1.000000
ep 222: game finished, reward: -1.000000
ep 222: game finished, reward: -1.000000
ep 222: game finished, reward: -1.000000
ep 222: game finished, reward: -1.000000
ep 222: game finished, reward: -1.000000
ep 222: game finished, reward: -1.000000
ep 222: game finished, reward: -1.000000
ep 222: game finished, reward: -1.000000
episode over: 223
1825 1
resetting env. episode reward total was -19.000000. running mean: -20.70045

ep 230: game finished, reward: -1.000000
ep 230: game finished, reward: -1.000000
ep 230: game finished, reward: -1.000000
ep 230: game finished, reward: -1.000000
ep 230: game finished, reward: -1.000000
ep 230: game finished, reward: -1.000000
ep 230: game finished, reward: -1.000000
ep 230: game finished, reward: -1.000000
ep 230: game finished, reward: -1.000000
ep 230: game finished, reward: -1.000000
episode over: 231
1267 1
resetting env. episode reward total was -21.000000. running mean: -20.704374
ep 231: game finished, reward: -1.000000
ep 231: game finished, reward: -1.000000
ep 231: game finished, reward: -1.000000
ep 231: game finished, reward: -1.000000
ep 231: game finished, reward: -1.000000
ep 231: game finished, reward: -1.000000
ep 231: game finished, reward: -1.000000
ep 231: game finished, reward: -1.000000
ep 231: game finished, reward: -1.000000
ep 231: game finished, reward: -1.000000
ep 231: game finished, reward: -1.000000
ep 231: game finished, reward: -1.000

ep 238: game finished, reward: -1.000000
episode over: 239
1098 1
resetting env. episode reward total was -21.000000. running mean: -20.688969
ep 239: game finished, reward: -1.000000
ep 239: game finished, reward: 1.000000
ep 239: game finished, reward: -1.000000
ep 239: game finished, reward: -1.000000
ep 239: game finished, reward: -1.000000
ep 239: game finished, reward: -1.000000
ep 239: game finished, reward: -1.000000
ep 239: game finished, reward: -1.000000
ep 239: game finished, reward: -1.000000
ep 239: game finished, reward: -1.000000
ep 239: game finished, reward: -1.000000
ep 239: game finished, reward: -1.000000
ep 239: game finished, reward: -1.000000
ep 239: game finished, reward: -1.000000
ep 239: game finished, reward: -1.000000
ep 239: game finished, reward: -1.000000
ep 239: game finished, reward: -1.000000
ep 239: game finished, reward: -1.000000
ep 239: game finished, reward: -1.000000
ep 239: game finished, reward: -1.000000
ep 239: game finished, reward: -1.0000

ep 247: game finished, reward: 1.000000
ep 247: game finished, reward: -1.000000
ep 247: game finished, reward: -1.000000
ep 247: game finished, reward: -1.000000
ep 247: game finished, reward: -1.000000
ep 247: game finished, reward: -1.000000
ep 247: game finished, reward: -1.000000
ep 247: game finished, reward: -1.000000
ep 247: game finished, reward: -1.000000
ep 247: game finished, reward: -1.000000
ep 247: game finished, reward: -1.000000
ep 247: game finished, reward: -1.000000
ep 247: game finished, reward: -1.000000
ep 247: game finished, reward: -1.000000
ep 247: game finished, reward: -1.000000
ep 247: game finished, reward: -1.000000
ep 247: game finished, reward: -1.000000
ep 247: game finished, reward: -1.000000
ep 247: game finished, reward: -1.000000
episode over: 248
1709 1
resetting env. episode reward total was -20.000000. running mean: -20.647531
ep 248: game finished, reward: -1.000000
ep 248: game finished, reward: -1.000000
ep 248: game finished, reward: -1.0000

ep 255: game finished, reward: -1.000000
ep 255: game finished, reward: -1.000000
ep 255: game finished, reward: -1.000000
ep 255: game finished, reward: -1.000000
ep 255: game finished, reward: -1.000000
ep 255: game finished, reward: -1.000000
ep 255: game finished, reward: -1.000000
ep 255: game finished, reward: -1.000000
ep 255: game finished, reward: -1.000000
ep 255: game finished, reward: -1.000000
ep 255: game finished, reward: -1.000000
ep 255: game finished, reward: -1.000000
episode over: 256
1416 1
resetting env. episode reward total was -21.000000. running mean: -20.617883
ep 256: game finished, reward: -1.000000
ep 256: game finished, reward: -1.000000
ep 256: game finished, reward: -1.000000
ep 256: game finished, reward: -1.000000
ep 256: game finished, reward: -1.000000
ep 256: game finished, reward: -1.000000
ep 256: game finished, reward: -1.000000
ep 256: game finished, reward: -1.000000
ep 256: game finished, reward: -1.000000
ep 256: game finished, reward: -1.000

ep 263: game finished, reward: -1.000000
ep 263: game finished, reward: -1.000000
ep 263: game finished, reward: -1.000000
ep 263: game finished, reward: -1.000000
episode over: 264
1645 1
resetting env. episode reward total was -20.000000. running mean: -20.599838
ep 264: game finished, reward: -1.000000
ep 264: game finished, reward: -1.000000
ep 264: game finished, reward: -1.000000
ep 264: game finished, reward: -1.000000
ep 264: game finished, reward: -1.000000
ep 264: game finished, reward: -1.000000
ep 264: game finished, reward: -1.000000
ep 264: game finished, reward: -1.000000
ep 264: game finished, reward: -1.000000
ep 264: game finished, reward: -1.000000
ep 264: game finished, reward: -1.000000
ep 264: game finished, reward: -1.000000
ep 264: game finished, reward: -1.000000
ep 264: game finished, reward: -1.000000
ep 264: game finished, reward: -1.000000
ep 264: game finished, reward: -1.000000
ep 264: game finished, reward: 1.000000
ep 264: game finished, reward: -1.0000

ep 272: game finished, reward: -1.000000
ep 272: game finished, reward: -1.000000
ep 272: game finished, reward: -1.000000
ep 272: game finished, reward: -1.000000
ep 272: game finished, reward: -1.000000
ep 272: game finished, reward: -1.000000
ep 272: game finished, reward: -1.000000
ep 272: game finished, reward: -1.000000
ep 272: game finished, reward: -1.000000
ep 272: game finished, reward: -1.000000
ep 272: game finished, reward: -1.000000
ep 272: game finished, reward: -1.000000
ep 272: game finished, reward: -1.000000
ep 272: game finished, reward: -1.000000
ep 272: game finished, reward: -1.000000
ep 272: game finished, reward: -1.000000
ep 272: game finished, reward: -1.000000
ep 272: game finished, reward: -1.000000
ep 272: game finished, reward: -1.000000
episode over: 273
1102 1
resetting env. episode reward total was -21.000000. running mean: -20.596199
ep 273: game finished, reward: -1.000000
ep 273: game finished, reward: -1.000000
ep 273: game finished, reward: -1.000

ep 280: game finished, reward: -1.000000
ep 280: game finished, reward: -1.000000
ep 280: game finished, reward: -1.000000
ep 280: game finished, reward: -1.000000
ep 280: game finished, reward: -1.000000
ep 280: game finished, reward: -1.000000
ep 280: game finished, reward: -1.000000
ep 280: game finished, reward: -1.000000
ep 280: game finished, reward: -1.000000
episode over: 281
1351 1
resetting env. episode reward total was -21.000000. running mean: -20.607988
ep 281: game finished, reward: -1.000000
ep 281: game finished, reward: -1.000000
ep 281: game finished, reward: -1.000000
ep 281: game finished, reward: -1.000000
ep 281: game finished, reward: -1.000000
ep 281: game finished, reward: -1.000000
ep 281: game finished, reward: -1.000000
ep 281: game finished, reward: -1.000000
ep 281: game finished, reward: -1.000000
ep 281: game finished, reward: -1.000000
ep 281: game finished, reward: -1.000000
ep 281: game finished, reward: -1.000000
ep 281: game finished, reward: -1.000

ep 288: game finished, reward: -1.000000
ep 288: game finished, reward: -1.000000
episode over: 289
1346 1
resetting env. episode reward total was -21.000000. running mean: -20.580628
ep 289: game finished, reward: -1.000000
ep 289: game finished, reward: -1.000000
ep 289: game finished, reward: -1.000000
ep 289: game finished, reward: -1.000000
ep 289: game finished, reward: -1.000000
ep 289: game finished, reward: -1.000000
ep 289: game finished, reward: -1.000000
ep 289: game finished, reward: -1.000000
ep 289: game finished, reward: -1.000000
ep 289: game finished, reward: -1.000000
ep 289: game finished, reward: -1.000000
ep 289: game finished, reward: -1.000000
ep 289: game finished, reward: -1.000000
ep 289: game finished, reward: -1.000000
ep 289: game finished, reward: -1.000000
ep 289: game finished, reward: -1.000000
ep 289: game finished, reward: -1.000000
ep 289: game finished, reward: -1.000000
ep 289: game finished, reward: -1.000000
ep 289: game finished, reward: -1.000

ep 297: game finished, reward: -1.000000
ep 297: game finished, reward: -1.000000
ep 297: game finished, reward: -1.000000
ep 297: game finished, reward: -1.000000
ep 297: game finished, reward: -1.000000
ep 297: game finished, reward: -1.000000
ep 297: game finished, reward: -1.000000
ep 297: game finished, reward: -1.000000
ep 297: game finished, reward: -1.000000
ep 297: game finished, reward: -1.000000
ep 297: game finished, reward: -1.000000
ep 297: game finished, reward: -1.000000
ep 297: game finished, reward: -1.000000
ep 297: game finished, reward: -1.000000
ep 297: game finished, reward: -1.000000
ep 297: game finished, reward: -1.000000
episode over: 298
1264 1
resetting env. episode reward total was -21.000000. running mean: -20.588555
ep 298: game finished, reward: -1.000000
ep 298: game finished, reward: -1.000000
ep 298: game finished, reward: -1.000000
ep 298: game finished, reward: -1.000000
ep 298: game finished, reward: -1.000000
ep 298: game finished, reward: -1.000

ep 305: game finished, reward: -1.000000
ep 305: game finished, reward: -1.000000
ep 305: game finished, reward: -1.000000
ep 305: game finished, reward: -1.000000
ep 305: game finished, reward: -1.000000
ep 305: game finished, reward: -1.000000
ep 305: game finished, reward: -1.000000
episode over: 306
1242 1
resetting env. episode reward total was -21.000000. running mean: -20.601125
ep 306: game finished, reward: -1.000000
ep 306: game finished, reward: -1.000000
ep 306: game finished, reward: -1.000000
ep 306: game finished, reward: -1.000000
ep 306: game finished, reward: -1.000000
ep 306: game finished, reward: -1.000000
ep 306: game finished, reward: -1.000000
ep 306: game finished, reward: -1.000000
ep 306: game finished, reward: -1.000000
ep 306: game finished, reward: -1.000000
ep 306: game finished, reward: -1.000000
ep 306: game finished, reward: -1.000000
ep 306: game finished, reward: -1.000000
ep 306: game finished, reward: -1.000000
ep 306: game finished, reward: -1.000

ep 314: game finished, reward: -1.000000
ep 314: game finished, reward: -1.000000
ep 314: game finished, reward: -1.000000
ep 314: game finished, reward: -1.000000
ep 314: game finished, reward: -1.000000
ep 314: game finished, reward: -1.000000
ep 314: game finished, reward: -1.000000
ep 314: game finished, reward: -1.000000
ep 314: game finished, reward: -1.000000
ep 314: game finished, reward: -1.000000
ep 314: game finished, reward: -1.000000
ep 314: game finished, reward: -1.000000
ep 314: game finished, reward: -1.000000
ep 314: game finished, reward: -1.000000
ep 314: game finished, reward: -1.000000
ep 314: game finished, reward: -1.000000
ep 314: game finished, reward: -1.000000
ep 314: game finished, reward: -1.000000
episode over: 315
1185 1
resetting env. episode reward total was -21.000000. running mean: -20.635621
ep 315: game finished, reward: -1.000000
ep 315: game finished, reward: -1.000000
ep 315: game finished, reward: -1.000000
ep 315: game finished, reward: -1.000

ep 322: game finished, reward: -1.000000
ep 322: game finished, reward: -1.000000
ep 322: game finished, reward: -1.000000
ep 322: game finished, reward: -1.000000
ep 322: game finished, reward: -1.000000
ep 322: game finished, reward: -1.000000
ep 322: game finished, reward: -1.000000
ep 322: game finished, reward: -1.000000
ep 322: game finished, reward: -1.000000
episode over: 323
1244 1
resetting env. episode reward total was -21.000000. running mean: -20.644457
ep 323: game finished, reward: -1.000000
ep 323: game finished, reward: -1.000000
ep 323: game finished, reward: -1.000000
ep 323: game finished, reward: -1.000000
ep 323: game finished, reward: -1.000000
ep 323: game finished, reward: -1.000000
ep 323: game finished, reward: -1.000000
ep 323: game finished, reward: -1.000000
ep 323: game finished, reward: -1.000000
ep 323: game finished, reward: -1.000000
ep 323: game finished, reward: -1.000000
ep 323: game finished, reward: -1.000000
ep 323: game finished, reward: -1.000

episode over: 331
1573 1
resetting env. episode reward total was -20.000000. running mean: -20.633680
ep 331: game finished, reward: -1.000000
ep 331: game finished, reward: -1.000000
ep 331: game finished, reward: -1.000000
ep 331: game finished, reward: -1.000000
ep 331: game finished, reward: -1.000000
ep 331: game finished, reward: -1.000000
ep 331: game finished, reward: -1.000000
ep 331: game finished, reward: -1.000000
ep 331: game finished, reward: -1.000000
ep 331: game finished, reward: -1.000000
ep 331: game finished, reward: -1.000000
ep 331: game finished, reward: -1.000000
ep 331: game finished, reward: -1.000000
ep 331: game finished, reward: -1.000000
ep 331: game finished, reward: -1.000000
ep 331: game finished, reward: -1.000000
ep 331: game finished, reward: -1.000000
ep 331: game finished, reward: -1.000000
ep 331: game finished, reward: -1.000000
ep 331: game finished, reward: -1.000000
ep 331: game finished, reward: -1.000000
episode over: 332
1095 1
resetting en

ep 339: game finished, reward: -1.000000
ep 339: game finished, reward: -1.000000
ep 339: game finished, reward: -1.000000
ep 339: game finished, reward: -1.000000
ep 339: game finished, reward: -1.000000
ep 339: game finished, reward: -1.000000
ep 339: game finished, reward: -1.000000
ep 339: game finished, reward: -1.000000
ep 339: game finished, reward: -1.000000
ep 339: game finished, reward: -1.000000
ep 339: game finished, reward: -1.000000
ep 339: game finished, reward: -1.000000
ep 339: game finished, reward: -1.000000
ep 339: game finished, reward: -1.000000
episode over: 340
1164 1
resetting env. episode reward total was -21.000000. running mean: -20.645954
ep 340: game finished, reward: -1.000000
ep 340: game finished, reward: -1.000000
ep 340: game finished, reward: -1.000000
ep 340: game finished, reward: -1.000000
ep 340: game finished, reward: -1.000000
ep 340: game finished, reward: -1.000000
ep 340: game finished, reward: -1.000000
ep 340: game finished, reward: -1.000

ep 347: game finished, reward: -1.000000
ep 347: game finished, reward: -1.000000
ep 347: game finished, reward: -1.000000
ep 347: game finished, reward: -1.000000
ep 347: game finished, reward: -1.000000
ep 347: game finished, reward: -1.000000
ep 347: game finished, reward: -1.000000
ep 347: game finished, reward: -1.000000
ep 347: game finished, reward: -1.000000
episode over: 348
2154 1
resetting env. episode reward total was -21.000000. running mean: -20.596549
ep 348: game finished, reward: -1.000000
ep 348: game finished, reward: -1.000000
ep 348: game finished, reward: -1.000000
ep 348: game finished, reward: -1.000000
ep 348: game finished, reward: -1.000000
ep 348: game finished, reward: -1.000000
ep 348: game finished, reward: -1.000000
ep 348: game finished, reward: -1.000000
ep 348: game finished, reward: -1.000000
ep 348: game finished, reward: 1.000000
ep 348: game finished, reward: -1.000000
ep 348: game finished, reward: -1.000000
ep 348: game finished, reward: 1.00000

episode over: 356
1343 1
resetting env. episode reward total was -21.000000. running mean: -20.599373
ep 356: game finished, reward: -1.000000
ep 356: game finished, reward: -1.000000
ep 356: game finished, reward: -1.000000
ep 356: game finished, reward: -1.000000
ep 356: game finished, reward: -1.000000
ep 356: game finished, reward: -1.000000
ep 356: game finished, reward: -1.000000
ep 356: game finished, reward: 1.000000
ep 356: game finished, reward: -1.000000
ep 356: game finished, reward: -1.000000
ep 356: game finished, reward: -1.000000
ep 356: game finished, reward: -1.000000
ep 356: game finished, reward: -1.000000
ep 356: game finished, reward: -1.000000
ep 356: game finished, reward: -1.000000
ep 356: game finished, reward: -1.000000
ep 356: game finished, reward: -1.000000
ep 356: game finished, reward: -1.000000
ep 356: game finished, reward: -1.000000
ep 356: game finished, reward: -1.000000
ep 356: game finished, reward: -1.000000
ep 356: game finished, reward: -1.0000

ep 364: game finished, reward: -1.000000
ep 364: game finished, reward: -1.000000
ep 364: game finished, reward: 1.000000
ep 364: game finished, reward: -1.000000
ep 364: game finished, reward: -1.000000
ep 364: game finished, reward: -1.000000
ep 364: game finished, reward: -1.000000
ep 364: game finished, reward: -1.000000
ep 364: game finished, reward: -1.000000
ep 364: game finished, reward: -1.000000
ep 364: game finished, reward: -1.000000
ep 364: game finished, reward: -1.000000
ep 364: game finished, reward: -1.000000
ep 364: game finished, reward: -1.000000
ep 364: game finished, reward: -1.000000
ep 364: game finished, reward: -1.000000
ep 364: game finished, reward: -1.000000
ep 364: game finished, reward: -1.000000
episode over: 365
1264 1
resetting env. episode reward total was -19.000000. running mean: -20.566648
ep 365: game finished, reward: -1.000000
ep 365: game finished, reward: -1.000000
ep 365: game finished, reward: -1.000000
ep 365: game finished, reward: -1.0000

ep 372: game finished, reward: -1.000000
ep 372: game finished, reward: -1.000000
ep 372: game finished, reward: -1.000000
ep 372: game finished, reward: -1.000000
ep 372: game finished, reward: -1.000000
ep 372: game finished, reward: -1.000000
ep 372: game finished, reward: -1.000000
ep 372: game finished, reward: -1.000000
ep 372: game finished, reward: -1.000000
ep 372: game finished, reward: -1.000000
ep 372: game finished, reward: -1.000000
episode over: 373
1255 1
resetting env. episode reward total was -21.000000. running mean: -20.541699
ep 373: game finished, reward: -1.000000
ep 373: game finished, reward: -1.000000
ep 373: game finished, reward: -1.000000
ep 373: game finished, reward: -1.000000
ep 373: game finished, reward: -1.000000
ep 373: game finished, reward: -1.000000
ep 373: game finished, reward: -1.000000
ep 373: game finished, reward: -1.000000
ep 373: game finished, reward: -1.000000
ep 373: game finished, reward: -1.000000
ep 373: game finished, reward: -1.000

ep 380: game finished, reward: -1.000000
ep 380: game finished, reward: -1.000000
ep 380: game finished, reward: -1.000000
ep 380: game finished, reward: 1.000000
ep 380: game finished, reward: -1.000000
ep 380: game finished, reward: -1.000000
ep 380: game finished, reward: -1.000000
ep 380: game finished, reward: 1.000000
ep 380: game finished, reward: -1.000000
ep 380: game finished, reward: -1.000000
ep 380: game finished, reward: -1.000000
episode over: 381
1639 1
resetting env. episode reward total was -18.000000. running mean: -20.451515
ep 381: game finished, reward: -1.000000
ep 381: game finished, reward: 1.000000
ep 381: game finished, reward: -1.000000
ep 381: game finished, reward: -1.000000
ep 381: game finished, reward: -1.000000
ep 381: game finished, reward: -1.000000
ep 381: game finished, reward: -1.000000
ep 381: game finished, reward: -1.000000
ep 381: game finished, reward: -1.000000
ep 381: game finished, reward: -1.000000
ep 381: game finished, reward: 1.000000


ep 388: game finished, reward: -1.000000
ep 388: game finished, reward: -1.000000
ep 388: game finished, reward: -1.000000
ep 388: game finished, reward: -1.000000
ep 388: game finished, reward: -1.000000
ep 388: game finished, reward: -1.000000
episode over: 389
1756 1
resetting env. episode reward total was -21.000000. running mean: -20.417403
ep 389: game finished, reward: -1.000000
ep 389: game finished, reward: -1.000000
ep 389: game finished, reward: -1.000000
ep 389: game finished, reward: -1.000000
ep 389: game finished, reward: -1.000000
ep 389: game finished, reward: -1.000000
ep 389: game finished, reward: -1.000000
ep 389: game finished, reward: -1.000000
ep 389: game finished, reward: -1.000000
ep 389: game finished, reward: -1.000000
ep 389: game finished, reward: -1.000000
ep 389: game finished, reward: -1.000000
ep 389: game finished, reward: -1.000000
ep 389: game finished, reward: -1.000000
ep 389: game finished, reward: -1.000000
ep 389: game finished, reward: -1.000

ep 397: game finished, reward: -1.000000
ep 397: game finished, reward: -1.000000
ep 397: game finished, reward: -1.000000
ep 397: game finished, reward: -1.000000
ep 397: game finished, reward: -1.000000
ep 397: game finished, reward: -1.000000
ep 397: game finished, reward: -1.000000
ep 397: game finished, reward: -1.000000
ep 397: game finished, reward: -1.000000
ep 397: game finished, reward: -1.000000
ep 397: game finished, reward: -1.000000
ep 397: game finished, reward: -1.000000
ep 397: game finished, reward: -1.000000
ep 397: game finished, reward: -1.000000
ep 397: game finished, reward: -1.000000
ep 397: game finished, reward: -1.000000
ep 397: game finished, reward: -1.000000
episode over: 398
1647 1
resetting env. episode reward total was -21.000000. running mean: -20.458277
ep 398: game finished, reward: -1.000000
ep 398: game finished, reward: -1.000000
ep 398: game finished, reward: -1.000000
ep 398: game finished, reward: -1.000000
ep 398: game finished, reward: -1.000

ep 405: game finished, reward: -1.000000
ep 405: game finished, reward: -1.000000
ep 405: game finished, reward: -1.000000
ep 405: game finished, reward: -1.000000
ep 405: game finished, reward: -1.000000
episode over: 406
1192 1
resetting env. episode reward total was -21.000000. running mean: -20.500128
ep 406: game finished, reward: -1.000000
ep 406: game finished, reward: -1.000000
ep 406: game finished, reward: -1.000000
ep 406: game finished, reward: -1.000000
ep 406: game finished, reward: -1.000000
ep 406: game finished, reward: -1.000000
ep 406: game finished, reward: -1.000000
ep 406: game finished, reward: -1.000000
ep 406: game finished, reward: -1.000000
ep 406: game finished, reward: -1.000000
ep 406: game finished, reward: -1.000000
ep 406: game finished, reward: 1.000000
ep 406: game finished, reward: -1.000000
ep 406: game finished, reward: -1.000000
ep 406: game finished, reward: -1.000000
ep 406: game finished, reward: -1.000000
ep 406: game finished, reward: -1.0000

ep 414: game finished, reward: -1.000000
ep 414: game finished, reward: -1.000000
ep 414: game finished, reward: -1.000000
ep 414: game finished, reward: -1.000000
ep 414: game finished, reward: -1.000000
ep 414: game finished, reward: -1.000000
ep 414: game finished, reward: -1.000000
ep 414: game finished, reward: -1.000000
ep 414: game finished, reward: -1.000000
ep 414: game finished, reward: -1.000000
ep 414: game finished, reward: -1.000000
ep 414: game finished, reward: -1.000000
ep 414: game finished, reward: -1.000000
ep 414: game finished, reward: -1.000000
ep 414: game finished, reward: 1.000000
ep 414: game finished, reward: -1.000000
ep 414: game finished, reward: -1.000000
ep 414: game finished, reward: -1.000000
ep 414: game finished, reward: -1.000000
ep 414: game finished, reward: -1.000000
ep 414: game finished, reward: -1.000000
episode over: 415
1215 1
resetting env. episode reward total was -20.000000. running mean: -20.495790
ep 415: game finished, reward: -1.0000

ep 422: game finished, reward: -1.000000
ep 422: game finished, reward: -1.000000
ep 422: game finished, reward: -1.000000
ep 422: game finished, reward: -1.000000
ep 422: game finished, reward: -1.000000
ep 422: game finished, reward: -1.000000
ep 422: game finished, reward: -1.000000
ep 422: game finished, reward: -1.000000
ep 422: game finished, reward: 1.000000
ep 422: game finished, reward: -1.000000
ep 422: game finished, reward: -1.000000
ep 422: game finished, reward: -1.000000
ep 422: game finished, reward: 1.000000
ep 422: game finished, reward: -1.000000
ep 422: game finished, reward: -1.000000
episode over: 423
2388 1
resetting env. episode reward total was -18.000000. running mean: -20.466018
ep 423: game finished, reward: -1.000000
ep 423: game finished, reward: -1.000000
ep 423: game finished, reward: -1.000000
ep 423: game finished, reward: -1.000000
ep 423: game finished, reward: -1.000000
ep 423: game finished, reward: -1.000000
ep 423: game finished, reward: -1.00000

ep 430: game finished, reward: -1.000000
ep 430: game finished, reward: -1.000000
ep 430: game finished, reward: -1.000000
ep 430: game finished, reward: -1.000000
ep 430: game finished, reward: -1.000000
ep 430: game finished, reward: -1.000000
ep 430: game finished, reward: -1.000000
ep 430: game finished, reward: -1.000000
ep 430: game finished, reward: -1.000000
episode over: 431
1345 1
resetting env. episode reward total was -21.000000. running mean: -20.439826
ep 431: game finished, reward: -1.000000
ep 431: game finished, reward: -1.000000
ep 431: game finished, reward: -1.000000
ep 431: game finished, reward: -1.000000
ep 431: game finished, reward: -1.000000
ep 431: game finished, reward: -1.000000
ep 431: game finished, reward: -1.000000
ep 431: game finished, reward: -1.000000
ep 431: game finished, reward: -1.000000
ep 431: game finished, reward: -1.000000
ep 431: game finished, reward: -1.000000
ep 431: game finished, reward: -1.000000
ep 431: game finished, reward: -1.000

ep 438: game finished, reward: -1.000000
episode over: 439
1585 1
resetting env. episode reward total was -21.000000. running mean: -20.435358
ep 439: game finished, reward: -1.000000
ep 439: game finished, reward: -1.000000
ep 439: game finished, reward: -1.000000
ep 439: game finished, reward: -1.000000
ep 439: game finished, reward: -1.000000
ep 439: game finished, reward: -1.000000
ep 439: game finished, reward: -1.000000
ep 439: game finished, reward: -1.000000
ep 439: game finished, reward: -1.000000
ep 439: game finished, reward: -1.000000
ep 439: game finished, reward: -1.000000
ep 439: game finished, reward: -1.000000
ep 439: game finished, reward: -1.000000
ep 439: game finished, reward: -1.000000
ep 439: game finished, reward: -1.000000
ep 439: game finished, reward: -1.000000
ep 439: game finished, reward: -1.000000
ep 439: game finished, reward: -1.000000
ep 439: game finished, reward: -1.000000
ep 439: game finished, reward: 1.000000
ep 439: game finished, reward: -1.0000

ep 447: game finished, reward: -1.000000
ep 447: game finished, reward: -1.000000
ep 447: game finished, reward: -1.000000
ep 447: game finished, reward: -1.000000
ep 447: game finished, reward: -1.000000
ep 447: game finished, reward: -1.000000
ep 447: game finished, reward: -1.000000
ep 447: game finished, reward: -1.000000
ep 447: game finished, reward: -1.000000
ep 447: game finished, reward: -1.000000
ep 447: game finished, reward: -1.000000
ep 447: game finished, reward: -1.000000
ep 447: game finished, reward: -1.000000
ep 447: game finished, reward: -1.000000
episode over: 448
2062 1
resetting env. episode reward total was -21.000000. running mean: -20.465357
ep 448: game finished, reward: -1.000000
ep 448: game finished, reward: -1.000000
ep 448: game finished, reward: -1.000000
ep 448: game finished, reward: -1.000000
ep 448: game finished, reward: -1.000000
ep 448: game finished, reward: -1.000000
ep 448: game finished, reward: -1.000000
ep 448: game finished, reward: -1.000

ep 455: game finished, reward: -1.000000
ep 455: game finished, reward: -1.000000
ep 455: game finished, reward: -1.000000
ep 455: game finished, reward: -1.000000
ep 455: game finished, reward: -1.000000
ep 455: game finished, reward: -1.000000
ep 455: game finished, reward: -1.000000
ep 455: game finished, reward: -1.000000
ep 455: game finished, reward: -1.000000
ep 455: game finished, reward: -1.000000
ep 455: game finished, reward: -1.000000
ep 455: game finished, reward: -1.000000
ep 455: game finished, reward: -1.000000
ep 455: game finished, reward: -1.000000
ep 455: game finished, reward: 1.000000
episode over: 456
1320 1
resetting env. episode reward total was -20.000000. running mean: -20.372630
ep 456: game finished, reward: -1.000000
ep 456: game finished, reward: -1.000000
ep 456: game finished, reward: -1.000000
ep 456: game finished, reward: -1.000000
ep 456: game finished, reward: -1.000000
ep 456: game finished, reward: -1.000000
ep 456: game finished, reward: -1.0000

ep 463: game finished, reward: -1.000000
ep 463: game finished, reward: -1.000000
ep 463: game finished, reward: -1.000000
ep 463: game finished, reward: -1.000000
ep 463: game finished, reward: -1.000000
ep 463: game finished, reward: -1.000000
ep 463: game finished, reward: -1.000000
ep 463: game finished, reward: -1.000000
episode over: 464
1342 1
resetting env. episode reward total was -21.000000. running mean: -20.353647
ep 464: game finished, reward: -1.000000
ep 464: game finished, reward: 1.000000
ep 464: game finished, reward: -1.000000
ep 464: game finished, reward: -1.000000
ep 464: game finished, reward: -1.000000
ep 464: game finished, reward: -1.000000
ep 464: game finished, reward: -1.000000
ep 464: game finished, reward: -1.000000
ep 464: game finished, reward: -1.000000
ep 464: game finished, reward: -1.000000
ep 464: game finished, reward: -1.000000
ep 464: game finished, reward: -1.000000
ep 464: game finished, reward: -1.000000
ep 464: game finished, reward: -1.0000

episode over: 472
1496 1
resetting env. episode reward total was -21.000000. running mean: -20.356122
ep 472: game finished, reward: -1.000000
ep 472: game finished, reward: -1.000000
ep 472: game finished, reward: -1.000000
ep 472: game finished, reward: -1.000000
ep 472: game finished, reward: -1.000000
ep 472: game finished, reward: -1.000000
ep 472: game finished, reward: -1.000000
ep 472: game finished, reward: -1.000000
ep 472: game finished, reward: -1.000000
ep 472: game finished, reward: -1.000000
ep 472: game finished, reward: -1.000000
ep 472: game finished, reward: -1.000000
ep 472: game finished, reward: -1.000000
ep 472: game finished, reward: -1.000000
ep 472: game finished, reward: -1.000000
ep 472: game finished, reward: -1.000000
ep 472: game finished, reward: -1.000000
ep 472: game finished, reward: -1.000000
ep 472: game finished, reward: -1.000000
ep 472: game finished, reward: -1.000000
ep 472: game finished, reward: -1.000000
episode over: 473
1494 1
resetting en

ep 480: game finished, reward: -1.000000
ep 480: game finished, reward: -1.000000
ep 480: game finished, reward: -1.000000
ep 480: game finished, reward: -1.000000
ep 480: game finished, reward: -1.000000
ep 480: game finished, reward: 1.000000
ep 480: game finished, reward: -1.000000
ep 480: game finished, reward: 1.000000
ep 480: game finished, reward: -1.000000
ep 480: game finished, reward: -1.000000
ep 480: game finished, reward: -1.000000
ep 480: game finished, reward: -1.000000
ep 480: game finished, reward: -1.000000
ep 480: game finished, reward: -1.000000
ep 480: game finished, reward: -1.000000
ep 480: game finished, reward: -1.000000
ep 480: game finished, reward: -1.000000
ep 480: game finished, reward: -1.000000
ep 480: game finished, reward: -1.000000
episode over: 481
1508 1
resetting env. episode reward total was -19.000000. running mean: -20.324261
ep 481: game finished, reward: -1.000000
ep 481: game finished, reward: -1.000000
ep 481: game finished, reward: -1.00000

ep 488: game finished, reward: -1.000000
ep 488: game finished, reward: -1.000000
ep 488: game finished, reward: -1.000000
ep 488: game finished, reward: -1.000000
ep 488: game finished, reward: -1.000000
ep 488: game finished, reward: -1.000000
ep 488: game finished, reward: -1.000000
ep 488: game finished, reward: 1.000000
ep 488: game finished, reward: -1.000000
ep 488: game finished, reward: -1.000000
ep 488: game finished, reward: -1.000000
ep 488: game finished, reward: -1.000000
episode over: 489
1308 1
resetting env. episode reward total was -20.000000. running mean: -20.328513
ep 489: game finished, reward: -1.000000
ep 489: game finished, reward: -1.000000
ep 489: game finished, reward: -1.000000
ep 489: game finished, reward: -1.000000
ep 489: game finished, reward: -1.000000
ep 489: game finished, reward: -1.000000
ep 489: game finished, reward: -1.000000
ep 489: game finished, reward: -1.000000
ep 489: game finished, reward: -1.000000
ep 489: game finished, reward: -1.0000

ep 496: game finished, reward: -1.000000
ep 496: game finished, reward: -1.000000
ep 496: game finished, reward: -1.000000
ep 496: game finished, reward: -1.000000
ep 496: game finished, reward: -1.000000
ep 496: game finished, reward: -1.000000
episode over: 497
1677 1
resetting env. episode reward total was -19.000000. running mean: -20.302257
ep 497: game finished, reward: -1.000000
ep 497: game finished, reward: -1.000000
ep 497: game finished, reward: -1.000000
ep 497: game finished, reward: -1.000000
ep 497: game finished, reward: 1.000000
ep 497: game finished, reward: -1.000000
ep 497: game finished, reward: -1.000000
ep 497: game finished, reward: -1.000000
ep 497: game finished, reward: -1.000000
ep 497: game finished, reward: -1.000000
ep 497: game finished, reward: -1.000000
ep 497: game finished, reward: -1.000000
ep 497: game finished, reward: -1.000000
ep 497: game finished, reward: -1.000000
ep 497: game finished, reward: -1.000000
ep 497: game finished, reward: -1.0000

episode over: 505
1597 1
resetting env. episode reward total was -21.000000. running mean: -20.288813
ep 505: game finished, reward: -1.000000
ep 505: game finished, reward: -1.000000
ep 505: game finished, reward: -1.000000
ep 505: game finished, reward: -1.000000
ep 505: game finished, reward: -1.000000
ep 505: game finished, reward: -1.000000
ep 505: game finished, reward: -1.000000
ep 505: game finished, reward: -1.000000
ep 505: game finished, reward: -1.000000
ep 505: game finished, reward: -1.000000
ep 505: game finished, reward: -1.000000
ep 505: game finished, reward: -1.000000
ep 505: game finished, reward: 1.000000
ep 505: game finished, reward: -1.000000
ep 505: game finished, reward: -1.000000
ep 505: game finished, reward: -1.000000
ep 505: game finished, reward: -1.000000
ep 505: game finished, reward: -1.000000
ep 505: game finished, reward: -1.000000
ep 505: game finished, reward: -1.000000
ep 505: game finished, reward: -1.000000
ep 505: game finished, reward: -1.0000

ep 513: game finished, reward: -1.000000
ep 513: game finished, reward: -1.000000
ep 513: game finished, reward: -1.000000
ep 513: game finished, reward: -1.000000
ep 513: game finished, reward: -1.000000
ep 513: game finished, reward: -1.000000
ep 513: game finished, reward: -1.000000
ep 513: game finished, reward: -1.000000
ep 513: game finished, reward: -1.000000
ep 513: game finished, reward: -1.000000
ep 513: game finished, reward: -1.000000
ep 513: game finished, reward: -1.000000
ep 513: game finished, reward: -1.000000
ep 513: game finished, reward: -1.000000
episode over: 514
1509 1
resetting env. episode reward total was -21.000000. running mean: -20.322356
ep 514: game finished, reward: -1.000000
ep 514: game finished, reward: -1.000000
ep 514: game finished, reward: -1.000000
ep 514: game finished, reward: -1.000000
ep 514: game finished, reward: -1.000000
ep 514: game finished, reward: -1.000000
ep 514: game finished, reward: -1.000000
ep 514: game finished, reward: -1.000

ep 521: game finished, reward: -1.000000
ep 521: game finished, reward: -1.000000
ep 521: game finished, reward: 1.000000
ep 521: game finished, reward: -1.000000
ep 521: game finished, reward: -1.000000
ep 521: game finished, reward: -1.000000
ep 521: game finished, reward: -1.000000
ep 521: game finished, reward: -1.000000
ep 521: game finished, reward: 1.000000
ep 521: game finished, reward: -1.000000
episode over: 522
1490 1
resetting env. episode reward total was -18.000000. running mean: -20.296859
ep 522: game finished, reward: -1.000000
ep 522: game finished, reward: -1.000000
ep 522: game finished, reward: -1.000000
ep 522: game finished, reward: -1.000000
ep 522: game finished, reward: -1.000000
ep 522: game finished, reward: -1.000000
ep 522: game finished, reward: -1.000000
ep 522: game finished, reward: -1.000000
ep 522: game finished, reward: -1.000000
ep 522: game finished, reward: -1.000000
ep 522: game finished, reward: -1.000000
ep 522: game finished, reward: -1.00000

ep 529: game finished, reward: -1.000000
ep 529: game finished, reward: -1.000000
ep 529: game finished, reward: -1.000000
episode over: 530
1423 1
resetting env. episode reward total was -21.000000. running mean: -20.292567
ep 530: game finished, reward: -1.000000
ep 530: game finished, reward: -1.000000
ep 530: game finished, reward: -1.000000
ep 530: game finished, reward: -1.000000
ep 530: game finished, reward: -1.000000
ep 530: game finished, reward: -1.000000
ep 530: game finished, reward: -1.000000
ep 530: game finished, reward: -1.000000
ep 530: game finished, reward: -1.000000
ep 530: game finished, reward: -1.000000
ep 530: game finished, reward: -1.000000
ep 530: game finished, reward: -1.000000
ep 530: game finished, reward: -1.000000
ep 530: game finished, reward: -1.000000
ep 530: game finished, reward: -1.000000
ep 530: game finished, reward: -1.000000
ep 530: game finished, reward: -1.000000
ep 530: game finished, reward: -1.000000
ep 530: game finished, reward: -1.000

ep 538: game finished, reward: -1.000000
ep 538: game finished, reward: -1.000000
ep 538: game finished, reward: -1.000000
ep 538: game finished, reward: -1.000000
ep 538: game finished, reward: -1.000000
ep 538: game finished, reward: -1.000000
ep 538: game finished, reward: -1.000000
ep 538: game finished, reward: -1.000000
ep 538: game finished, reward: -1.000000
ep 538: game finished, reward: -1.000000
ep 538: game finished, reward: 1.000000
ep 538: game finished, reward: -1.000000
ep 538: game finished, reward: -1.000000
ep 538: game finished, reward: -1.000000
ep 538: game finished, reward: -1.000000
ep 538: game finished, reward: -1.000000
ep 538: game finished, reward: -1.000000
ep 538: game finished, reward: -1.000000
episode over: 539
1603 1
resetting env. episode reward total was -19.000000. running mean: -20.305207
ep 539: game finished, reward: -1.000000
ep 539: game finished, reward: -1.000000
ep 539: game finished, reward: -1.000000
ep 539: game finished, reward: -1.0000

ep 546: game finished, reward: -1.000000
ep 546: game finished, reward: -1.000000
ep 546: game finished, reward: -1.000000
ep 546: game finished, reward: -1.000000
ep 546: game finished, reward: -1.000000
ep 546: game finished, reward: -1.000000
ep 546: game finished, reward: 1.000000
ep 546: game finished, reward: -1.000000
ep 546: game finished, reward: -1.000000
ep 546: game finished, reward: -1.000000
episode over: 547
1385 1
resetting env. episode reward total was -20.000000. running mean: -20.310064
ep 547: game finished, reward: -1.000000
ep 547: game finished, reward: -1.000000
ep 547: game finished, reward: -1.000000
ep 547: game finished, reward: -1.000000
ep 547: game finished, reward: -1.000000
ep 547: game finished, reward: -1.000000
ep 547: game finished, reward: -1.000000
ep 547: game finished, reward: -1.000000
ep 547: game finished, reward: -1.000000
ep 547: game finished, reward: -1.000000
ep 547: game finished, reward: -1.000000
ep 547: game finished, reward: -1.0000

ep 554: game finished, reward: -1.000000
ep 554: game finished, reward: -1.000000
ep 554: game finished, reward: -1.000000
ep 554: game finished, reward: -1.000000
ep 554: game finished, reward: -1.000000
episode over: 555
1838 1
resetting env. episode reward total was -19.000000. running mean: -20.276298
ep 555: game finished, reward: -1.000000
ep 555: game finished, reward: -1.000000
ep 555: game finished, reward: -1.000000
ep 555: game finished, reward: -1.000000
ep 555: game finished, reward: -1.000000
ep 555: game finished, reward: -1.000000
ep 555: game finished, reward: -1.000000
ep 555: game finished, reward: -1.000000
ep 555: game finished, reward: -1.000000
ep 555: game finished, reward: -1.000000
ep 555: game finished, reward: -1.000000
ep 555: game finished, reward: -1.000000
ep 555: game finished, reward: -1.000000
ep 555: game finished, reward: -1.000000
ep 555: game finished, reward: -1.000000
ep 555: game finished, reward: -1.000000
ep 555: game finished, reward: -1.000

ep 563: game finished, reward: -1.000000
ep 563: game finished, reward: -1.000000
ep 563: game finished, reward: -1.000000
ep 563: game finished, reward: -1.000000
ep 563: game finished, reward: -1.000000
ep 563: game finished, reward: -1.000000
ep 563: game finished, reward: -1.000000
ep 563: game finished, reward: -1.000000
ep 563: game finished, reward: -1.000000
ep 563: game finished, reward: -1.000000
ep 563: game finished, reward: -1.000000
ep 563: game finished, reward: -1.000000
ep 563: game finished, reward: -1.000000
ep 563: game finished, reward: -1.000000
ep 563: game finished, reward: -1.000000
ep 563: game finished, reward: -1.000000
ep 563: game finished, reward: -1.000000
ep 563: game finished, reward: -1.000000
episode over: 564
1091 1
resetting env. episode reward total was -21.000000. running mean: -20.310355
ep 564: game finished, reward: -1.000000
ep 564: game finished, reward: -1.000000
ep 564: game finished, reward: -1.000000
ep 564: game finished, reward: -1.000

ep 571: game finished, reward: -1.000000
ep 571: game finished, reward: -1.000000
ep 571: game finished, reward: -1.000000
ep 571: game finished, reward: -1.000000
ep 571: game finished, reward: -1.000000
ep 571: game finished, reward: -1.000000
ep 571: game finished, reward: 1.000000
ep 571: game finished, reward: -1.000000
ep 571: game finished, reward: -1.000000
episode over: 572
1709 1
resetting env. episode reward total was -20.000000. running mean: -20.334610
ep 572: game finished, reward: -1.000000
ep 572: game finished, reward: -1.000000
ep 572: game finished, reward: -1.000000
ep 572: game finished, reward: -1.000000
ep 572: game finished, reward: -1.000000
ep 572: game finished, reward: -1.000000
ep 572: game finished, reward: -1.000000
ep 572: game finished, reward: -1.000000
ep 572: game finished, reward: -1.000000
ep 572: game finished, reward: -1.000000
ep 572: game finished, reward: -1.000000
ep 572: game finished, reward: -1.000000
ep 572: game finished, reward: -1.0000

resetting env. episode reward total was -20.000000. running mean: -20.356700
ep 580: game finished, reward: -1.000000
ep 580: game finished, reward: -1.000000
ep 580: game finished, reward: -1.000000
ep 580: game finished, reward: -1.000000
ep 580: game finished, reward: -1.000000
ep 580: game finished, reward: -1.000000
ep 580: game finished, reward: -1.000000
ep 580: game finished, reward: -1.000000
ep 580: game finished, reward: -1.000000
ep 580: game finished, reward: -1.000000
ep 580: game finished, reward: -1.000000
ep 580: game finished, reward: -1.000000
ep 580: game finished, reward: -1.000000
ep 580: game finished, reward: -1.000000
ep 580: game finished, reward: -1.000000
ep 580: game finished, reward: -1.000000
ep 580: game finished, reward: -1.000000
ep 580: game finished, reward: -1.000000
ep 580: game finished, reward: -1.000000
ep 580: game finished, reward: -1.000000
ep 580: game finished, reward: -1.000000
episode over: 581
1260 1
resetting env. episode reward total w

ep 588: game finished, reward: -1.000000
ep 588: game finished, reward: -1.000000
ep 588: game finished, reward: 1.000000
ep 588: game finished, reward: -1.000000
ep 588: game finished, reward: -1.000000
ep 588: game finished, reward: -1.000000
ep 588: game finished, reward: -1.000000
ep 588: game finished, reward: -1.000000
ep 588: game finished, reward: -1.000000
ep 588: game finished, reward: -1.000000
ep 588: game finished, reward: -1.000000
ep 588: game finished, reward: -1.000000
ep 588: game finished, reward: -1.000000
ep 588: game finished, reward: -1.000000
episode over: 589
1774 1
resetting env. episode reward total was -20.000000. running mean: -20.373510
ep 589: game finished, reward: -1.000000
ep 589: game finished, reward: -1.000000
ep 589: game finished, reward: -1.000000
ep 589: game finished, reward: -1.000000
ep 589: game finished, reward: 1.000000
ep 589: game finished, reward: -1.000000
ep 589: game finished, reward: -1.000000
ep 589: game finished, reward: -1.00000

ep 596: game finished, reward: 1.000000
ep 596: game finished, reward: -1.000000
ep 596: game finished, reward: -1.000000
ep 596: game finished, reward: -1.000000
ep 596: game finished, reward: -1.000000
ep 596: game finished, reward: -1.000000
ep 596: game finished, reward: -1.000000
ep 596: game finished, reward: -1.000000
ep 596: game finished, reward: -1.000000
ep 596: game finished, reward: -1.000000
ep 596: game finished, reward: 1.000000
ep 596: game finished, reward: -1.000000
ep 596: game finished, reward: -1.000000
ep 596: game finished, reward: -1.000000
ep 596: game finished, reward: -1.000000
episode over: 597
1525 1
resetting env. episode reward total was -19.000000. running mean: -20.286041
ep 597: game finished, reward: -1.000000
ep 597: game finished, reward: -1.000000
ep 597: game finished, reward: -1.000000
ep 597: game finished, reward: -1.000000
ep 597: game finished, reward: -1.000000
ep 597: game finished, reward: -1.000000
ep 597: game finished, reward: -1.00000

ep 604: game finished, reward: -1.000000
ep 604: game finished, reward: -1.000000
ep 604: game finished, reward: -1.000000
ep 604: game finished, reward: -1.000000
ep 604: game finished, reward: -1.000000
ep 604: game finished, reward: -1.000000
ep 604: game finished, reward: 1.000000
ep 604: game finished, reward: -1.000000
ep 604: game finished, reward: -1.000000
ep 604: game finished, reward: -1.000000
episode over: 605
1802 1
resetting env. episode reward total was -20.000000. running mean: -20.264619
ep 605: game finished, reward: -1.000000
ep 605: game finished, reward: -1.000000
ep 605: game finished, reward: -1.000000
ep 605: game finished, reward: -1.000000
ep 605: game finished, reward: -1.000000
ep 605: game finished, reward: -1.000000
ep 605: game finished, reward: -1.000000
ep 605: game finished, reward: -1.000000
ep 605: game finished, reward: -1.000000
ep 605: game finished, reward: -1.000000
ep 605: game finished, reward: -1.000000
ep 605: game finished, reward: -1.0000

ep 612: game finished, reward: -1.000000
ep 612: game finished, reward: -1.000000
ep 612: game finished, reward: -1.000000
ep 612: game finished, reward: -1.000000
episode over: 613
1858 1
resetting env. episode reward total was -20.000000. running mean: -20.252915
ep 613: game finished, reward: -1.000000
ep 613: game finished, reward: -1.000000
ep 613: game finished, reward: -1.000000
ep 613: game finished, reward: -1.000000
ep 613: game finished, reward: -1.000000
ep 613: game finished, reward: -1.000000
ep 613: game finished, reward: -1.000000
ep 613: game finished, reward: -1.000000
ep 613: game finished, reward: -1.000000
ep 613: game finished, reward: -1.000000
ep 613: game finished, reward: -1.000000
ep 613: game finished, reward: -1.000000
ep 613: game finished, reward: -1.000000
ep 613: game finished, reward: -1.000000
ep 613: game finished, reward: -1.000000
ep 613: game finished, reward: -1.000000
ep 613: game finished, reward: -1.000000
ep 613: game finished, reward: -1.000

ep 621: game finished, reward: -1.000000
ep 621: game finished, reward: -1.000000
ep 621: game finished, reward: -1.000000
ep 621: game finished, reward: -1.000000
ep 621: game finished, reward: -1.000000
ep 621: game finished, reward: -1.000000
ep 621: game finished, reward: -1.000000
ep 621: game finished, reward: -1.000000
ep 621: game finished, reward: -1.000000
ep 621: game finished, reward: -1.000000
ep 621: game finished, reward: -1.000000
ep 621: game finished, reward: -1.000000
ep 621: game finished, reward: -1.000000
ep 621: game finished, reward: -1.000000
ep 621: game finished, reward: -1.000000
ep 621: game finished, reward: -1.000000
ep 621: game finished, reward: -1.000000
ep 621: game finished, reward: -1.000000
ep 621: game finished, reward: -1.000000
ep 621: game finished, reward: 1.000000
ep 621: game finished, reward: -1.000000
episode over: 622
1449 1
resetting env. episode reward total was -20.000000. running mean: -20.259390
ep 622: game finished, reward: -1.0000

ep 629: game finished, reward: -1.000000
ep 629: game finished, reward: -1.000000
ep 629: game finished, reward: -1.000000
ep 629: game finished, reward: -1.000000
ep 629: game finished, reward: -1.000000
ep 629: game finished, reward: -1.000000
ep 629: game finished, reward: -1.000000
ep 629: game finished, reward: -1.000000
ep 629: game finished, reward: -1.000000
ep 629: game finished, reward: -1.000000
ep 629: game finished, reward: -1.000000
ep 629: game finished, reward: -1.000000
ep 629: game finished, reward: -1.000000
ep 629: game finished, reward: -1.000000
ep 629: game finished, reward: -1.000000
episode over: 630
1147 1
resetting env. episode reward total was -20.000000. running mean: -20.249534
ep 630: game finished, reward: -1.000000
ep 630: game finished, reward: -1.000000
ep 630: game finished, reward: -1.000000
ep 630: game finished, reward: 1.000000
ep 630: game finished, reward: -1.000000
ep 630: game finished, reward: -1.000000
ep 630: game finished, reward: -1.0000

ep 637: game finished, reward: -1.000000
ep 637: game finished, reward: -1.000000
ep 637: game finished, reward: -1.000000
ep 637: game finished, reward: -1.000000
ep 637: game finished, reward: -1.000000
ep 637: game finished, reward: 1.000000
ep 637: game finished, reward: -1.000000
ep 637: game finished, reward: -1.000000
ep 637: game finished, reward: -1.000000
ep 637: game finished, reward: -1.000000
ep 637: game finished, reward: -1.000000
ep 637: game finished, reward: -1.000000
episode over: 638
1838 1
resetting env. episode reward total was -19.000000. running mean: -20.209964
ep 638: game finished, reward: -1.000000
ep 638: game finished, reward: -1.000000
ep 638: game finished, reward: -1.000000
ep 638: game finished, reward: -1.000000
ep 638: game finished, reward: -1.000000
ep 638: game finished, reward: -1.000000
ep 638: game finished, reward: -1.000000
ep 638: game finished, reward: -1.000000
ep 638: game finished, reward: -1.000000
ep 638: game finished, reward: -1.0000

ep 645: game finished, reward: -1.000000
ep 645: game finished, reward: -1.000000
ep 645: game finished, reward: -1.000000
ep 645: game finished, reward: -1.000000
ep 645: game finished, reward: -1.000000
episode over: 646
1102 1
resetting env. episode reward total was -21.000000. running mean: -20.213353
ep 646: game finished, reward: -1.000000
ep 646: game finished, reward: -1.000000
ep 646: game finished, reward: -1.000000
ep 646: game finished, reward: -1.000000
ep 646: game finished, reward: -1.000000
ep 646: game finished, reward: -1.000000
ep 646: game finished, reward: -1.000000
ep 646: game finished, reward: -1.000000
ep 646: game finished, reward: -1.000000
ep 646: game finished, reward: -1.000000
ep 646: game finished, reward: -1.000000
ep 646: game finished, reward: -1.000000
ep 646: game finished, reward: -1.000000
ep 646: game finished, reward: -1.000000
ep 646: game finished, reward: -1.000000
ep 646: game finished, reward: -1.000000
ep 646: game finished, reward: -1.000

ep 653: game finished, reward: -1.000000
ep 653: game finished, reward: -1.000000
episode over: 654
1381 1
resetting env. episode reward total was -20.000000. running mean: -20.177175
ep 654: game finished, reward: -1.000000
ep 654: game finished, reward: -1.000000
ep 654: game finished, reward: -1.000000
ep 654: game finished, reward: -1.000000
ep 654: game finished, reward: -1.000000
ep 654: game finished, reward: -1.000000
ep 654: game finished, reward: -1.000000
ep 654: game finished, reward: -1.000000
ep 654: game finished, reward: 1.000000
ep 654: game finished, reward: -1.000000
ep 654: game finished, reward: -1.000000
ep 654: game finished, reward: -1.000000
ep 654: game finished, reward: -1.000000
ep 654: game finished, reward: -1.000000
ep 654: game finished, reward: -1.000000
ep 654: game finished, reward: -1.000000
ep 654: game finished, reward: -1.000000
ep 654: game finished, reward: -1.000000
ep 654: game finished, reward: -1.000000
ep 654: game finished, reward: -1.0000

ep 662: game finished, reward: -1.000000
ep 662: game finished, reward: -1.000000
ep 662: game finished, reward: -1.000000
ep 662: game finished, reward: -1.000000
ep 662: game finished, reward: -1.000000
ep 662: game finished, reward: -1.000000
ep 662: game finished, reward: -1.000000
ep 662: game finished, reward: -1.000000
ep 662: game finished, reward: -1.000000
ep 662: game finished, reward: -1.000000
ep 662: game finished, reward: -1.000000
ep 662: game finished, reward: -1.000000
ep 662: game finished, reward: -1.000000
ep 662: game finished, reward: -1.000000
ep 662: game finished, reward: -1.000000
ep 662: game finished, reward: -1.000000
ep 662: game finished, reward: -1.000000
ep 662: game finished, reward: -1.000000
ep 662: game finished, reward: -1.000000
ep 662: game finished, reward: -1.000000
episode over: 663
1501 1
resetting env. episode reward total was -21.000000. running mean: -20.180984
ep 663: game finished, reward: -1.000000
ep 663: game finished, reward: -1.000

ep 670: game finished, reward: -1.000000
ep 670: game finished, reward: -1.000000
ep 670: game finished, reward: 1.000000
ep 670: game finished, reward: -1.000000
ep 670: game finished, reward: -1.000000
ep 670: game finished, reward: -1.000000
ep 670: game finished, reward: 1.000000
ep 670: game finished, reward: -1.000000
ep 670: game finished, reward: -1.000000
ep 670: game finished, reward: -1.000000
ep 670: game finished, reward: -1.000000
ep 670: game finished, reward: -1.000000
episode over: 671
1807 1
resetting env. episode reward total was -18.000000. running mean: -20.185046
ep 671: game finished, reward: -1.000000
ep 671: game finished, reward: -1.000000
ep 671: game finished, reward: -1.000000
ep 671: game finished, reward: -1.000000
ep 671: game finished, reward: -1.000000
ep 671: game finished, reward: -1.000000
ep 671: game finished, reward: -1.000000
ep 671: game finished, reward: -1.000000
ep 671: game finished, reward: -1.000000
ep 671: game finished, reward: -1.00000

ep 678: game finished, reward: -1.000000
episode over: 679
1246 1
resetting env. episode reward total was -21.000000. running mean: -20.228793
ep 679: game finished, reward: -1.000000
ep 679: game finished, reward: -1.000000
ep 679: game finished, reward: -1.000000
ep 679: game finished, reward: -1.000000
ep 679: game finished, reward: -1.000000
ep 679: game finished, reward: -1.000000
ep 679: game finished, reward: -1.000000
ep 679: game finished, reward: -1.000000
ep 679: game finished, reward: -1.000000
ep 679: game finished, reward: -1.000000
ep 679: game finished, reward: -1.000000
ep 679: game finished, reward: -1.000000
ep 679: game finished, reward: -1.000000
ep 679: game finished, reward: -1.000000
ep 679: game finished, reward: -1.000000
ep 679: game finished, reward: -1.000000
ep 679: game finished, reward: -1.000000
ep 679: game finished, reward: -1.000000
ep 679: game finished, reward: 1.000000
ep 679: game finished, reward: -1.000000
ep 679: game finished, reward: -1.0000

ep 687: game finished, reward: -1.000000
ep 687: game finished, reward: -1.000000
ep 687: game finished, reward: -1.000000
ep 687: game finished, reward: -1.000000
ep 687: game finished, reward: -1.000000
ep 687: game finished, reward: -1.000000
ep 687: game finished, reward: -1.000000
ep 687: game finished, reward: -1.000000
ep 687: game finished, reward: -1.000000
ep 687: game finished, reward: -1.000000
ep 687: game finished, reward: -1.000000
ep 687: game finished, reward: -1.000000
ep 687: game finished, reward: -1.000000
ep 687: game finished, reward: -1.000000
ep 687: game finished, reward: -1.000000
ep 687: game finished, reward: -1.000000
ep 687: game finished, reward: -1.000000
episode over: 688
1381 1
resetting env. episode reward total was -20.000000. running mean: -20.247443
ep 688: game finished, reward: -1.000000
ep 688: game finished, reward: -1.000000
ep 688: game finished, reward: 1.000000
ep 688: game finished, reward: -1.000000
ep 688: game finished, reward: -1.0000

ep 695: game finished, reward: -1.000000
ep 695: game finished, reward: -1.000000
ep 695: game finished, reward: -1.000000
ep 695: game finished, reward: -1.000000
ep 695: game finished, reward: -1.000000
ep 695: game finished, reward: -1.000000
ep 695: game finished, reward: -1.000000
ep 695: game finished, reward: -1.000000
episode over: 696
1664 1
resetting env. episode reward total was -19.000000. running mean: -20.276261
ep 696: game finished, reward: -1.000000
ep 696: game finished, reward: -1.000000
ep 696: game finished, reward: -1.000000
ep 696: game finished, reward: -1.000000
ep 696: game finished, reward: -1.000000
ep 696: game finished, reward: -1.000000
ep 696: game finished, reward: -1.000000
ep 696: game finished, reward: 1.000000
ep 696: game finished, reward: -1.000000
ep 696: game finished, reward: -1.000000
ep 696: game finished, reward: -1.000000
ep 696: game finished, reward: -1.000000
ep 696: game finished, reward: -1.000000
ep 696: game finished, reward: -1.0000

ep 703: game finished, reward: -1.000000
ep 703: game finished, reward: 1.000000
ep 703: game finished, reward: -1.000000
ep 703: game finished, reward: -1.000000
ep 703: game finished, reward: -1.000000
ep 703: game finished, reward: -1.000000
ep 703: game finished, reward: -1.000000
ep 703: game finished, reward: -1.000000
ep 703: game finished, reward: -1.000000
ep 703: game finished, reward: 1.000000
episode over: 704
1845 1
resetting env. episode reward total was -17.000000. running mean: -20.186391
ep 704: game finished, reward: -1.000000
ep 704: game finished, reward: -1.000000
ep 704: game finished, reward: -1.000000
ep 704: game finished, reward: -1.000000
ep 704: game finished, reward: -1.000000
ep 704: game finished, reward: 1.000000
ep 704: game finished, reward: -1.000000
ep 704: game finished, reward: -1.000000
ep 704: game finished, reward: -1.000000
ep 704: game finished, reward: -1.000000
ep 704: game finished, reward: -1.000000
ep 704: game finished, reward: -1.000000

ep 711: game finished, reward: -1.000000
ep 711: game finished, reward: -1.000000
ep 711: game finished, reward: -1.000000
ep 711: game finished, reward: -1.000000
ep 711: game finished, reward: -1.000000
ep 711: game finished, reward: -1.000000
ep 711: game finished, reward: -1.000000
ep 711: game finished, reward: -1.000000
ep 711: game finished, reward: -1.000000
episode over: 712
1346 1
resetting env. episode reward total was -21.000000. running mean: -20.133662
ep 712: game finished, reward: -1.000000
ep 712: game finished, reward: 1.000000
ep 712: game finished, reward: -1.000000
ep 712: game finished, reward: -1.000000
ep 712: game finished, reward: -1.000000
ep 712: game finished, reward: -1.000000
ep 712: game finished, reward: -1.000000
ep 712: game finished, reward: -1.000000
ep 712: game finished, reward: -1.000000
ep 712: game finished, reward: 1.000000
ep 712: game finished, reward: -1.000000
ep 712: game finished, reward: -1.000000
ep 712: game finished, reward: -1.00000

ep 719: game finished, reward: -1.000000
ep 719: game finished, reward: -1.000000
ep 719: game finished, reward: -1.000000
ep 719: game finished, reward: -1.000000
ep 719: game finished, reward: -1.000000
ep 719: game finished, reward: -1.000000
episode over: 720
1620 1
resetting env. episode reward total was -20.000000. running mean: -20.104696
ep 720: game finished, reward: -1.000000
ep 720: game finished, reward: -1.000000
ep 720: game finished, reward: -1.000000
ep 720: game finished, reward: -1.000000
ep 720: game finished, reward: -1.000000
ep 720: game finished, reward: -1.000000
ep 720: game finished, reward: -1.000000
ep 720: game finished, reward: -1.000000
ep 720: game finished, reward: -1.000000
ep 720: game finished, reward: -1.000000
ep 720: game finished, reward: -1.000000
ep 720: game finished, reward: -1.000000
ep 720: game finished, reward: -1.000000
ep 720: game finished, reward: -1.000000
ep 720: game finished, reward: -1.000000
ep 720: game finished, reward: -1.000

ep 728: game finished, reward: -1.000000
ep 728: game finished, reward: -1.000000
ep 728: game finished, reward: -1.000000
ep 728: game finished, reward: -1.000000
ep 728: game finished, reward: -1.000000
ep 728: game finished, reward: -1.000000
ep 728: game finished, reward: -1.000000
ep 728: game finished, reward: -1.000000
ep 728: game finished, reward: -1.000000
ep 728: game finished, reward: -1.000000
ep 728: game finished, reward: -1.000000
ep 728: game finished, reward: -1.000000
ep 728: game finished, reward: -1.000000
ep 728: game finished, reward: -1.000000
ep 728: game finished, reward: -1.000000
ep 728: game finished, reward: -1.000000
ep 728: game finished, reward: -1.000000
ep 728: game finished, reward: -1.000000
ep 728: game finished, reward: -1.000000
episode over: 729
1102 1
resetting env. episode reward total was -21.000000. running mean: -20.163294
ep 729: game finished, reward: -1.000000
ep 729: game finished, reward: -1.000000
ep 729: game finished, reward: -1.000

ep 736: game finished, reward: -1.000000
ep 736: game finished, reward: -1.000000
ep 736: game finished, reward: -1.000000
ep 736: game finished, reward: -1.000000
ep 736: game finished, reward: -1.000000
ep 736: game finished, reward: -1.000000
ep 736: game finished, reward: -1.000000
ep 736: game finished, reward: -1.000000
ep 736: game finished, reward: -1.000000
ep 736: game finished, reward: -1.000000
episode over: 737
1198 1
resetting env. episode reward total was -21.000000. running mean: -20.199109
ep 737: game finished, reward: -1.000000
ep 737: game finished, reward: -1.000000
ep 737: game finished, reward: -1.000000
ep 737: game finished, reward: -1.000000
ep 737: game finished, reward: -1.000000
ep 737: game finished, reward: -1.000000
ep 737: game finished, reward: -1.000000
ep 737: game finished, reward: -1.000000
ep 737: game finished, reward: -1.000000
ep 737: game finished, reward: -1.000000
ep 737: game finished, reward: -1.000000
ep 737: game finished, reward: -1.000

ep 744: game finished, reward: -1.000000
episode over: 745
1708 1
resetting env. episode reward total was -20.000000. running mean: -20.222066
ep 745: game finished, reward: -1.000000
ep 745: game finished, reward: 1.000000
ep 745: game finished, reward: -1.000000
ep 745: game finished, reward: -1.000000
ep 745: game finished, reward: -1.000000
ep 745: game finished, reward: -1.000000
ep 745: game finished, reward: 1.000000
ep 745: game finished, reward: -1.000000
ep 745: game finished, reward: -1.000000
ep 745: game finished, reward: -1.000000
ep 745: game finished, reward: -1.000000
ep 745: game finished, reward: -1.000000
ep 745: game finished, reward: -1.000000
ep 745: game finished, reward: -1.000000
ep 745: game finished, reward: -1.000000
ep 745: game finished, reward: -1.000000
ep 745: game finished, reward: -1.000000
ep 745: game finished, reward: -1.000000
ep 745: game finished, reward: -1.000000
ep 745: game finished, reward: -1.000000
ep 745: game finished, reward: -1.00000

ep 753: game finished, reward: -1.000000
ep 753: game finished, reward: 1.000000
ep 753: game finished, reward: -1.000000
ep 753: game finished, reward: -1.000000
ep 753: game finished, reward: -1.000000
ep 753: game finished, reward: -1.000000
ep 753: game finished, reward: -1.000000
ep 753: game finished, reward: -1.000000
ep 753: game finished, reward: -1.000000
ep 753: game finished, reward: -1.000000
ep 753: game finished, reward: -1.000000
ep 753: game finished, reward: -1.000000
ep 753: game finished, reward: -1.000000
ep 753: game finished, reward: -1.000000
ep 753: game finished, reward: -1.000000
ep 753: game finished, reward: -1.000000
ep 753: game finished, reward: -1.000000
ep 753: game finished, reward: -1.000000
ep 753: game finished, reward: -1.000000
ep 753: game finished, reward: -1.000000
ep 753: game finished, reward: -1.000000
episode over: 754
1233 1
resetting env. episode reward total was -20.000000. running mean: -20.203914
ep 754: game finished, reward: -1.0000

ep 761: game finished, reward: -1.000000
ep 761: game finished, reward: -1.000000
ep 761: game finished, reward: -1.000000
ep 761: game finished, reward: -1.000000
ep 761: game finished, reward: -1.000000
ep 761: game finished, reward: -1.000000
ep 761: game finished, reward: -1.000000
ep 761: game finished, reward: -1.000000
ep 761: game finished, reward: -1.000000
ep 761: game finished, reward: 1.000000
ep 761: game finished, reward: -1.000000
ep 761: game finished, reward: -1.000000
ep 761: game finished, reward: -1.000000
ep 761: game finished, reward: -1.000000
episode over: 762
1835 1
resetting env. episode reward total was -19.000000. running mean: -20.197466
ep 762: game finished, reward: -1.000000
ep 762: game finished, reward: -1.000000
ep 762: game finished, reward: -1.000000
ep 762: game finished, reward: -1.000000
ep 762: game finished, reward: -1.000000
ep 762: game finished, reward: -1.000000
ep 762: game finished, reward: -1.000000
ep 762: game finished, reward: 1.00000

ep 769: game finished, reward: -1.000000
ep 769: game finished, reward: -1.000000
ep 769: game finished, reward: -1.000000
ep 769: game finished, reward: -1.000000
ep 769: game finished, reward: -1.000000
ep 769: game finished, reward: -1.000000
ep 769: game finished, reward: -1.000000
ep 769: game finished, reward: -1.000000
ep 769: game finished, reward: -1.000000
episode over: 770
1633 1
resetting env. episode reward total was -20.000000. running mean: -20.173759
ep 770: game finished, reward: -1.000000
ep 770: game finished, reward: -1.000000
ep 770: game finished, reward: -1.000000
ep 770: game finished, reward: -1.000000
ep 770: game finished, reward: -1.000000
ep 770: game finished, reward: -1.000000
ep 770: game finished, reward: -1.000000
ep 770: game finished, reward: -1.000000
ep 770: game finished, reward: -1.000000
ep 770: game finished, reward: -1.000000
ep 770: game finished, reward: -1.000000
ep 770: game finished, reward: -1.000000
ep 770: game finished, reward: -1.000

ep 777: game finished, reward: -1.000000
episode over: 778
1269 1
resetting env. episode reward total was -21.000000. running mean: -20.189166
ep 778: game finished, reward: -1.000000
ep 778: game finished, reward: -1.000000
ep 778: game finished, reward: -1.000000
ep 778: game finished, reward: -1.000000
ep 778: game finished, reward: -1.000000
ep 778: game finished, reward: -1.000000
ep 778: game finished, reward: -1.000000
ep 778: game finished, reward: -1.000000
ep 778: game finished, reward: -1.000000
ep 778: game finished, reward: -1.000000
ep 778: game finished, reward: -1.000000
ep 778: game finished, reward: -1.000000
ep 778: game finished, reward: -1.000000
ep 778: game finished, reward: -1.000000
ep 778: game finished, reward: -1.000000
ep 778: game finished, reward: -1.000000
ep 778: game finished, reward: -1.000000
ep 778: game finished, reward: -1.000000
ep 778: game finished, reward: -1.000000
ep 778: game finished, reward: 1.000000
ep 778: game finished, reward: -1.0000

ep 786: game finished, reward: -1.000000
ep 786: game finished, reward: -1.000000
ep 786: game finished, reward: -1.000000
ep 786: game finished, reward: -1.000000
ep 786: game finished, reward: -1.000000
ep 786: game finished, reward: -1.000000
ep 786: game finished, reward: -1.000000
ep 786: game finished, reward: -1.000000
ep 786: game finished, reward: -1.000000
ep 786: game finished, reward: -1.000000
ep 786: game finished, reward: -1.000000
ep 786: game finished, reward: -1.000000
ep 786: game finished, reward: -1.000000
ep 786: game finished, reward: -1.000000
ep 786: game finished, reward: -1.000000
ep 786: game finished, reward: -1.000000
episode over: 787
1180 1
resetting env. episode reward total was -21.000000. running mean: -20.221625
ep 787: game finished, reward: -1.000000
ep 787: game finished, reward: -1.000000
ep 787: game finished, reward: -1.000000
ep 787: game finished, reward: -1.000000
ep 787: game finished, reward: -1.000000
ep 787: game finished, reward: -1.000

ep 794: game finished, reward: -1.000000
ep 794: game finished, reward: 1.000000
ep 794: game finished, reward: -1.000000
ep 794: game finished, reward: -1.000000
ep 794: game finished, reward: -1.000000
ep 794: game finished, reward: -1.000000
ep 794: game finished, reward: -1.000000
ep 794: game finished, reward: -1.000000
episode over: 795
1752 1
resetting env. episode reward total was -19.000000. running mean: -20.252056
ep 795: game finished, reward: -1.000000
ep 795: game finished, reward: -1.000000
ep 795: game finished, reward: -1.000000
ep 795: game finished, reward: -1.000000
ep 795: game finished, reward: -1.000000
ep 795: game finished, reward: -1.000000
ep 795: game finished, reward: -1.000000
ep 795: game finished, reward: -1.000000
ep 795: game finished, reward: -1.000000
ep 795: game finished, reward: -1.000000
ep 795: game finished, reward: -1.000000
ep 795: game finished, reward: -1.000000
ep 795: game finished, reward: -1.000000
ep 795: game finished, reward: -1.0000

ep 802: game finished, reward: -1.000000
ep 802: game finished, reward: -1.000000
ep 802: game finished, reward: -1.000000
ep 802: game finished, reward: -1.000000
ep 802: game finished, reward: -1.000000
episode over: 803
1640 1
resetting env. episode reward total was -20.000000. running mean: -20.213469
ep 803: game finished, reward: -1.000000
ep 803: game finished, reward: -1.000000
ep 803: game finished, reward: -1.000000
ep 803: game finished, reward: -1.000000
ep 803: game finished, reward: -1.000000
ep 803: game finished, reward: -1.000000
ep 803: game finished, reward: -1.000000
ep 803: game finished, reward: -1.000000
ep 803: game finished, reward: -1.000000
ep 803: game finished, reward: -1.000000
ep 803: game finished, reward: -1.000000
ep 803: game finished, reward: -1.000000
ep 803: game finished, reward: -1.000000
ep 803: game finished, reward: -1.000000
ep 803: game finished, reward: -1.000000
ep 803: game finished, reward: -1.000000
ep 803: game finished, reward: -1.000

ep 811: game finished, reward: -1.000000
ep 811: game finished, reward: 1.000000
ep 811: game finished, reward: -1.000000
ep 811: game finished, reward: -1.000000
ep 811: game finished, reward: -1.000000
ep 811: game finished, reward: -1.000000
ep 811: game finished, reward: -1.000000
ep 811: game finished, reward: -1.000000
ep 811: game finished, reward: -1.000000
ep 811: game finished, reward: -1.000000
ep 811: game finished, reward: -1.000000
ep 811: game finished, reward: 1.000000
ep 811: game finished, reward: -1.000000
ep 811: game finished, reward: -1.000000
ep 811: game finished, reward: -1.000000
ep 811: game finished, reward: -1.000000
ep 811: game finished, reward: -1.000000
ep 811: game finished, reward: -1.000000
ep 811: game finished, reward: -1.000000
ep 811: game finished, reward: -1.000000
ep 811: game finished, reward: -1.000000
episode over: 812
1515 1
resetting env. episode reward total was -19.000000. running mean: -20.231890
ep 812: game finished, reward: -1.00000

ep 819: game finished, reward: -1.000000
ep 819: game finished, reward: -1.000000
ep 819: game finished, reward: -1.000000
ep 819: game finished, reward: -1.000000
ep 819: game finished, reward: -1.000000
ep 819: game finished, reward: -1.000000
ep 819: game finished, reward: -1.000000
ep 819: game finished, reward: -1.000000
ep 819: game finished, reward: -1.000000
ep 819: game finished, reward: -1.000000
ep 819: game finished, reward: -1.000000
ep 819: game finished, reward: -1.000000
ep 819: game finished, reward: -1.000000
ep 819: game finished, reward: -1.000000
ep 819: game finished, reward: -1.000000
ep 819: game finished, reward: -1.000000
ep 819: game finished, reward: -1.000000
ep 819: game finished, reward: -1.000000
ep 819: game finished, reward: -1.000000
episode over: 820
1182 1
resetting env. episode reward total was -21.000000. running mean: -20.185440
ep 820: game finished, reward: -1.000000
ep 820: game finished, reward: -1.000000
ep 820: game finished, reward: -1.000

ep 827: game finished, reward: -1.000000
ep 827: game finished, reward: -1.000000
ep 827: game finished, reward: -1.000000
ep 827: game finished, reward: -1.000000
ep 827: game finished, reward: -1.000000
ep 827: game finished, reward: -1.000000
ep 827: game finished, reward: -1.000000
ep 827: game finished, reward: -1.000000
ep 827: game finished, reward: -1.000000
ep 827: game finished, reward: -1.000000
ep 827: game finished, reward: -1.000000
ep 827: game finished, reward: -1.000000
ep 827: game finished, reward: -1.000000
ep 827: game finished, reward: -1.000000
ep 827: game finished, reward: -1.000000
ep 827: game finished, reward: -1.000000
episode over: 828
1390 1
resetting env. episode reward total was -21.000000. running mean: -20.151031
ep 828: game finished, reward: -1.000000
ep 828: game finished, reward: -1.000000
ep 828: game finished, reward: -1.000000
ep 828: game finished, reward: -1.000000
ep 828: game finished, reward: -1.000000
ep 828: game finished, reward: -1.000

ep 835: game finished, reward: -1.000000
ep 835: game finished, reward: -1.000000
ep 835: game finished, reward: -1.000000
ep 835: game finished, reward: -1.000000
ep 835: game finished, reward: -1.000000
ep 835: game finished, reward: -1.000000
ep 835: game finished, reward: -1.000000
ep 835: game finished, reward: -1.000000
ep 835: game finished, reward: -1.000000
ep 835: game finished, reward: -1.000000
episode over: 836
1547 1
resetting env. episode reward total was -20.000000. running mean: -20.149460
ep 836: game finished, reward: -1.000000
ep 836: game finished, reward: -1.000000
ep 836: game finished, reward: -1.000000
ep 836: game finished, reward: -1.000000
ep 836: game finished, reward: -1.000000
ep 836: game finished, reward: 1.000000
ep 836: game finished, reward: -1.000000
ep 836: game finished, reward: -1.000000
ep 836: game finished, reward: -1.000000
ep 836: game finished, reward: -1.000000
ep 836: game finished, reward: -1.000000
ep 836: game finished, reward: -1.0000

ep 843: game finished, reward: -1.000000
ep 843: game finished, reward: -1.000000
ep 843: game finished, reward: -1.000000
episode over: 844
1227 1
resetting env. episode reward total was -20.000000. running mean: -20.166928
ep 844: game finished, reward: -1.000000
ep 844: game finished, reward: -1.000000
ep 844: game finished, reward: -1.000000
ep 844: game finished, reward: -1.000000
ep 844: game finished, reward: -1.000000
ep 844: game finished, reward: -1.000000
ep 844: game finished, reward: -1.000000
ep 844: game finished, reward: -1.000000
ep 844: game finished, reward: -1.000000
ep 844: game finished, reward: -1.000000
ep 844: game finished, reward: -1.000000
ep 844: game finished, reward: -1.000000
ep 844: game finished, reward: -1.000000
ep 844: game finished, reward: -1.000000
ep 844: game finished, reward: -1.000000
ep 844: game finished, reward: -1.000000
ep 844: game finished, reward: -1.000000
ep 844: game finished, reward: -1.000000
ep 844: game finished, reward: -1.000

ep 852: game finished, reward: -1.000000
ep 852: game finished, reward: -1.000000
ep 852: game finished, reward: -1.000000
ep 852: game finished, reward: -1.000000
ep 852: game finished, reward: -1.000000
ep 852: game finished, reward: -1.000000
ep 852: game finished, reward: -1.000000
ep 852: game finished, reward: -1.000000
ep 852: game finished, reward: -1.000000
ep 852: game finished, reward: -1.000000
ep 852: game finished, reward: -1.000000
ep 852: game finished, reward: -1.000000
ep 852: game finished, reward: -1.000000
ep 852: game finished, reward: -1.000000
ep 852: game finished, reward: -1.000000
ep 852: game finished, reward: -1.000000
ep 852: game finished, reward: -1.000000
ep 852: game finished, reward: -1.000000
ep 852: game finished, reward: -1.000000
episode over: 853
1404 1
resetting env. episode reward total was -21.000000. running mean: -20.190842
ep 853: game finished, reward: -1.000000
ep 853: game finished, reward: -1.000000
ep 853: game finished, reward: -1.000

ep 860: game finished, reward: -1.000000
ep 860: game finished, reward: -1.000000
ep 860: game finished, reward: -1.000000
ep 860: game finished, reward: 1.000000
ep 860: game finished, reward: -1.000000
ep 860: game finished, reward: -1.000000
ep 860: game finished, reward: -1.000000
ep 860: game finished, reward: -1.000000
ep 860: game finished, reward: -1.000000
ep 860: game finished, reward: -1.000000
ep 860: game finished, reward: -1.000000
ep 860: game finished, reward: -1.000000
ep 860: game finished, reward: -1.000000
ep 860: game finished, reward: -1.000000
ep 860: game finished, reward: -1.000000
ep 860: game finished, reward: -1.000000
ep 860: game finished, reward: -1.000000
episode over: 861
1219 1
resetting env. episode reward total was -20.000000. running mean: -20.147849
ep 861: game finished, reward: -1.000000
ep 861: game finished, reward: -1.000000
ep 861: game finished, reward: -1.000000
ep 861: game finished, reward: -1.000000
ep 861: game finished, reward: -1.0000

ep 868: game finished, reward: -1.000000
ep 868: game finished, reward: -1.000000
ep 868: game finished, reward: -1.000000
ep 868: game finished, reward: -1.000000
ep 868: game finished, reward: -1.000000
ep 868: game finished, reward: -1.000000
ep 868: game finished, reward: -1.000000
ep 868: game finished, reward: -1.000000
ep 868: game finished, reward: -1.000000
ep 868: game finished, reward: -1.000000
ep 868: game finished, reward: 1.000000
episode over: 869
1460 1
resetting env. episode reward total was -20.000000. running mean: -20.145845
ep 869: game finished, reward: -1.000000
ep 869: game finished, reward: -1.000000
ep 869: game finished, reward: -1.000000
ep 869: game finished, reward: -1.000000
ep 869: game finished, reward: -1.000000
ep 869: game finished, reward: -1.000000
ep 869: game finished, reward: -1.000000
ep 869: game finished, reward: -1.000000
ep 869: game finished, reward: -1.000000
ep 869: game finished, reward: -1.000000
ep 869: game finished, reward: -1.0000

ep 876: game finished, reward: -1.000000
ep 876: game finished, reward: -1.000000
ep 876: game finished, reward: -1.000000
ep 876: game finished, reward: -1.000000
ep 876: game finished, reward: -1.000000
ep 876: game finished, reward: -1.000000
ep 876: game finished, reward: -1.000000
episode over: 877
1388 1
resetting env. episode reward total was -20.000000. running mean: -20.124873
ep 877: game finished, reward: -1.000000
ep 877: game finished, reward: -1.000000
ep 877: game finished, reward: -1.000000
ep 877: game finished, reward: -1.000000
ep 877: game finished, reward: -1.000000
ep 877: game finished, reward: -1.000000
ep 877: game finished, reward: -1.000000
ep 877: game finished, reward: -1.000000
ep 877: game finished, reward: -1.000000
ep 877: game finished, reward: -1.000000
ep 877: game finished, reward: -1.000000
ep 877: game finished, reward: -1.000000
ep 877: game finished, reward: -1.000000
ep 877: game finished, reward: -1.000000
ep 877: game finished, reward: -1.000

ep 884: game finished, reward: -1.000000
ep 884: game finished, reward: -1.000000
ep 884: game finished, reward: -1.000000
ep 884: game finished, reward: -1.000000
ep 884: game finished, reward: -1.000000
ep 884: game finished, reward: -1.000000
episode over: 885
1301 1
resetting env. episode reward total was -20.000000. running mean: -20.076311
ep 885: game finished, reward: -1.000000
ep 885: game finished, reward: -1.000000
ep 885: game finished, reward: -1.000000
ep 885: game finished, reward: -1.000000
ep 885: game finished, reward: -1.000000
ep 885: game finished, reward: -1.000000
ep 885: game finished, reward: -1.000000
ep 885: game finished, reward: -1.000000
ep 885: game finished, reward: -1.000000
ep 885: game finished, reward: -1.000000
ep 885: game finished, reward: -1.000000
ep 885: game finished, reward: -1.000000
ep 885: game finished, reward: -1.000000
ep 885: game finished, reward: -1.000000
ep 885: game finished, reward: -1.000000
ep 885: game finished, reward: -1.000

ep 892: game finished, reward: 1.000000
episode over: 893
2348 1
resetting env. episode reward total was -18.000000. running mean: -20.069728
ep 893: game finished, reward: -1.000000
ep 893: game finished, reward: -1.000000
ep 893: game finished, reward: -1.000000
ep 893: game finished, reward: -1.000000
ep 893: game finished, reward: -1.000000
ep 893: game finished, reward: -1.000000
ep 893: game finished, reward: -1.000000
ep 893: game finished, reward: -1.000000
ep 893: game finished, reward: -1.000000
ep 893: game finished, reward: -1.000000
ep 893: game finished, reward: -1.000000
ep 893: game finished, reward: -1.000000
ep 893: game finished, reward: -1.000000
ep 893: game finished, reward: -1.000000
ep 893: game finished, reward: -1.000000
ep 893: game finished, reward: -1.000000
ep 893: game finished, reward: -1.000000
ep 893: game finished, reward: -1.000000
ep 893: game finished, reward: -1.000000
ep 893: game finished, reward: -1.000000
ep 893: game finished, reward: -1.0000

ep 901: game finished, reward: -1.000000
ep 901: game finished, reward: -1.000000
ep 901: game finished, reward: -1.000000
ep 901: game finished, reward: -1.000000
ep 901: game finished, reward: -1.000000
ep 901: game finished, reward: -1.000000
ep 901: game finished, reward: -1.000000
ep 901: game finished, reward: -1.000000
ep 901: game finished, reward: -1.000000
ep 901: game finished, reward: -1.000000
ep 901: game finished, reward: -1.000000
ep 901: game finished, reward: -1.000000
ep 901: game finished, reward: -1.000000
ep 901: game finished, reward: -1.000000
ep 901: game finished, reward: -1.000000
ep 901: game finished, reward: -1.000000
ep 901: game finished, reward: -1.000000
episode over: 902
1480 1
resetting env. episode reward total was -21.000000. running mean: -20.102429
ep 902: game finished, reward: -1.000000
ep 902: game finished, reward: -1.000000
ep 902: game finished, reward: -1.000000
ep 902: game finished, reward: -1.000000
ep 902: game finished, reward: -1.000

ep 909: game finished, reward: -1.000000
ep 909: game finished, reward: -1.000000
ep 909: game finished, reward: -1.000000
ep 909: game finished, reward: -1.000000
ep 909: game finished, reward: -1.000000
ep 909: game finished, reward: -1.000000
ep 909: game finished, reward: -1.000000
ep 909: game finished, reward: -1.000000
ep 909: game finished, reward: -1.000000
ep 909: game finished, reward: -1.000000
episode over: 910
1508 1
resetting env. episode reward total was -21.000000. running mean: -20.114419
ep 910: game finished, reward: -1.000000
ep 910: game finished, reward: -1.000000
ep 910: game finished, reward: -1.000000
ep 910: game finished, reward: -1.000000
ep 910: game finished, reward: -1.000000
ep 910: game finished, reward: -1.000000
ep 910: game finished, reward: -1.000000
ep 910: game finished, reward: -1.000000
ep 910: game finished, reward: 1.000000
ep 910: game finished, reward: -1.000000
ep 910: game finished, reward: -1.000000
ep 910: game finished, reward: -1.0000

ep 917: game finished, reward: 1.000000
ep 917: game finished, reward: -1.000000
ep 917: game finished, reward: -1.000000
ep 917: game finished, reward: -1.000000
ep 917: game finished, reward: -1.000000
ep 917: game finished, reward: 1.000000
ep 917: game finished, reward: -1.000000
ep 917: game finished, reward: -1.000000
ep 917: game finished, reward: -1.000000
ep 917: game finished, reward: -1.000000
ep 917: game finished, reward: -1.000000
ep 917: game finished, reward: -1.000000
ep 917: game finished, reward: -1.000000
episode over: 918
1815 1
resetting env. episode reward total was -18.000000. running mean: -20.028417
ep 918: game finished, reward: -1.000000
ep 918: game finished, reward: -1.000000
ep 918: game finished, reward: -1.000000
ep 918: game finished, reward: -1.000000
ep 918: game finished, reward: -1.000000
ep 918: game finished, reward: -1.000000
ep 918: game finished, reward: -1.000000
ep 918: game finished, reward: -1.000000
ep 918: game finished, reward: -1.00000

ep 925: game finished, reward: -1.000000
ep 925: game finished, reward: -1.000000
ep 925: game finished, reward: -1.000000
ep 925: game finished, reward: -1.000000
ep 925: game finished, reward: -1.000000
ep 925: game finished, reward: -1.000000
ep 925: game finished, reward: -1.000000
episode over: 926
1108 1
resetting env. episode reward total was -21.000000. running mean: -20.045837
ep 926: game finished, reward: -1.000000
ep 926: game finished, reward: -1.000000
ep 926: game finished, reward: -1.000000
ep 926: game finished, reward: -1.000000
ep 926: game finished, reward: 1.000000
ep 926: game finished, reward: -1.000000
ep 926: game finished, reward: -1.000000
ep 926: game finished, reward: -1.000000
ep 926: game finished, reward: -1.000000
ep 926: game finished, reward: -1.000000
ep 926: game finished, reward: -1.000000
ep 926: game finished, reward: -1.000000
ep 926: game finished, reward: -1.000000
ep 926: game finished, reward: -1.000000
ep 926: game finished, reward: -1.0000

ep 933: game finished, reward: -1.000000
ep 933: game finished, reward: -1.000000
episode over: 934
1674 1
resetting env. episode reward total was -19.000000. running mean: -20.051416
ep 934: game finished, reward: -1.000000
ep 934: game finished, reward: -1.000000
ep 934: game finished, reward: -1.000000
ep 934: game finished, reward: -1.000000
ep 934: game finished, reward: 1.000000
ep 934: game finished, reward: -1.000000
ep 934: game finished, reward: -1.000000
ep 934: game finished, reward: -1.000000
ep 934: game finished, reward: -1.000000
ep 934: game finished, reward: -1.000000
ep 934: game finished, reward: -1.000000
ep 934: game finished, reward: -1.000000
ep 934: game finished, reward: -1.000000
ep 934: game finished, reward: -1.000000
ep 934: game finished, reward: -1.000000
ep 934: game finished, reward: -1.000000
ep 934: game finished, reward: -1.000000
ep 934: game finished, reward: -1.000000
ep 934: game finished, reward: -1.000000
ep 934: game finished, reward: -1.0000

ep 942: game finished, reward: -1.000000
ep 942: game finished, reward: -1.000000
ep 942: game finished, reward: 1.000000
ep 942: game finished, reward: -1.000000
ep 942: game finished, reward: -1.000000
ep 942: game finished, reward: -1.000000
ep 942: game finished, reward: -1.000000
ep 942: game finished, reward: -1.000000
ep 942: game finished, reward: -1.000000
ep 942: game finished, reward: -1.000000
ep 942: game finished, reward: -1.000000
ep 942: game finished, reward: -1.000000
ep 942: game finished, reward: -1.000000
ep 942: game finished, reward: -1.000000
ep 942: game finished, reward: -1.000000
ep 942: game finished, reward: -1.000000
ep 942: game finished, reward: -1.000000
ep 942: game finished, reward: -1.000000
ep 942: game finished, reward: -1.000000
episode over: 943
1484 1
resetting env. episode reward total was -20.000000. running mean: -20.066475
ep 943: game finished, reward: -1.000000
ep 943: game finished, reward: -1.000000
ep 943: game finished, reward: -1.0000

ep 950: game finished, reward: -1.000000
ep 950: game finished, reward: -1.000000
ep 950: game finished, reward: -1.000000
ep 950: game finished, reward: -1.000000
ep 950: game finished, reward: -1.000000
ep 950: game finished, reward: -1.000000
ep 950: game finished, reward: -1.000000
ep 950: game finished, reward: -1.000000
ep 950: game finished, reward: 1.000000
ep 950: game finished, reward: -1.000000
ep 950: game finished, reward: -1.000000
ep 950: game finished, reward: 1.000000
ep 950: game finished, reward: -1.000000
ep 950: game finished, reward: 1.000000
ep 950: game finished, reward: -1.000000
ep 950: game finished, reward: -1.000000
ep 950: game finished, reward: -1.000000
ep 950: game finished, reward: -1.000000
ep 950: game finished, reward: -1.000000
ep 950: game finished, reward: 1.000000
episode over: 951
1809 1
resetting env. episode reward total was -16.000000. running mean: -19.993000
ep 951: game finished, reward: -1.000000
ep 951: game finished, reward: -1.000000


ep 958: game finished, reward: -1.000000
ep 958: game finished, reward: -1.000000
ep 958: game finished, reward: -1.000000
ep 958: game finished, reward: -1.000000
ep 958: game finished, reward: -1.000000
ep 958: game finished, reward: -1.000000
ep 958: game finished, reward: -1.000000
ep 958: game finished, reward: -1.000000
ep 958: game finished, reward: -1.000000
ep 958: game finished, reward: -1.000000
episode over: 959
1651 1
resetting env. episode reward total was -21.000000. running mean: -20.032840
ep 959: game finished, reward: -1.000000
ep 959: game finished, reward: -1.000000
ep 959: game finished, reward: -1.000000
ep 959: game finished, reward: -1.000000
ep 959: game finished, reward: -1.000000
ep 959: game finished, reward: -1.000000
ep 959: game finished, reward: -1.000000
ep 959: game finished, reward: -1.000000
ep 959: game finished, reward: -1.000000
ep 959: game finished, reward: -1.000000
ep 959: game finished, reward: -1.000000
ep 959: game finished, reward: -1.000

episode over: 967
1336 1
resetting env. episode reward total was -21.000000. running mean: -20.078732
ep 967: game finished, reward: -1.000000
ep 967: game finished, reward: -1.000000
ep 967: game finished, reward: -1.000000
ep 967: game finished, reward: -1.000000
ep 967: game finished, reward: -1.000000
ep 967: game finished, reward: -1.000000
ep 967: game finished, reward: -1.000000
ep 967: game finished, reward: 1.000000
ep 967: game finished, reward: -1.000000
ep 967: game finished, reward: -1.000000
ep 967: game finished, reward: -1.000000
ep 967: game finished, reward: 1.000000
ep 967: game finished, reward: -1.000000
ep 967: game finished, reward: -1.000000
ep 967: game finished, reward: -1.000000
ep 967: game finished, reward: -1.000000
ep 967: game finished, reward: -1.000000
ep 967: game finished, reward: -1.000000
ep 967: game finished, reward: -1.000000
ep 967: game finished, reward: -1.000000
ep 967: game finished, reward: -1.000000
ep 967: game finished, reward: -1.00000

ep 975: game finished, reward: -1.000000
ep 975: game finished, reward: -1.000000
ep 975: game finished, reward: -1.000000
ep 975: game finished, reward: -1.000000
ep 975: game finished, reward: -1.000000
ep 975: game finished, reward: -1.000000
ep 975: game finished, reward: -1.000000
ep 975: game finished, reward: -1.000000
ep 975: game finished, reward: -1.000000
ep 975: game finished, reward: -1.000000
ep 975: game finished, reward: -1.000000
ep 975: game finished, reward: -1.000000
ep 975: game finished, reward: -1.000000
ep 975: game finished, reward: -1.000000
ep 975: game finished, reward: -1.000000
ep 975: game finished, reward: -1.000000
episode over: 976
1661 1
resetting env. episode reward total was -21.000000. running mean: -20.111609
ep 976: game finished, reward: -1.000000
ep 976: game finished, reward: -1.000000
ep 976: game finished, reward: -1.000000
ep 976: game finished, reward: -1.000000
ep 976: game finished, reward: -1.000000
ep 976: game finished, reward: -1.000

ep 983: game finished, reward: -1.000000
ep 983: game finished, reward: -1.000000
ep 983: game finished, reward: -1.000000
ep 983: game finished, reward: -1.000000
ep 983: game finished, reward: -1.000000
ep 983: game finished, reward: -1.000000
ep 983: game finished, reward: -1.000000
episode over: 984
1423 1
resetting env. episode reward total was -21.000000. running mean: -20.141427
ep 984: game finished, reward: -1.000000
ep 984: game finished, reward: -1.000000
ep 984: game finished, reward: -1.000000
ep 984: game finished, reward: -1.000000
ep 984: game finished, reward: -1.000000
ep 984: game finished, reward: -1.000000
ep 984: game finished, reward: -1.000000
ep 984: game finished, reward: -1.000000
ep 984: game finished, reward: -1.000000
ep 984: game finished, reward: -1.000000
ep 984: game finished, reward: -1.000000
ep 984: game finished, reward: -1.000000
ep 984: game finished, reward: -1.000000
ep 984: game finished, reward: -1.000000
ep 984: game finished, reward: -1.000

ep 991: game finished, reward: -1.000000
ep 991: game finished, reward: -1.000000
ep 991: game finished, reward: -1.000000
ep 991: game finished, reward: -1.000000
ep 991: game finished, reward: -1.000000
ep 991: game finished, reward: -1.000000
ep 991: game finished, reward: -1.000000
ep 991: game finished, reward: -1.000000
episode over: 992
2174 1
resetting env. episode reward total was -21.000000. running mean: -20.082945
ep 992: game finished, reward: -1.000000
ep 992: game finished, reward: -1.000000
ep 992: game finished, reward: -1.000000
ep 992: game finished, reward: -1.000000
ep 992: game finished, reward: -1.000000
ep 992: game finished, reward: -1.000000
ep 992: game finished, reward: -1.000000
ep 992: game finished, reward: -1.000000
ep 992: game finished, reward: -1.000000
ep 992: game finished, reward: -1.000000
ep 992: game finished, reward: -1.000000
ep 992: game finished, reward: -1.000000
ep 992: game finished, reward: -1.000000
ep 992: game finished, reward: -1.000

ep 1000: game finished, reward: -1.000000
ep 1000: game finished, reward: -1.000000
ep 1000: game finished, reward: -1.000000
ep 1000: game finished, reward: -1.000000
ep 1000: game finished, reward: -1.000000
ep 1000: game finished, reward: -1.000000
ep 1000: game finished, reward: -1.000000
ep 1000: game finished, reward: -1.000000
ep 1000: game finished, reward: -1.000000
ep 1000: game finished, reward: -1.000000
ep 1000: game finished, reward: -1.000000
ep 1000: game finished, reward: -1.000000
ep 1000: game finished, reward: -1.000000
ep 1000: game finished, reward: -1.000000
ep 1000: game finished, reward: -1.000000
ep 1000: game finished, reward: -1.000000
ep 1000: game finished, reward: -1.000000
ep 1000: game finished, reward: -1.000000
ep 1000: game finished, reward: -1.000000
ep 1000: game finished, reward: -1.000000
episode over: 1001
1345 1
resetting env. episode reward total was -21.000000. running mean: -20.152934
ep 1001: game finished, reward: -1.000000
ep 1001: game f

ep 1008: game finished, reward: -1.000000
ep 1008: game finished, reward: -1.000000
ep 1008: game finished, reward: -1.000000
ep 1008: game finished, reward: -1.000000
ep 1008: game finished, reward: -1.000000
ep 1008: game finished, reward: -1.000000
ep 1008: game finished, reward: -1.000000
ep 1008: game finished, reward: -1.000000
ep 1008: game finished, reward: -1.000000
ep 1008: game finished, reward: -1.000000
ep 1008: game finished, reward: -1.000000
ep 1008: game finished, reward: 1.000000
ep 1008: game finished, reward: -1.000000
ep 1008: game finished, reward: -1.000000
ep 1008: game finished, reward: -1.000000
ep 1008: game finished, reward: -1.000000
ep 1008: game finished, reward: -1.000000
ep 1008: game finished, reward: -1.000000
episode over: 1009
1311 1
resetting env. episode reward total was -20.000000. running mean: -20.160441
ep 1009: game finished, reward: -1.000000
ep 1009: game finished, reward: -1.000000
ep 1009: game finished, reward: -1.000000
ep 1009: game fi

ep 1016: game finished, reward: -1.000000
ep 1016: game finished, reward: -1.000000
ep 1016: game finished, reward: -1.000000
ep 1016: game finished, reward: -1.000000
ep 1016: game finished, reward: -1.000000
ep 1016: game finished, reward: -1.000000
ep 1016: game finished, reward: -1.000000
ep 1016: game finished, reward: -1.000000
ep 1016: game finished, reward: -1.000000
ep 1016: game finished, reward: -1.000000
ep 1016: game finished, reward: -1.000000
ep 1016: game finished, reward: -1.000000
ep 1016: game finished, reward: -1.000000
episode over: 1017
1267 1
resetting env. episode reward total was -21.000000. running mean: -20.187067
ep 1017: game finished, reward: -1.000000
ep 1017: game finished, reward: -1.000000
ep 1017: game finished, reward: -1.000000
ep 1017: game finished, reward: -1.000000
ep 1017: game finished, reward: 1.000000
ep 1017: game finished, reward: 1.000000
ep 1017: game finished, reward: -1.000000
ep 1017: game finished, reward: -1.000000
ep 1017: game fin

ep 1024: game finished, reward: -1.000000
ep 1024: game finished, reward: -1.000000
ep 1024: game finished, reward: -1.000000
ep 1024: game finished, reward: -1.000000
ep 1024: game finished, reward: -1.000000
ep 1024: game finished, reward: -1.000000
ep 1024: game finished, reward: -1.000000
ep 1024: game finished, reward: -1.000000
ep 1024: game finished, reward: -1.000000
ep 1024: game finished, reward: -1.000000
ep 1024: game finished, reward: -1.000000
ep 1024: game finished, reward: -1.000000
ep 1024: game finished, reward: -1.000000
ep 1024: game finished, reward: -1.000000
episode over: 1025
1113 1
resetting env. episode reward total was -21.000000. running mean: -20.163778
ep 1025: game finished, reward: -1.000000
ep 1025: game finished, reward: -1.000000
ep 1025: game finished, reward: -1.000000
ep 1025: game finished, reward: -1.000000
ep 1025: game finished, reward: -1.000000
ep 1025: game finished, reward: -1.000000
ep 1025: game finished, reward: -1.000000
ep 1025: game f

ep 1032: game finished, reward: -1.000000
ep 1032: game finished, reward: -1.000000
ep 1032: game finished, reward: -1.000000
ep 1032: game finished, reward: -1.000000
ep 1032: game finished, reward: -1.000000
ep 1032: game finished, reward: -1.000000
ep 1032: game finished, reward: -1.000000
ep 1032: game finished, reward: -1.000000
ep 1032: game finished, reward: -1.000000
episode over: 1033
1263 1
resetting env. episode reward total was -21.000000. running mean: -20.199560
ep 1033: game finished, reward: -1.000000
ep 1033: game finished, reward: -1.000000
ep 1033: game finished, reward: -1.000000
ep 1033: game finished, reward: -1.000000
ep 1033: game finished, reward: -1.000000
ep 1033: game finished, reward: -1.000000
ep 1033: game finished, reward: -1.000000
ep 1033: game finished, reward: -1.000000
ep 1033: game finished, reward: -1.000000
ep 1033: game finished, reward: -1.000000
ep 1033: game finished, reward: -1.000000
ep 1033: game finished, reward: -1.000000
ep 1033: game f

ep 1040: game finished, reward: -1.000000
ep 1040: game finished, reward: -1.000000
episode over: 1041
1666 1
resetting env. episode reward total was -21.000000. running mean: -20.242083
ep 1041: game finished, reward: -1.000000
ep 1041: game finished, reward: -1.000000
ep 1041: game finished, reward: -1.000000
ep 1041: game finished, reward: -1.000000
ep 1041: game finished, reward: -1.000000
ep 1041: game finished, reward: -1.000000
ep 1041: game finished, reward: -1.000000
ep 1041: game finished, reward: -1.000000
ep 1041: game finished, reward: -1.000000
ep 1041: game finished, reward: -1.000000
ep 1041: game finished, reward: -1.000000
ep 1041: game finished, reward: 1.000000
ep 1041: game finished, reward: -1.000000
ep 1041: game finished, reward: -1.000000
ep 1041: game finished, reward: -1.000000
ep 1041: game finished, reward: -1.000000
ep 1041: game finished, reward: -1.000000
ep 1041: game finished, reward: -1.000000
ep 1041: game finished, reward: -1.000000
ep 1041: game fi

ep 1048: game finished, reward: -1.000000
episode over: 1049
1588 1
resetting env. episode reward total was -21.000000. running mean: -20.233571
ep 1049: game finished, reward: -1.000000
ep 1049: game finished, reward: -1.000000
ep 1049: game finished, reward: -1.000000
ep 1049: game finished, reward: -1.000000
ep 1049: game finished, reward: -1.000000
ep 1049: game finished, reward: -1.000000
ep 1049: game finished, reward: -1.000000
ep 1049: game finished, reward: -1.000000
ep 1049: game finished, reward: -1.000000
ep 1049: game finished, reward: 1.000000
ep 1049: game finished, reward: 1.000000
ep 1049: game finished, reward: -1.000000
ep 1049: game finished, reward: -1.000000
ep 1049: game finished, reward: -1.000000
ep 1049: game finished, reward: -1.000000
ep 1049: game finished, reward: -1.000000
ep 1049: game finished, reward: -1.000000
ep 1049: game finished, reward: -1.000000
ep 1049: game finished, reward: -1.000000
ep 1049: game finished, reward: -1.000000
ep 1049: game fin

ep 1056: game finished, reward: -1.000000
ep 1056: game finished, reward: -1.000000
episode over: 1057
1632 1
resetting env. episode reward total was -20.000000. running mean: -20.206209
ep 1057: game finished, reward: -1.000000
ep 1057: game finished, reward: -1.000000
ep 1057: game finished, reward: -1.000000
ep 1057: game finished, reward: -1.000000
ep 1057: game finished, reward: -1.000000
ep 1057: game finished, reward: -1.000000
ep 1057: game finished, reward: -1.000000
ep 1057: game finished, reward: -1.000000
ep 1057: game finished, reward: -1.000000
ep 1057: game finished, reward: -1.000000
ep 1057: game finished, reward: -1.000000
ep 1057: game finished, reward: -1.000000
ep 1057: game finished, reward: -1.000000
ep 1057: game finished, reward: -1.000000
ep 1057: game finished, reward: -1.000000
ep 1057: game finished, reward: -1.000000
ep 1057: game finished, reward: -1.000000
ep 1057: game finished, reward: -1.000000
ep 1057: game finished, reward: -1.000000
ep 1057: game f

episode over: 1065
1476 1
resetting env. episode reward total was -20.000000. running mean: -20.208821
ep 1065: game finished, reward: -1.000000
ep 1065: game finished, reward: -1.000000
ep 1065: game finished, reward: -1.000000
ep 1065: game finished, reward: -1.000000
ep 1065: game finished, reward: -1.000000
ep 1065: game finished, reward: -1.000000
ep 1065: game finished, reward: -1.000000
ep 1065: game finished, reward: -1.000000
ep 1065: game finished, reward: -1.000000
ep 1065: game finished, reward: -1.000000
ep 1065: game finished, reward: -1.000000
ep 1065: game finished, reward: -1.000000
ep 1065: game finished, reward: -1.000000
ep 1065: game finished, reward: 1.000000
ep 1065: game finished, reward: -1.000000
ep 1065: game finished, reward: -1.000000
ep 1065: game finished, reward: -1.000000
ep 1065: game finished, reward: -1.000000
ep 1065: game finished, reward: -1.000000
ep 1065: game finished, reward: -1.000000
ep 1065: game finished, reward: -1.000000
ep 1065: game fi

ep 1073: game finished, reward: -1.000000
ep 1073: game finished, reward: -1.000000
ep 1073: game finished, reward: -1.000000
ep 1073: game finished, reward: -1.000000
ep 1073: game finished, reward: -1.000000
ep 1073: game finished, reward: 1.000000
ep 1073: game finished, reward: -1.000000
ep 1073: game finished, reward: -1.000000
ep 1073: game finished, reward: -1.000000
ep 1073: game finished, reward: -1.000000
ep 1073: game finished, reward: -1.000000
ep 1073: game finished, reward: -1.000000
ep 1073: game finished, reward: -1.000000
ep 1073: game finished, reward: -1.000000
ep 1073: game finished, reward: -1.000000
ep 1073: game finished, reward: -1.000000
ep 1073: game finished, reward: -1.000000
ep 1073: game finished, reward: -1.000000
ep 1073: game finished, reward: -1.000000
ep 1073: game finished, reward: -1.000000
episode over: 1074
1476 1
resetting env. episode reward total was -20.000000. running mean: -20.229004
ep 1074: game finished, reward: -1.000000
ep 1074: game fi

ep 1081: game finished, reward: -1.000000
ep 1081: game finished, reward: -1.000000
ep 1081: game finished, reward: -1.000000
ep 1081: game finished, reward: -1.000000
ep 1081: game finished, reward: -1.000000
ep 1081: game finished, reward: -1.000000
ep 1081: game finished, reward: -1.000000
ep 1081: game finished, reward: -1.000000
ep 1081: game finished, reward: -1.000000
ep 1081: game finished, reward: -1.000000
ep 1081: game finished, reward: -1.000000
ep 1081: game finished, reward: -1.000000
ep 1081: game finished, reward: -1.000000
ep 1081: game finished, reward: -1.000000
ep 1081: game finished, reward: -1.000000
ep 1081: game finished, reward: -1.000000
ep 1081: game finished, reward: -1.000000
ep 1081: game finished, reward: -1.000000
ep 1081: game finished, reward: -1.000000
episode over: 1082
1420 1
resetting env. episode reward total was -21.000000. running mean: -20.221410
ep 1082: game finished, reward: -1.000000
ep 1082: game finished, reward: -1.000000
ep 1082: game f

ep 1089: game finished, reward: -1.000000
ep 1089: game finished, reward: -1.000000
ep 1089: game finished, reward: -1.000000
ep 1089: game finished, reward: -1.000000
ep 1089: game finished, reward: -1.000000
ep 1089: game finished, reward: -1.000000
ep 1089: game finished, reward: -1.000000
ep 1089: game finished, reward: -1.000000
ep 1089: game finished, reward: -1.000000
ep 1089: game finished, reward: -1.000000
ep 1089: game finished, reward: -1.000000
ep 1089: game finished, reward: -1.000000
ep 1089: game finished, reward: -1.000000
ep 1089: game finished, reward: -1.000000
ep 1089: game finished, reward: -1.000000
ep 1089: game finished, reward: -1.000000
ep 1089: game finished, reward: -1.000000
ep 1089: game finished, reward: -1.000000
ep 1089: game finished, reward: -1.000000
episode over: 1090
1228 1
resetting env. episode reward total was -20.000000. running mean: -20.204401
ep 1090: game finished, reward: -1.000000
ep 1090: game finished, reward: -1.000000
ep 1090: game f

ep 1097: game finished, reward: -1.000000
ep 1097: game finished, reward: -1.000000
ep 1097: game finished, reward: -1.000000
ep 1097: game finished, reward: -1.000000
ep 1097: game finished, reward: -1.000000
ep 1097: game finished, reward: -1.000000
ep 1097: game finished, reward: -1.000000
ep 1097: game finished, reward: -1.000000
ep 1097: game finished, reward: -1.000000
ep 1097: game finished, reward: -1.000000
ep 1097: game finished, reward: -1.000000
ep 1097: game finished, reward: -1.000000
ep 1097: game finished, reward: -1.000000
ep 1097: game finished, reward: -1.000000
ep 1097: game finished, reward: -1.000000
ep 1097: game finished, reward: -1.000000
ep 1097: game finished, reward: -1.000000
ep 1097: game finished, reward: 1.000000
ep 1097: game finished, reward: -1.000000
ep 1097: game finished, reward: -1.000000
ep 1097: game finished, reward: -1.000000
episode over: 1098
1473 1
resetting env. episode reward total was -20.000000. running mean: -20.159002
ep 1098: game fi

ep 1104: game finished, reward: -1.000000
episode over: 1105
2121 1
resetting env. episode reward total was -16.000000. running mean: -20.098495
ep 1105: game finished, reward: -1.000000
ep 1105: game finished, reward: -1.000000
ep 1105: game finished, reward: -1.000000
ep 1105: game finished, reward: -1.000000
ep 1105: game finished, reward: -1.000000
ep 1105: game finished, reward: 1.000000
ep 1105: game finished, reward: -1.000000
ep 1105: game finished, reward: 1.000000
ep 1105: game finished, reward: -1.000000
ep 1105: game finished, reward: -1.000000
ep 1105: game finished, reward: -1.000000
ep 1105: game finished, reward: -1.000000
ep 1105: game finished, reward: -1.000000
ep 1105: game finished, reward: -1.000000
ep 1105: game finished, reward: -1.000000
ep 1105: game finished, reward: -1.000000
ep 1105: game finished, reward: -1.000000
ep 1105: game finished, reward: -1.000000
ep 1105: game finished, reward: -1.000000
ep 1105: game finished, reward: -1.000000
ep 1105: game fin

ep 1112: game finished, reward: -1.000000
ep 1112: game finished, reward: -1.000000
ep 1112: game finished, reward: -1.000000
ep 1112: game finished, reward: -1.000000
episode over: 1113
1469 1
resetting env. episode reward total was -20.000000. running mean: -20.062935
ep 1113: game finished, reward: -1.000000
ep 1113: game finished, reward: -1.000000
ep 1113: game finished, reward: -1.000000
ep 1113: game finished, reward: -1.000000
ep 1113: game finished, reward: -1.000000
ep 1113: game finished, reward: -1.000000
ep 1113: game finished, reward: -1.000000
ep 1113: game finished, reward: -1.000000
ep 1113: game finished, reward: -1.000000
ep 1113: game finished, reward: -1.000000
ep 1113: game finished, reward: -1.000000
ep 1113: game finished, reward: -1.000000
ep 1113: game finished, reward: -1.000000
ep 1113: game finished, reward: -1.000000
ep 1113: game finished, reward: -1.000000
ep 1113: game finished, reward: 1.000000
ep 1113: game finished, reward: -1.000000
ep 1113: game fi

ep 1120: game finished, reward: -1.000000
ep 1120: game finished, reward: -1.000000
ep 1120: game finished, reward: -1.000000
episode over: 1121
1584 1
resetting env. episode reward total was -21.000000. running mean: -20.067971
ep 1121: game finished, reward: -1.000000
ep 1121: game finished, reward: -1.000000
ep 1121: game finished, reward: -1.000000
ep 1121: game finished, reward: -1.000000
ep 1121: game finished, reward: -1.000000
ep 1121: game finished, reward: -1.000000
ep 1121: game finished, reward: -1.000000
ep 1121: game finished, reward: -1.000000
ep 1121: game finished, reward: -1.000000
ep 1121: game finished, reward: -1.000000
ep 1121: game finished, reward: 1.000000
ep 1121: game finished, reward: 1.000000
ep 1121: game finished, reward: 1.000000
ep 1121: game finished, reward: -1.000000
ep 1121: game finished, reward: -1.000000
ep 1121: game finished, reward: -1.000000
ep 1121: game finished, reward: 1.000000
ep 1121: game finished, reward: -1.000000
ep 1121: game finis

ep 1128: game finished, reward: -1.000000
ep 1128: game finished, reward: -1.000000
ep 1128: game finished, reward: -1.000000
ep 1128: game finished, reward: -1.000000
ep 1128: game finished, reward: -1.000000
ep 1128: game finished, reward: -1.000000
ep 1128: game finished, reward: -1.000000
ep 1128: game finished, reward: -1.000000
ep 1128: game finished, reward: 1.000000
ep 1128: game finished, reward: -1.000000
episode over: 1129
2247 1
resetting env. episode reward total was -19.000000. running mean: -19.996124
ep 1129: game finished, reward: -1.000000
ep 1129: game finished, reward: -1.000000
ep 1129: game finished, reward: -1.000000
ep 1129: game finished, reward: -1.000000
ep 1129: game finished, reward: -1.000000
ep 1129: game finished, reward: -1.000000
ep 1129: game finished, reward: -1.000000
ep 1129: game finished, reward: -1.000000
ep 1129: game finished, reward: 1.000000
ep 1129: game finished, reward: -1.000000
ep 1129: game finished, reward: -1.000000
ep 1129: game fin

ep 1136: game finished, reward: 1.000000
ep 1136: game finished, reward: -1.000000
ep 1136: game finished, reward: -1.000000
ep 1136: game finished, reward: -1.000000
ep 1136: game finished, reward: -1.000000
ep 1136: game finished, reward: -1.000000
ep 1136: game finished, reward: -1.000000
ep 1136: game finished, reward: -1.000000
ep 1136: game finished, reward: 1.000000
ep 1136: game finished, reward: -1.000000
ep 1136: game finished, reward: -1.000000
episode over: 1137
1512 1
resetting env. episode reward total was -19.000000. running mean: -19.976233
ep 1137: game finished, reward: -1.000000
ep 1137: game finished, reward: -1.000000
ep 1137: game finished, reward: -1.000000
ep 1137: game finished, reward: -1.000000
ep 1137: game finished, reward: -1.000000
ep 1137: game finished, reward: -1.000000
ep 1137: game finished, reward: -1.000000
ep 1137: game finished, reward: -1.000000
ep 1137: game finished, reward: -1.000000
ep 1137: game finished, reward: -1.000000
ep 1137: game fin

ep 1144: game finished, reward: -1.000000
ep 1144: game finished, reward: -1.000000
ep 1144: game finished, reward: -1.000000
ep 1144: game finished, reward: -1.000000
ep 1144: game finished, reward: -1.000000
ep 1144: game finished, reward: -1.000000
ep 1144: game finished, reward: -1.000000
ep 1144: game finished, reward: -1.000000
ep 1144: game finished, reward: -1.000000
episode over: 1145
1610 1
resetting env. episode reward total was -20.000000. running mean: -19.997669
ep 1145: game finished, reward: -1.000000
ep 1145: game finished, reward: -1.000000
ep 1145: game finished, reward: -1.000000
ep 1145: game finished, reward: -1.000000
ep 1145: game finished, reward: -1.000000
ep 1145: game finished, reward: -1.000000
ep 1145: game finished, reward: -1.000000
ep 1145: game finished, reward: -1.000000
ep 1145: game finished, reward: -1.000000
ep 1145: game finished, reward: -1.000000
ep 1145: game finished, reward: -1.000000
ep 1145: game finished, reward: -1.000000
ep 1145: game f

ep 1152: game finished, reward: -1.000000
ep 1152: game finished, reward: -1.000000
ep 1152: game finished, reward: -1.000000
ep 1152: game finished, reward: -1.000000
ep 1152: game finished, reward: -1.000000
ep 1152: game finished, reward: -1.000000
ep 1152: game finished, reward: -1.000000
episode over: 1153
1506 1
resetting env. episode reward total was -21.000000. running mean: -20.016492
ep 1153: game finished, reward: -1.000000
ep 1153: game finished, reward: -1.000000
ep 1153: game finished, reward: -1.000000
ep 1153: game finished, reward: -1.000000
ep 1153: game finished, reward: -1.000000
ep 1153: game finished, reward: -1.000000
ep 1153: game finished, reward: -1.000000
ep 1153: game finished, reward: -1.000000
ep 1153: game finished, reward: -1.000000
ep 1153: game finished, reward: -1.000000
ep 1153: game finished, reward: -1.000000
ep 1153: game finished, reward: -1.000000
ep 1153: game finished, reward: -1.000000
ep 1153: game finished, reward: -1.000000
ep 1153: game f

ep 1160: game finished, reward: -1.000000
ep 1160: game finished, reward: -1.000000
ep 1160: game finished, reward: -1.000000
ep 1160: game finished, reward: -1.000000
ep 1160: game finished, reward: -1.000000
ep 1160: game finished, reward: -1.000000
ep 1160: game finished, reward: -1.000000
ep 1160: game finished, reward: -1.000000
ep 1160: game finished, reward: -1.000000
ep 1160: game finished, reward: -1.000000
episode over: 1161
1927 1
resetting env. episode reward total was -21.000000. running mean: -19.986406
ep 1161: game finished, reward: -1.000000
ep 1161: game finished, reward: -1.000000
ep 1161: game finished, reward: -1.000000
ep 1161: game finished, reward: -1.000000
ep 1161: game finished, reward: -1.000000
ep 1161: game finished, reward: -1.000000
ep 1161: game finished, reward: -1.000000
ep 1161: game finished, reward: -1.000000
ep 1161: game finished, reward: -1.000000
ep 1161: game finished, reward: -1.000000
ep 1161: game finished, reward: -1.000000
ep 1161: game f

ep 1168: game finished, reward: -1.000000
ep 1168: game finished, reward: -1.000000
ep 1168: game finished, reward: -1.000000
ep 1168: game finished, reward: -1.000000
ep 1168: game finished, reward: -1.000000
ep 1168: game finished, reward: -1.000000
ep 1168: game finished, reward: -1.000000
ep 1168: game finished, reward: -1.000000
ep 1168: game finished, reward: -1.000000
ep 1168: game finished, reward: -1.000000
ep 1168: game finished, reward: -1.000000
ep 1168: game finished, reward: -1.000000
episode over: 1169
1342 1
resetting env. episode reward total was -21.000000. running mean: -19.977572
ep 1169: game finished, reward: -1.000000
ep 1169: game finished, reward: -1.000000
ep 1169: game finished, reward: -1.000000
ep 1169: game finished, reward: 1.000000
ep 1169: game finished, reward: -1.000000
ep 1169: game finished, reward: -1.000000
ep 1169: game finished, reward: -1.000000
ep 1169: game finished, reward: -1.000000
ep 1169: game finished, reward: -1.000000
ep 1169: game fi

ep 1176: game finished, reward: -1.000000
ep 1176: game finished, reward: -1.000000
ep 1176: game finished, reward: -1.000000
ep 1176: game finished, reward: -1.000000
ep 1176: game finished, reward: -1.000000
ep 1176: game finished, reward: -1.000000
ep 1176: game finished, reward: -1.000000
ep 1176: game finished, reward: -1.000000
ep 1176: game finished, reward: -1.000000
ep 1176: game finished, reward: -1.000000
ep 1176: game finished, reward: -1.000000
ep 1176: game finished, reward: 1.000000
ep 1176: game finished, reward: -1.000000
ep 1176: game finished, reward: -1.000000
episode over: 1177
1643 1
resetting env. episode reward total was -20.000000. running mean: -19.960278
ep 1177: game finished, reward: -1.000000
ep 1177: game finished, reward: -1.000000
ep 1177: game finished, reward: -1.000000
ep 1177: game finished, reward: -1.000000
ep 1177: game finished, reward: 1.000000
ep 1177: game finished, reward: -1.000000
ep 1177: game finished, reward: -1.000000
ep 1177: game fin

ep 1184: game finished, reward: -1.000000
ep 1184: game finished, reward: -1.000000
ep 1184: game finished, reward: -1.000000
ep 1184: game finished, reward: -1.000000
ep 1184: game finished, reward: -1.000000
ep 1184: game finished, reward: -1.000000
ep 1184: game finished, reward: -1.000000
ep 1184: game finished, reward: -1.000000
ep 1184: game finished, reward: 1.000000
ep 1184: game finished, reward: -1.000000
ep 1184: game finished, reward: 1.000000
ep 1184: game finished, reward: -1.000000
ep 1184: game finished, reward: -1.000000
ep 1184: game finished, reward: -1.000000
ep 1184: game finished, reward: -1.000000
ep 1184: game finished, reward: -1.000000
ep 1184: game finished, reward: 1.000000
episode over: 1185
1793 1
resetting env. episode reward total was -17.000000. running mean: -19.933444
ep 1185: game finished, reward: -1.000000
ep 1185: game finished, reward: -1.000000
ep 1185: game finished, reward: -1.000000
ep 1185: game finished, reward: -1.000000
ep 1185: game fini

ep 1192: game finished, reward: -1.000000
ep 1192: game finished, reward: -1.000000
ep 1192: game finished, reward: -1.000000
ep 1192: game finished, reward: -1.000000
ep 1192: game finished, reward: -1.000000
ep 1192: game finished, reward: -1.000000
ep 1192: game finished, reward: -1.000000
ep 1192: game finished, reward: -1.000000
ep 1192: game finished, reward: -1.000000
ep 1192: game finished, reward: -1.000000
ep 1192: game finished, reward: -1.000000
ep 1192: game finished, reward: -1.000000
ep 1192: game finished, reward: -1.000000
episode over: 1193
1894 1
resetting env. episode reward total was -21.000000. running mean: -19.987402
ep 1193: game finished, reward: -1.000000
ep 1193: game finished, reward: -1.000000
ep 1193: game finished, reward: -1.000000
ep 1193: game finished, reward: -1.000000
ep 1193: game finished, reward: -1.000000
ep 1193: game finished, reward: -1.000000
ep 1193: game finished, reward: -1.000000
ep 1193: game finished, reward: -1.000000
ep 1193: game f

ep 1200: game finished, reward: -1.000000
ep 1200: game finished, reward: -1.000000
ep 1200: game finished, reward: 1.000000
ep 1200: game finished, reward: -1.000000
ep 1200: game finished, reward: -1.000000
ep 1200: game finished, reward: -1.000000
ep 1200: game finished, reward: -1.000000
ep 1200: game finished, reward: -1.000000
ep 1200: game finished, reward: -1.000000
ep 1200: game finished, reward: -1.000000
episode over: 1201
1381 1
resetting env. episode reward total was -20.000000. running mean: -20.017301
ep 1201: game finished, reward: -1.000000
ep 1201: game finished, reward: -1.000000
ep 1201: game finished, reward: -1.000000
ep 1201: game finished, reward: -1.000000
ep 1201: game finished, reward: -1.000000
ep 1201: game finished, reward: -1.000000
ep 1201: game finished, reward: -1.000000
ep 1201: game finished, reward: -1.000000
ep 1201: game finished, reward: -1.000000
ep 1201: game finished, reward: -1.000000
ep 1201: game finished, reward: -1.000000
ep 1201: game fi

ep 1208: game finished, reward: -1.000000
ep 1208: game finished, reward: -1.000000
ep 1208: game finished, reward: -1.000000
ep 1208: game finished, reward: -1.000000
ep 1208: game finished, reward: -1.000000
ep 1208: game finished, reward: -1.000000
ep 1208: game finished, reward: -1.000000
episode over: 1209
1671 1
resetting env. episode reward total was -21.000000. running mean: -20.034901
ep 1209: game finished, reward: -1.000000
ep 1209: game finished, reward: -1.000000
ep 1209: game finished, reward: -1.000000
ep 1209: game finished, reward: -1.000000
ep 1209: game finished, reward: -1.000000
ep 1209: game finished, reward: -1.000000
ep 1209: game finished, reward: 1.000000
ep 1209: game finished, reward: -1.000000
ep 1209: game finished, reward: -1.000000
ep 1209: game finished, reward: -1.000000
ep 1209: game finished, reward: 1.000000
ep 1209: game finished, reward: -1.000000
ep 1209: game finished, reward: -1.000000
ep 1209: game finished, reward: 1.000000
ep 1209: game fini

ep 1216: game finished, reward: -1.000000
ep 1216: game finished, reward: -1.000000
ep 1216: game finished, reward: 1.000000
ep 1216: game finished, reward: -1.000000
ep 1216: game finished, reward: -1.000000
ep 1216: game finished, reward: -1.000000
ep 1216: game finished, reward: -1.000000
ep 1216: game finished, reward: -1.000000
ep 1216: game finished, reward: -1.000000
ep 1216: game finished, reward: -1.000000
ep 1216: game finished, reward: -1.000000
ep 1216: game finished, reward: 1.000000
ep 1216: game finished, reward: -1.000000
ep 1216: game finished, reward: -1.000000
ep 1216: game finished, reward: -1.000000
episode over: 1217
1428 1
resetting env. episode reward total was -19.000000. running mean: -19.955424
ep 1217: game finished, reward: -1.000000
ep 1217: game finished, reward: -1.000000
ep 1217: game finished, reward: -1.000000
ep 1217: game finished, reward: -1.000000
ep 1217: game finished, reward: -1.000000
ep 1217: game finished, reward: -1.000000
ep 1217: game fin

ep 1224: game finished, reward: -1.000000
ep 1224: game finished, reward: -1.000000
ep 1224: game finished, reward: -1.000000
ep 1224: game finished, reward: -1.000000
ep 1224: game finished, reward: 1.000000
ep 1224: game finished, reward: -1.000000
ep 1224: game finished, reward: 1.000000
ep 1224: game finished, reward: -1.000000
ep 1224: game finished, reward: -1.000000
ep 1224: game finished, reward: 1.000000
ep 1224: game finished, reward: -1.000000
ep 1224: game finished, reward: -1.000000
ep 1224: game finished, reward: -1.000000
ep 1224: game finished, reward: -1.000000
ep 1224: game finished, reward: -1.000000
ep 1224: game finished, reward: -1.000000
ep 1224: game finished, reward: -1.000000
ep 1224: game finished, reward: -1.000000
episode over: 1225
1800 1
resetting env. episode reward total was -18.000000. running mean: -19.929065
ep 1225: game finished, reward: -1.000000
ep 1225: game finished, reward: -1.000000
ep 1225: game finished, reward: -1.000000
ep 1225: game fini

ep 1232: game finished, reward: -1.000000
ep 1232: game finished, reward: -1.000000
ep 1232: game finished, reward: -1.000000
ep 1232: game finished, reward: -1.000000
ep 1232: game finished, reward: -1.000000
ep 1232: game finished, reward: -1.000000
ep 1232: game finished, reward: -1.000000
ep 1232: game finished, reward: 1.000000
ep 1232: game finished, reward: -1.000000
ep 1232: game finished, reward: -1.000000
ep 1232: game finished, reward: -1.000000
ep 1232: game finished, reward: -1.000000
ep 1232: game finished, reward: 1.000000
ep 1232: game finished, reward: -1.000000
ep 1232: game finished, reward: -1.000000
ep 1232: game finished, reward: -1.000000
ep 1232: game finished, reward: -1.000000
ep 1232: game finished, reward: -1.000000
ep 1232: game finished, reward: -1.000000
ep 1232: game finished, reward: -1.000000
ep 1232: game finished, reward: -1.000000
ep 1232: game finished, reward: -1.000000
episode over: 1233
1449 1
resetting env. episode reward total was -19.000000. 

ep 1240: game finished, reward: -1.000000
ep 1240: game finished, reward: -1.000000
ep 1240: game finished, reward: -1.000000
ep 1240: game finished, reward: -1.000000
ep 1240: game finished, reward: -1.000000
ep 1240: game finished, reward: -1.000000
ep 1240: game finished, reward: -1.000000
ep 1240: game finished, reward: -1.000000
ep 1240: game finished, reward: -1.000000
ep 1240: game finished, reward: -1.000000
ep 1240: game finished, reward: -1.000000
ep 1240: game finished, reward: -1.000000
ep 1240: game finished, reward: -1.000000
ep 1240: game finished, reward: -1.000000
ep 1240: game finished, reward: -1.000000
ep 1240: game finished, reward: -1.000000
ep 1240: game finished, reward: -1.000000
ep 1240: game finished, reward: -1.000000
ep 1240: game finished, reward: -1.000000
ep 1240: game finished, reward: -1.000000
episode over: 1241
1267 1
resetting env. episode reward total was -21.000000. running mean: -19.904722
ep 1241: game finished, reward: -1.000000
ep 1241: game f

ep 1247: game finished, reward: -1.000000
episode over: 1248
1304 1
resetting env. episode reward total was -20.000000. running mean: -19.852000
ep 1248: game finished, reward: -1.000000
ep 1248: game finished, reward: -1.000000
ep 1248: game finished, reward: -1.000000
ep 1248: game finished, reward: -1.000000
ep 1248: game finished, reward: -1.000000
ep 1248: game finished, reward: -1.000000
ep 1248: game finished, reward: -1.000000
ep 1248: game finished, reward: -1.000000
ep 1248: game finished, reward: -1.000000
ep 1248: game finished, reward: -1.000000
ep 1248: game finished, reward: -1.000000
ep 1248: game finished, reward: -1.000000
ep 1248: game finished, reward: -1.000000
ep 1248: game finished, reward: -1.000000
ep 1248: game finished, reward: -1.000000
ep 1248: game finished, reward: -1.000000
ep 1248: game finished, reward: -1.000000
ep 1248: game finished, reward: -1.000000
ep 1248: game finished, reward: -1.000000
ep 1248: game finished, reward: -1.000000
ep 1248: game f