In [1]:
import random, os, numpy as np, tensorflow as tf
from tf_graph import FlappyGraph
from config import *

In [2]:
def add_jumps_to_training(training_images, last_jumps):
    """Append each frame's ``last_jumps`` value to its flattened pixels.

    Args:
        training_images: Sequence of games; each game is a sequence of image
            arrays (one per frame). Every image must support ``.ravel()``.
        last_jumps: Per-game sequences indexed like ``training_images``;
            ``last_jumps[i][j]`` becomes the final feature of frame ``j`` of
            game ``i``.

    Returns:
        A container indexable by game. ``result[i]`` is a float32 array with
        one row per frame (flattened pixels followed by the ``last_jumps``
        value). When every game has the same shape the result is a regular
        stacked ndarray; otherwise it is a 1-D object array with one entry
        per game.
    """
    print("Parsing data...")
    games = [
        np.array(
            [np.append(image.ravel(), last_jumps[i][j]) for j, image in enumerate(game)],
            dtype=np.float32,
        )
        for i, game in enumerate(training_images)
    ]

    # Homogeneous (or empty) input: plain stacking is well defined.
    if len({game_features.shape for game_features in games}) <= 1:
        return np.array(games)

    # Games with different frame counts form a ragged list. Recent NumPy
    # versions refuse to build an array from ragged input implicitly, so
    # fill an object array explicitly, one game per slot.
    ragged = np.empty(len(games), dtype=object)
    for i, game_features in enumerate(games):
        ragged[i] = game_features
    return ragged

In [3]:
# Size handed to FlappyGraph: scaled canvas area plus one.
# NOTE(review): presumably the flattened-frame length plus the single
# last_jumps value appended in add_jumps_to_training - confirm against
# tf_graph.FlappyGraph.
# NOTE(review): only the height term is rounded before multiplying; verify
# this matches the actual frame shape for non-integral scaled widths.
scaled_width = CANVAS_WIDTH * IMG_SCALE_FACTOR
scaled_height = round(CANVAS_HEIGHT * IMG_SCALE_FACTOR)
flappy_graph = FlappyGraph(int(scaled_width * scaled_height) + 1)

# Open the session shared by the cells below and initialise the graph's variables.
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

In [4]:
# Saver used by save_model(); created after the graph has been built above.
saver = tf.train.Saver()


def save_model():
    """Checkpoint the shared session to MODEL_PATH, creating MODEL_DIR if needed."""
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(MODEL_DIR, exist_ok=True)
    saver.save(sess, MODEL_PATH)

In [5]:
def train_iteration(epochs_per_game=5, learning_rate=1e-4, verbose=True):
    """Run training steps on the recorded games stored under DATA_DIR.

    Loads ``images.npy``, ``actions.npy``, ``adjusted_rewards.npy`` and
    ``last_jumps.npy``, builds the per-game inputs with
    ``add_jumps_to_training`` and runs ``flappy_graph.train_step`` on each of
    the first ``NUM_GAMES`` games.

    Args:
        epochs_per_game: Number of consecutive training steps per game.
        learning_rate: Value fed to ``flappy_graph.lr`` on every step.
        verbose: When True, print tensor shapes, the loss and the
            (new_prob, reward) pairs for every step.

    Raises:
        IndexError: If the loaded data holds fewer than ``NUM_GAMES`` games.
    """
    print("Loading data...")

    def _load(filename):
        # NOTE(review): the per-game arrays appear to be ragged (games last
        # different numbers of frames), i.e. stored as pickled object arrays,
        # which np.load rejects unless allow_pickle=True. Unpickling can
        # execute arbitrary code - only load files written by this project.
        return np.load(os.path.join(DATA_DIR, filename), allow_pickle=True)

    training_images = _load("images.npy")
    actions = _load("actions.npy")
    rewards = _load("adjusted_rewards.npy")
    last_jumps = _load("last_jumps.npy")
    X_data = add_jumps_to_training(training_images=training_images, last_jumps=last_jumps)

    # Build the diagnostic shape ops once per call: calling tf.shape() inside
    # the loop would add new nodes to the graph on every training step.
    shape_fetches = None
    if verbose:
        shape_fetches = [
            tf.shape(flappy_graph.y_logits),
            tf.shape(flappy_graph.sigmoid),
            tf.shape(flappy_graph.actions),
        ]
    train_fetches = [
        flappy_graph.rewards,
        flappy_graph.new_prob,
        flappy_graph.train_step,
        flappy_graph.loss,
    ]

    for i in range(NUM_GAMES):
        # The same feed is used for every step on this game.
        feed = {
            flappy_graph.inputs: X_data[i],
            flappy_graph.actions: actions[i],
            flappy_graph.rewards: rewards[i],
            flappy_graph.lr: learning_rate,
        }
        for _epoch in range(epochs_per_game):
            if verbose:
                print(sess.run(shape_fetches, feed_dict=feed))
            rwds, new_prob, _step, train_loss = sess.run(train_fetches, feed_dict=feed)
            if verbose:
                print(new_prob.shape, rwds.shape)
                print("loss", train_loss, "new_prob and rewards: ", list(zip(new_prob, rwds)))

In [6]:
import run_agent

In [None]:
# Write a checkpoint of the current session before the training loop starts.
# NOTE(review): presumably run_agent restores from MODEL_PATH (the log shows
# "Restoring parameters from ./models/trained_flappy") - confirm in run_agent.
save_model()

In [None]:
# Outer training loop: each round runs the agent, trains on the data under
# DATA_DIR, then checkpoints the session.
# NOTE(review): presumably run_agent.run() is what writes the .npy files that
# train_iteration() loads - confirm in run_agent.
for training_round in range(100):
    run_agent.run()
    train_iteration()
    save_model()

INFO:tensorflow:Restoring parameters from ./models/trained_flappy
[[ 0.99013019]] [[ 0.72911358]]
[[ 0.99013019]] [[ 0.72911358]]
[[ 0.99013019]] [[ 0.72911358]]
[[ 0.99015504]] [[ 0.72911853]]
[[ 0.99020153]] [[ 0.72912771]]
[[ 0.99019194]] [[ 0.72912586]]
[[ 0.99013227]] [[ 0.72911406]]
[[ 0.99006379]] [[ 0.72910053]]
[[ 0.990013]] [[ 0.72909045]]
[[ 0.98990262]] [[ 0.7290687]]
[[ 0.98987204]] [[ 0.72906268]]
[[ 0.99014592]] [[ 0.7291168]]
[[ 0.99021709]] [[ 0.72913086]]
[[ 0.99023026]] [[ 0.72913337]]
[[ 0.99041164]] [[ 0.72916925]]
[[ 0.9902643]] [[ 0.7291401]]
[[ 0.99020523]] [[ 0.72912842]]
[[ 0.99016243]] [[ 0.72912002]]
[[ 0.99004352]] [[ 0.72909647]]
[[ 0.98994184]] [[ 0.72907645]]
[[ 0.99009407]] [[ 0.72910649]]
[[ 0.99002409]] [[ 0.72909272]]
[[ 0.99005717]] [[ 0.72909921]]
[[ 0.99004143]] [[ 0.72909611]]
[[ 0.99005854]] [[ 0.72909945]]
[[ 0.99009657]] [[ 0.72910702]]
[[ 0.99030012]] [[ 0.7291472]]
[[ 0.99030739]] [[ 0.72914863]]
[[ 0.99026299]] [[ 0.72913986]]
[[ 0.99021631

[array([39,  1], dtype=int32), array([39,  1], dtype=int32), array([39], dtype=int32)]
(39, 1) (39,)
loss 0.862886 new_prob and rewards:  [(array([ 0.27210909], dtype=float32), -0.36510921), (array([ 0.27211964], dtype=float32), -0.37889817), (array([ 0.27208877], dtype=float32), -0.39282644), (array([ 0.27208906], dtype=float32), -0.4068954), (array([ 0.2721104], dtype=float32), -0.42110646), (array([ 0.2721135], dtype=float32), -0.43546107), (array([ 0.27213186], dtype=float32), -0.44996068), (array([ 0.27211815], dtype=float32), -0.46460673), (array([ 0.27211857], dtype=float32), -0.47940075), (array([ 0.27206498], dtype=float32), -0.4943442), (array([ 0.27205807], dtype=float32), -0.50943857), (array([ 0.27206576], dtype=float32), -0.52468544), (array([ 0.27207702], dtype=float32), -0.54008627), (array([ 0.27208775], dtype=float32), -0.55564272), (array([ 0.27208441], dtype=float32), -0.5713563), (array([ 0.27208728], dtype=float32), -0.5872286), (array([ 0.27210337], dtype=float32

(40, 1) (40,)
loss 0.84381 new_prob and rewards:  [(array([ 0.2758562], dtype=float32), -0.3514581), (array([ 0.27586156], dtype=float32), -0.36510921), (array([ 0.27584553], dtype=float32), -0.37889817), (array([ 0.27583414], dtype=float32), -0.39282644), (array([ 0.27583355], dtype=float32), -0.4068954), (array([ 0.2758373], dtype=float32), -0.42110646), (array([ 0.27586019], dtype=float32), -0.43546107), (array([ 0.27586585], dtype=float32), -0.44996068), (array([ 0.27584594], dtype=float32), -0.46460673), (array([ 0.2758041], dtype=float32), -0.47940075), (array([ 0.27578443], dtype=float32), -0.4943442), (array([ 0.27582055], dtype=float32), -0.50943857), (array([ 0.2758022], dtype=float32), -0.52468544), (array([ 0.27582312], dtype=float32), -0.54008627), (array([ 0.27580494], dtype=float32), -0.55564272), (array([ 0.27583158], dtype=float32), -0.5713563), (array([ 0.27581942], dtype=float32), -0.5872286), (array([ 0.27585453], dtype=float32), -0.60326117), (array([ 0.27582723], 

(39, 1) (39,)
loss 0.842794 new_prob and rewards:  [(array([ 0.28054476], dtype=float32), -0.36510921), (array([ 0.28054756], dtype=float32), -0.37889817), (array([ 0.28052884], dtype=float32), -0.39282644), (array([ 0.28050834], dtype=float32), -0.4068954), (array([ 0.28048688], dtype=float32), -0.42110646), (array([ 0.28049093], dtype=float32), -0.43546107), (array([ 0.2805261], dtype=float32), -0.44996068), (array([ 0.28051609], dtype=float32), -0.46460673), (array([ 0.2804985], dtype=float32), -0.47940075), (array([ 0.28044045], dtype=float32), -0.4943442), (array([ 0.28041935], dtype=float32), -0.50943857), (array([ 0.28046596], dtype=float32), -0.52468544), (array([ 0.28043979], dtype=float32), -0.54008627), (array([ 0.28046036], dtype=float32), -0.55564272), (array([ 0.28041047], dtype=float32), -0.5713563), (array([ 0.28043771], dtype=float32), -0.5872286), (array([ 0.28043044], dtype=float32), -0.60326117), (array([ 0.2804777], dtype=float32), -0.61945575), (array([ 0.28046602

(42, 1) (42,)
loss 0.801364 new_prob and rewards:  [(array([ 0.28586829], dtype=float32), -0.3245641), (array([ 0.28582442], dtype=float32), -0.33794352), (array([ 0.28584057], dtype=float32), -0.3514581), (array([ 0.28581733], dtype=float32), -0.36510921), (array([ 0.28577036], dtype=float32), -0.37889817), (array([ 0.28578544], dtype=float32), -0.39282644), (array([ 0.28579992], dtype=float32), -0.4068954), (array([ 0.28581494], dtype=float32), -0.42110646), (array([ 0.28582579], dtype=float32), -0.43546107), (array([ 0.28574556], dtype=float32), -0.44996068), (array([ 0.28571582], dtype=float32), -0.46460673), (array([ 0.28573221], dtype=float32), -0.47940075), (array([ 0.28567255], dtype=float32), -0.4943442), (array([ 0.28574848], dtype=float32), -0.50943857), (array([ 0.28573555], dtype=float32), -0.52468544), (array([ 0.28573412], dtype=float32), -0.54008627), (array([ 0.28577816], dtype=float32), -0.55564272), (array([ 0.28577369], dtype=float32), -0.5713563), (array([ 0.285749

[[ 0.90355814]] [[ 0.71168011]]
[[ 0.90375364]] [[ 0.71172023]]
[[ 0.90391028]] [[ 0.71175241]]
[[ 0.90410483]] [[ 0.71179235]]
[[ 0.90414459]] [[ 0.71180046]]
[[ 0.90378499]] [[ 0.71172673]]
[[ 0.90379745]] [[ 0.71172923]]
[[ 0.90368491]] [[ 0.71170616]]
[[ 0.90403801]] [[ 0.71177858]]
[[ 0.90382385]] [[ 0.71173465]]
[[ 0.90389848]] [[ 0.71174997]]
[[ 0.90364408]] [[ 0.71169776]]
[[ 0.90374595]] [[ 0.71171868]]
[[ 0.90353483]] [[ 0.71167535]]
[[ 0.90364683]] [[ 0.71169835]]
[[ 0.90363717]] [[ 0.71169639]]
[[ 0.90392882]] [[ 0.71175623]]
[[ 0.90412056]] [[ 0.71179557]]
[[ 0.90432233]] [[ 0.71183693]]
[[ 0.90411538]] [[ 0.7117945]]
[[ 0.90408909]] [[ 0.71178907]]
[[ 0.9040637]] [[ 0.71178389]]
[[ 0.90411478]] [[ 0.71179432]]
[[ 0.90380251]] [[ 0.71173024]]
[[ 0.90389425]] [[ 0.71174908]]
[[ 0.90407681]] [[ 0.71178657]]
[[ 0.90397513]] [[ 0.71176571]]
[[ 0.90425384]] [[ 0.71182293]]
[[ 0.90405101]] [[ 0.71178126]]
[[ 0.90416527]] [[ 0.71180475]]
[[ 0.90418923]] [[ 0.71180964]]
[[ 0.90396

(40, 1) (40,)
loss 0.808811 new_prob and rewards:  [(array([ 0.29117465], dtype=float32), -0.3514581), (array([ 0.29117346], dtype=float32), -0.36510921), (array([ 0.2911371], dtype=float32), -0.37889817), (array([ 0.29109478], dtype=float32), -0.39282644), (array([ 0.2910462], dtype=float32), -0.4068954), (array([ 0.29104042], dtype=float32), -0.42110646), (array([ 0.2911132], dtype=float32), -0.43546107), (array([ 0.29110062], dtype=float32), -0.44996068), (array([ 0.29104382], dtype=float32), -0.46460673), (array([ 0.29095984], dtype=float32), -0.47940075), (array([ 0.2909013], dtype=float32), -0.4943442), (array([ 0.29092926], dtype=float32), -0.50943857), (array([ 0.29094338], dtype=float32), -0.52468544), (array([ 0.29097444], dtype=float32), -0.54008627), (array([ 0.29097623], dtype=float32), -0.55564272), (array([ 0.29095393], dtype=float32), -0.5713563), (array([ 0.29098088], dtype=float32), -0.5872286), (array([ 0.29095811], dtype=float32), -0.60326117), (array([ 0.29098845],

[array([41,  1], dtype=int32), array([41,  1], dtype=int32), array([41], dtype=int32)]
(41, 1) (41,)
loss 0.782236 new_prob and rewards:  [(array([ 0.29903877], dtype=float32), -0.33794352), (array([ 0.29903013], dtype=float32), -0.3514581), (array([ 0.29890651], dtype=float32), -0.36510921), (array([ 0.29889607], dtype=float32), -0.37889817), (array([ 0.29880154], dtype=float32), -0.39282644), (array([ 0.29879212], dtype=float32), -0.4068954), (array([ 0.2989012], dtype=float32), -0.42110646), (array([ 0.29884034], dtype=float32), -0.43546107), (array([ 0.29878074], dtype=float32), -0.44996068), (array([ 0.29868501], dtype=float32), -0.46460673), (array([ 0.29862499], dtype=float32), -0.47940075), (array([ 0.2986626], dtype=float32), -0.4943442), (array([ 0.29866284], dtype=float32), -0.50943857), (array([ 0.29871118], dtype=float32), -0.52468544), (array([ 0.29876608], dtype=float32), -0.54008627), (array([ 0.29873264], dtype=float32), -0.55564272), (array([ 0.29878008], dtype=float3

(39, 1) (39,)
loss 0.78189 new_prob and rewards:  [(array([ 0.30783629], dtype=float32), -0.36510921), (array([ 0.30782926], dtype=float32), -0.37889817), (array([ 0.3077662], dtype=float32), -0.39282644), (array([ 0.30773342], dtype=float32), -0.4068954), (array([ 0.30758828], dtype=float32), -0.42110646), (array([ 0.30760181], dtype=float32), -0.43546107), (array([ 0.30771911], dtype=float32), -0.44996068), (array([ 0.307607], dtype=float32), -0.46460673), (array([ 0.30753177], dtype=float32), -0.47940075), (array([ 0.30738008], dtype=float32), -0.4943442), (array([ 0.30733919], dtype=float32), -0.50943857), (array([ 0.30748713], dtype=float32), -0.52468544), (array([ 0.3074913], dtype=float32), -0.54008627), (array([ 0.30751282], dtype=float32), -0.55564272), (array([ 0.30732423], dtype=float32), -0.5713563), (array([ 0.30740809], dtype=float32), -0.5872286), (array([ 0.30739343], dtype=float32), -0.60326117), (array([ 0.30746293], dtype=float32), -0.61945575), (array([ 0.3074953], 

(41, 1) (41,)
loss 0.733316 new_prob and rewards:  [(array([ 0.32278961], dtype=float32), -0.33794352), (array([ 0.32265455], dtype=float32), -0.3514581), (array([ 0.32266772], dtype=float32), -0.36510921), (array([ 0.32260597], dtype=float32), -0.37889817), (array([ 0.32236791], dtype=float32), -0.39282644), (array([ 0.32240611], dtype=float32), -0.4068954), (array([ 0.32251322], dtype=float32), -0.42110646), (array([ 0.3224839], dtype=float32), -0.43546107), (array([ 0.32258445], dtype=float32), -0.44996068), (array([ 0.32225758], dtype=float32), -0.46460673), (array([ 0.32210618], dtype=float32), -0.47940075), (array([ 0.32219601], dtype=float32), -0.4943442), (array([ 0.32211655], dtype=float32), -0.50943857), (array([ 0.32227975], dtype=float32), -0.52468544), (array([ 0.32216769], dtype=float32), -0.54008627), (array([ 0.32214314], dtype=float32), -0.55564272), (array([ 0.32206953], dtype=float32), -0.5713563), (array([ 0.32212198], dtype=float32), -0.5872286), (array([ 0.3220747

[[ 0.72031122]] [[ 0.67267555]]
[[ 0.72045755]] [[ 0.6727078]]
[[ 0.72020996]] [[ 0.67265326]]
[[ 0.72042286]] [[ 0.67270011]]
[[ 0.72095686]] [[ 0.67281765]]
[[ 0.72089875]] [[ 0.67280489]]
[[ 0.72171688]] [[ 0.67298496]]
[[ 0.72088069]] [[ 0.6728009]]
[[ 0.72072631]] [[ 0.67276692]]
[[ 0.72105443]] [[ 0.67283916]]
[[ 0.72173035]] [[ 0.67298794]]
[[ 0.72193849]] [[ 0.67303377]]
-1
Game 3 over; alive frames: 40
[[ 0.72334063]] [[ 0.67334223]]
[[ 0.71528554]] [[ 0.67156804]]
[[ 0.71528423]] [[ 0.67156774]]
[[ 0.71574664]] [[ 0.67166972]]
[[ 0.71576107]] [[ 0.67167288]]
[[ 0.71702492]] [[ 0.67195153]]
[[ 0.71708953]] [[ 0.67196578]]
[[ 0.71657938]] [[ 0.6718533]]
[[ 0.71740723]] [[ 0.67203581]]
[[ 0.71748173]] [[ 0.67205226]]
[[ 0.71893245]] [[ 0.67237192]]
[[ 0.71878773]] [[ 0.67233998]]
[[ 0.71873772]] [[ 0.67232901]]
[[ 0.71842015]] [[ 0.67225903]]
[[ 0.7178244]] [[ 0.67212772]]
[[ 0.71817076]] [[ 0.67220408]]
[[ 0.71876979]] [[ 0.67233604]]
[[ 0.71822727]] [[ 0.67221653]]
[[ 0.718521

(41, 1) (41,)
loss 0.659871 new_prob and rewards:  [(array([ 0.36248571], dtype=float32), -0.33794352), (array([ 0.36249685], dtype=float32), -0.3514581), (array([ 0.36216444], dtype=float32), -0.36510921), (array([ 0.36207229], dtype=float32), -0.37889817), (array([ 0.36172777], dtype=float32), -0.39282644), (array([ 0.3617596], dtype=float32), -0.4068954), (array([ 0.36187023], dtype=float32), -0.42110646), (array([ 0.36195534], dtype=float32), -0.43546107), (array([ 0.36183399], dtype=float32), -0.44996068), (array([ 0.3609581], dtype=float32), -0.46460673), (array([ 0.36097002], dtype=float32), -0.47940075), (array([ 0.36103952], dtype=float32), -0.4943442), (array([ 0.36109227], dtype=float32), -0.50943857), (array([ 0.36114794], dtype=float32), -0.52468544), (array([ 0.36083359], dtype=float32), -0.54008627), (array([ 0.36075914], dtype=float32), -0.55564272), (array([ 0.36093885], dtype=float32), -0.5713563), (array([ 0.36074007], dtype=float32), -0.5872286), (array([ 0.3608712]

[array([40,  1], dtype=int32), array([40,  1], dtype=int32), array([40], dtype=int32)]
(40, 1) (40,)
loss 0.589447 new_prob and rewards:  [(array([ 0.40928692], dtype=float32), -0.3514581), (array([ 0.40929133], dtype=float32), -0.36510921), (array([ 0.4087348], dtype=float32), -0.37889817), (array([ 0.40857351], dtype=float32), -0.39282644), (array([ 0.40800631], dtype=float32), -0.4068954), (array([ 0.40799868], dtype=float32), -0.42110646), (array([ 0.40853322], dtype=float32), -0.43546107), (array([ 0.40813738], dtype=float32), -0.44996068), (array([ 0.40791541], dtype=float32), -0.46460673), (array([ 0.40698665], dtype=float32), -0.47940075), (array([ 0.40686113], dtype=float32), -0.4943442), (array([ 0.40744656], dtype=float32), -0.50943857), (array([ 0.40718007], dtype=float32), -0.52468544), (array([ 0.40720356], dtype=float32), -0.54008627), (array([ 0.4066959], dtype=float32), -0.55564272), (array([ 0.40675509], dtype=float32), -0.5713563), (array([ 0.40670604], dtype=float32

loss 0.49427 new_prob and rewards:  [(array([ 0.47405839], dtype=float32), -0.3514581), (array([ 0.47406274], dtype=float32), -0.36510921), (array([ 0.47347313], dtype=float32), -0.37889817), (array([ 0.47343993], dtype=float32), -0.39282644), (array([ 0.47243071], dtype=float32), -0.4068954), (array([ 0.47238475], dtype=float32), -0.42110646), (array([ 0.47281587], dtype=float32), -0.43546107), (array([ 0.47206408], dtype=float32), -0.44996068), (array([ 0.47210681], dtype=float32), -0.46460673), (array([ 0.47079533], dtype=float32), -0.47940075), (array([ 0.47101986], dtype=float32), -0.4943442), (array([ 0.47112858], dtype=float32), -0.50943857), (array([ 0.47110802], dtype=float32), -0.52468544), (array([ 0.47153866], dtype=float32), -0.54008627), (array([ 0.47098821], dtype=float32), -0.55564272), (array([ 0.47060895], dtype=float32), -0.5713563), (array([ 0.47110826], dtype=float32), -0.5872286), (array([ 0.47063935], dtype=float32), -0.60326117), (array([ 0.47111881], dtype=floa

INFO:tensorflow:Restoring parameters from ./models/trained_flappy
[[-0.34739161]] [[ 0.41401508]]
[[-0.34739161]] [[ 0.41401508]]
[[-0.34739161]] [[ 0.41401508]]
[[-0.3463608]] [[ 0.41426519]]
[[-0.34218895]] [[ 0.41527787]]
[[-0.34188366]] [[ 0.41535199]]
[[-0.33598006]] [[ 0.41678631]]
[[-0.33679628]] [[ 0.41658795]]
[[-0.33982682]] [[ 0.41585153]]
[[-0.33509469]] [[ 0.41700155]]
[[-0.33481407]] [[ 0.41706973]]
[[-0.32943416]] [[ 0.41837829]]
[[-0.33130598]] [[ 0.41792285]]
[[-0.3326894]] [[ 0.41758642]]
[[-0.33369303]] [[ 0.41734236]]
[[-0.33342969]] [[ 0.41740635]]
[[-0.32807875]] [[ 0.41870818]]
[[-0.32947195]] [[ 0.41836908]]
[[-0.32919693]] [[ 0.41843602]]
[[-0.32723761]] [[ 0.41891292]]
[[-0.32835579]] [[ 0.41864076]]
[[-0.32520521]] [[ 0.41940776]]
[[-0.32542574]] [[ 0.41935399]]
[[-0.32346058]] [[ 0.41983259]]
[[-0.32019639]] [[ 0.42062789]]
[[-0.31809807]] [[ 0.42113933]]
[[-0.31726277]] [[ 0.42134297]]
[[-0.31669557]] [[ 0.42148125]]
[[-0.32008016]] [[ 0.4206562]]
[[-0.3161

(43, 1) (43,)
loss 0.253583 new_prob and rewards:  [(array([ 0.6768378], dtype=float32), -0.31131846), (array([ 0.67671216], dtype=float32), -0.3245641), (array([ 0.67551762], dtype=float32), -0.33794352), (array([ 0.67565119], dtype=float32), -0.3514581), (array([ 0.67380643], dtype=float32), -0.36510921), (array([ 0.67381233], dtype=float32), -0.37889817), (array([ 0.67442048], dtype=float32), -0.39282644), (array([ 0.67351919], dtype=float32), -0.4068954), (array([ 0.67284191], dtype=float32), -0.42110646), (array([ 0.67158544], dtype=float32), -0.43546107), (array([ 0.67190707], dtype=float32), -0.44996068), (array([ 0.67210019], dtype=float32), -0.46460673), (array([ 0.67231905], dtype=float32), -0.47940075), (array([ 0.67234343], dtype=float32), -0.4943442), (array([ 0.67120248], dtype=float32), -0.50943857), (array([ 0.67128253], dtype=float32), -0.52468544), (array([ 0.67124051], dtype=float32), -0.54008627), (array([ 0.67119896], dtype=float32), -0.55564272), (array([ 0.671048

[array([43,  1], dtype=int32), array([43,  1], dtype=int32), array([43], dtype=int32)]
(43, 1) (43,)
loss 0.15225 new_prob and rewards:  [(array([ 0.79300195], dtype=float32), -0.31131846), (array([ 0.7930035], dtype=float32), -0.3245641), (array([ 0.79177368], dtype=float32), -0.33794352), (array([ 0.79136252], dtype=float32), -0.3514581), (array([ 0.79012853], dtype=float32), -0.36510921), (array([ 0.79005098], dtype=float32), -0.37889817), (array([ 0.79096329], dtype=float32), -0.39282644), (array([ 0.79004306], dtype=float32), -0.4068954), (array([ 0.78953362], dtype=float32), -0.42110646), (array([ 0.78794265], dtype=float32), -0.43546107), (array([ 0.78761816], dtype=float32), -0.44996068), (array([ 0.78893518], dtype=float32), -0.46460673), (array([ 0.78773254], dtype=float32), -0.47940075), (array([ 0.78846228], dtype=float32), -0.4943442), (array([ 0.78728139], dtype=float32), -0.50943857), (array([ 0.7873826], dtype=float32), -0.52468544), (array([ 0.78726995], dtype=float32)

(42, 1) (42,)
loss 0.0997513 new_prob and rewards:  [(array([ 0.86225313], dtype=float32), -0.3245641), (array([ 0.86227238], dtype=float32), -0.33794352), (array([ 0.86120069], dtype=float32), -0.3514581), (array([ 0.86118835], dtype=float32), -0.36510921), (array([ 0.85971367], dtype=float32), -0.37889817), (array([ 0.8598237], dtype=float32), -0.39282644), (array([ 0.86005276], dtype=float32), -0.4068954), (array([ 0.85914171], dtype=float32), -0.42110646), (array([ 0.85922343], dtype=float32), -0.43546107), (array([ 0.85756332], dtype=float32), -0.44996068), (array([ 0.85795182], dtype=float32), -0.46460673), (array([ 0.85802543], dtype=float32), -0.47940075), (array([ 0.85812342], dtype=float32), -0.4943442), (array([ 0.8579365], dtype=float32), -0.50943857), (array([ 0.85736191], dtype=float32), -0.52468544), (array([ 0.8573339], dtype=float32), -0.54008627), (array([ 0.85718161], dtype=float32), -0.55564272), (array([ 0.85702401], dtype=float32), -0.5713563), (array([ 0.85723662

(48, 1) (48,)
loss 0.0631449 new_prob and rewards:  [(array([ 0.90556264], dtype=float32), -0.24705079), (array([ 0.90543932], dtype=float32), -0.25964728), (array([ 0.90448511], dtype=float32), -0.27237099), (array([ 0.90441078], dtype=float32), -0.28522322), (array([ 0.90320969], dtype=float32), -0.29820526), (array([ 0.90337908], dtype=float32), -0.31131846), (array([ 0.90396667], dtype=float32), -0.3245641), (array([ 0.90302676], dtype=float32), -0.33794352), (array([ 0.90301466], dtype=float32), -0.3514581), (array([ 0.90190434], dtype=float32), -0.36510921), (array([ 0.90225881], dtype=float32), -0.37889817), (array([ 0.90258038], dtype=float32), -0.39282644), (array([ 0.90253919], dtype=float32), -0.4068954), (array([ 0.90254092], dtype=float32), -0.42110646), (array([ 0.9013927], dtype=float32), -0.43546107), (array([ 0.90165633], dtype=float32), -0.44996068), (array([ 0.90171438], dtype=float32), -0.46460673), (array([ 0.90117413], dtype=float32), -0.47940075), (array([ 0.9013

INFO:tensorflow:Restoring parameters from ./models/trained_flappy
[[-2.59561253]] [[ 0.06942133]]
[[-2.59561276]] [[ 0.06942131]]
[[-2.59561324]] [[ 0.06942128]]
[[-2.59414053]] [[ 0.06951648]]
[[-2.5803206]] [[ 0.07041574]]
[[-2.57474566]] [[ 0.07078154]]
[[-2.5595336]] [[ 0.07178862]]
[[-2.56240082]] [[ 0.07159779]]
[[-2.56830573]] [[ 0.07120628]]
[[-2.56785202]] [[ 0.07123629]]
[[-2.56276536]] [[ 0.07157356]]
[[-2.54417038]] [[ 0.07281911]]
[[-2.54491091]] [[ 0.07276912]]
[[-2.55274272]] [[ 0.07224244]]
[[-2.54753494]] [[ 0.07259227]]
[[-2.5491333]] [[ 0.07248473]]
[[-2.53564644]] [[ 0.07339671]]
[[-2.53695369]] [[ 0.07330785]]
[[-2.54126143]] [[ 0.07301575]]
[[-2.5357399]] [[ 0.07339035]]
[[-2.53666639]] [[ 0.07332737]]
[[-2.52717447]] [[ 0.07397497]]
[[-2.52371359]] [[ 0.0742124]]
[[-2.52889991]] [[ 0.07385686]]
[[-2.52044296]] [[ 0.07443742]]
[[-2.51597667]] [[ 0.07474572]]
[[-2.50003576]] [[ 0.07585567]]
[[-2.50153136]] [[ 0.07575089]]
[[-2.50675917]] [[ 0.07538569]]
[[-2.498937

[[-2.38124347]] [[ 0.0846142]]
[[-2.38660359]] [[ 0.08419996]]
[[-2.38422298]] [[ 0.08438371]]
[[-2.38496757]] [[ 0.0843262]]
[[-2.37298107]] [[ 0.08525637]]
[[-2.368191]] [[ 0.08563067]]
[[-2.37692165]] [[ 0.08494955]]
[[-2.37321997]] [[ 0.08523774]]
[[-2.3845222]] [[ 0.0843606]]
[[-2.39352393]] [[ 0.08366786]]
[[-2.39004207]] [[ 0.0839352]]
[[-2.38880181]] [[ 0.08403061]]
[[-2.3945682]] [[ 0.08358784]]
[[-2.39318061]] [[ 0.08369419]]
[[-2.38821554]] [[ 0.08407575]]
[[-2.39055943]] [[ 0.08389542]]
[[-2.3883059]] [[ 0.08406878]]
[[-2.39178729]] [[ 0.08380111]]
[[-2.38974643]] [[ 0.08395793]]
[[-2.39211464]] [[ 0.08377597]]
[[-2.38782907]] [[ 0.08410551]]
[[-2.39526868]] [[ 0.0835342]]
[[-2.38219261]] [[ 0.08454072]]
[[-2.388901]] [[ 0.08402298]]
[[-2.38680911]] [[ 0.08418411]]
[[-2.38476872]] [[ 0.08434156]]
-1
Game 2 over; alive frames: 142
[[-2.3957715]] [[ 0.08349571]]
[[-2.59594488]] [[ 0.06939986]]
[[-2.59571981]] [[ 0.06941439]]
[[-2.58288884]] [[ 0.07024781]]
[[-2.5796051]] [[ 0

[[-2.39831519]] [[ 0.08330126]]
[[-2.39747286]] [[ 0.0833656]]
[[-2.39643359]] [[ 0.08344506]]
[[-2.39326072]] [[ 0.08368805]]
[[-2.39689112]] [[ 0.08341007]]
[[-2.39666939]] [[ 0.08342703]]
[[-2.39014578]] [[ 0.08392722]]
[[-2.38993239]] [[ 0.08394364]]
[[-2.39059329]] [[ 0.08389282]]
[[-2.38138175]] [[ 0.08460349]]
[[-2.37736154]] [[ 0.08491536]]
[[-2.38335586]] [[ 0.08445073]]
[[-2.38934851]] [[ 0.08398854]]
[[-2.39333057]] [[ 0.08368269]]
[[-2.37925982]] [[ 0.08476797]]
[[-2.38042474]] [[ 0.08467764]]
[[-2.38653469]] [[ 0.08420527]]
[[-2.38665867]] [[ 0.08419571]]
[[-2.38569188]] [[ 0.08427029]]
[[-2.39749908]] [[ 0.0833636]]
[[-2.3963654]] [[ 0.08345027]]
[[-2.38426709]] [[ 0.08438031]]
[[-2.38137507]] [[ 0.08460401]]
[[-2.38121343]] [[ 0.08461653]]
[[-2.38639116]] [[ 0.08421634]]
[[-2.38786817]] [[ 0.0841025]]
[[-2.37730622]] [[ 0.08491966]]
[[-2.36846089]] [[ 0.08560954]]
[[-2.3697021]] [[ 0.08551243]]
[[-2.37848949]] [[ 0.08482776]]
[[-2.3868854]] [[ 0.08417823]]
[[-2.39628744]

(148, 1) (148,)
loss 0.00385461 new_prob and rewards:  [(array([ 0.93091702], dtype=float32), 0.54353905), (array([ 0.93071747], dtype=float32), 0.53892833), (array([ 0.92996186], dtype=float32), 0.53427106), (array([ 0.92952752], dtype=float32), 0.52956671), (array([ 0.92818975], dtype=float32), 0.52481484), (array([ 0.92852783], dtype=float32), 0.520015), (array([ 0.92870146], dtype=float32), 0.5151667), (array([ 0.92832822], dtype=float32), 0.5102694), (array([ 0.92855901], dtype=float32), 0.50532264), (array([ 0.92734122], dtype=float32), 0.50032586), (array([ 0.92749852], dtype=float32), 0.49527866), (array([ 0.92824781], dtype=float32), 0.49018046), (array([ 0.92742729], dtype=float32), 0.48503077), (array([ 0.92742443], dtype=float32), 0.47982907), (array([ 0.92687535], dtype=float32), 0.4745748), (array([ 0.92666006], dtype=float32), 0.46926749), (array([ 0.92707384], dtype=float32), 0.46390656), (array([ 0.92686671], dtype=float32), 0.45849147), (array([ 0.92654294], dtype=flo

loss 0.00380323 new_prob and rewards:  [(array([ 0.93185502], dtype=float32), 0.54353905), (array([ 0.93165749], dtype=float32), 0.53892833), (array([ 0.93090802], dtype=float32), 0.53427106), (array([ 0.93047756], dtype=float32), 0.52956671), (array([ 0.9291513], dtype=float32), 0.52481484), (array([ 0.92948627], dtype=float32), 0.520015), (array([ 0.92965877], dtype=float32), 0.5151667), (array([ 0.92928845], dtype=float32), 0.5102694), (array([ 0.92951709], dtype=float32), 0.50532264), (array([ 0.9283098], dtype=float32), 0.50032586), (array([ 0.92846584], dtype=float32), 0.49527866), (array([ 0.9292087], dtype=float32), 0.49018046), (array([ 0.92839509], dtype=float32), 0.48503077), (array([ 0.92839229], dtype=float32), 0.47982907), (array([ 0.92784721], dtype=float32), 0.4745748), (array([ 0.92763394], dtype=float32), 0.46926749), (array([ 0.92804432], dtype=float32), 0.46390656), (array([ 0.92783856], dtype=float32), 0.45849147), (array([ 0.92751753], dtype=float32), 0.45302168),

loss 0.00569155 new_prob and rewards:  [(array([ 0.93309295], dtype=float32), 0.5151667), (array([ 0.93309313], dtype=float32), 0.5102694), (array([ 0.93228728], dtype=float32), 0.50532264), (array([ 0.93198526], dtype=float32), 0.50032586), (array([ 0.93113309], dtype=float32), 0.49527866), (array([ 0.9313435], dtype=float32), 0.49018046), (array([ 0.93159837], dtype=float32), 0.48503077), (array([ 0.93143094], dtype=float32), 0.47982907), (array([ 0.93145144], dtype=float32), 0.4745748), (array([ 0.92995566], dtype=float32), 0.46926749), (array([ 0.93012667], dtype=float32), 0.46390656), (array([ 0.93054527], dtype=float32), 0.45849147), (array([ 0.92997485], dtype=float32), 0.45302168), (array([ 0.93011254], dtype=float32), 0.44749665), (array([ 0.92950451], dtype=float32), 0.44191581), (array([ 0.92965209], dtype=float32), 0.43627858), (array([ 0.92993045], dtype=float32), 0.43058443), (array([ 0.92933184], dtype=float32), 0.42483276), (array([ 0.92947233], dtype=float32), 0.419023

(142, 1) (142,)
loss 0.00555847 new_prob and rewards:  [(array([ 0.93489802], dtype=float32), 0.5151667), (array([ 0.93488407], dtype=float32), 0.5102694), (array([ 0.93408179], dtype=float32), 0.50532264), (array([ 0.93387586], dtype=float32), 0.50032586), (array([ 0.93278003], dtype=float32), 0.49527866), (array([ 0.93302721], dtype=float32), 0.49018046), (array([ 0.93314534], dtype=float32), 0.48503077), (array([ 0.93277538], dtype=float32), 0.47982907), (array([ 0.93301487], dtype=float32), 0.4745748), (array([ 0.93220657], dtype=float32), 0.46926749), (array([ 0.93237162], dtype=float32), 0.46390656), (array([ 0.93250757], dtype=float32), 0.45849147), (array([ 0.932271], dtype=float32), 0.45302168), (array([ 0.93247682], dtype=float32), 0.44749665), (array([ 0.931651], dtype=float32), 0.44191581), (array([ 0.93156689], dtype=float32), 0.43627858), (array([ 0.93204361], dtype=float32), 0.43058443), (array([ 0.9317084], dtype=float32), 0.42483276), (array([ 0.93165106], dtype=float3

loss 0.00544889 new_prob and rewards:  [(array([ 0.93620753], dtype=float32), 0.5151667), (array([ 0.9361937], dtype=float32), 0.5102694), (array([ 0.93540132], dtype=float32), 0.50532264), (array([ 0.93519819], dtype=float32), 0.50032586), (array([ 0.93411636], dtype=float32), 0.49527866), (array([ 0.93436021], dtype=float32), 0.49018046), (array([ 0.93447739], dtype=float32), 0.48503077), (array([ 0.93411165], dtype=float32), 0.47982907), (array([ 0.93434793], dtype=float32), 0.4745748), (array([ 0.93354976], dtype=float32), 0.46926749), (array([ 0.93371272], dtype=float32), 0.46390656), (array([ 0.93384743], dtype=float32), 0.45849147), (array([ 0.93361282], dtype=float32), 0.45302168), (array([ 0.93381602], dtype=float32), 0.44749665), (array([ 0.93300045], dtype=float32), 0.44191581), (array([ 0.93291759], dtype=float32), 0.43627858), (array([ 0.93338823], dtype=float32), 0.43058443), (array([ 0.93305665], dtype=float32), 0.42483276), (array([ 0.93300009], dtype=float32), 0.419023

loss 0.00533247 new_prob and rewards:  [(array([ 0.93747759], dtype=float32), 0.5151667), (array([ 0.93746793], dtype=float32), 0.5102694), (array([ 0.93668777], dtype=float32), 0.50532264), (array([ 0.93642312], dtype=float32), 0.50032586), (array([ 0.93557084], dtype=float32), 0.49527866), (array([ 0.93567652], dtype=float32), 0.49018046), (array([ 0.93588656], dtype=float32), 0.48503077), (array([ 0.93537807], dtype=float32), 0.47982907), (array([ 0.93529058], dtype=float32), 0.4745748), (array([ 0.93392497], dtype=float32), 0.46926749), (array([ 0.93427926], dtype=float32), 0.46390656), (array([ 0.93504614], dtype=float32), 0.45849147), (array([ 0.93465728], dtype=float32), 0.45302168), (array([ 0.93475026], dtype=float32), 0.44749665), (array([ 0.93388408], dtype=float32), 0.44191581), (array([ 0.93395418], dtype=float32), 0.43627858), (array([ 0.93419844], dtype=float32), 0.43058443), (array([ 0.93380916], dtype=float32), 0.42483276), (array([ 0.9338693], dtype=float32), 0.419023

loss 0.00524165 new_prob and rewards:  [(array([ 0.93869042], dtype=float32), 0.5151667), (array([ 0.93860507], dtype=float32), 0.5102694), (array([ 0.93777108], dtype=float32), 0.50532264), (array([ 0.93743384], dtype=float32), 0.50032586), (array([ 0.93650246], dtype=float32), 0.49527866), (array([ 0.93667787], dtype=float32), 0.49018046), (array([ 0.93704027], dtype=float32), 0.48503077), (array([ 0.93700588], dtype=float32), 0.47982907), (array([ 0.93669808], dtype=float32), 0.4745748), (array([ 0.9355486], dtype=float32), 0.46926749), (array([ 0.93559837], dtype=float32), 0.46390656), (array([ 0.93608445], dtype=float32), 0.45849147), (array([ 0.93575472], dtype=float32), 0.45302168), (array([ 0.93585348], dtype=float32), 0.44749665), (array([ 0.93500811], dtype=float32), 0.44191581), (array([ 0.93509054], dtype=float32), 0.43627858), (array([ 0.93536198], dtype=float32), 0.43058443), (array([ 0.93501151], dtype=float32), 0.42483276), (array([ 0.93506861], dtype=float32), 0.419023

[array([142,   1], dtype=int32), array([142,   1], dtype=int32), array([142], dtype=int32)]
(142, 1) (142,)
loss 0.0051086 new_prob and rewards:  [(array([ 0.94028115], dtype=float32), 0.5151667), (array([ 0.94019777], dtype=float32), 0.5102694), (array([ 0.93937737], dtype=float32), 0.50532264), (array([ 0.93904603], dtype=float32), 0.50032586), (array([ 0.93813038], dtype=float32), 0.49527866), (array([ 0.93830264], dtype=float32), 0.49018046), (array([ 0.93865907], dtype=float32), 0.48503077), (array([ 0.93862402], dtype=float32), 0.47982907), (array([ 0.93832219], dtype=float32), 0.4745748), (array([ 0.93719244], dtype=float32), 0.46926749), (array([ 0.93724215], dtype=float32), 0.46390656), (array([ 0.93771988], dtype=float32), 0.45849147), (array([ 0.9373945], dtype=float32), 0.45302168), (array([ 0.93749148], dtype=float32), 0.44749665), (array([ 0.93665993], dtype=float32), 0.44191581), (array([ 0.93674099], dtype=float32), 0.43627858), (array([ 0.93700814], dtype=float32), 0.4

[[-2.67170954]] [[ 0.06466349]]
[[-2.67240477]] [[ 0.06462146]]
[[-2.67597151]] [[ 0.0644062]]
[[-2.66833782]] [[ 0.06486773]]
[[-2.67048383]] [[ 0.06473767]]
[[-2.66622019]] [[ 0.0649963]]
[[-2.66639161]] [[ 0.06498588]]
[[-2.67195415]] [[ 0.0646487]]
[[-2.65623283]] [[ 0.06560589]]
[[-2.65371466]] [[ 0.06576043]]
[[-2.65452099]] [[ 0.06571091]]
[[-2.65673304]] [[ 0.06557523]]
[[-2.65545988]] [[ 0.06565329]]
[[-2.64032173]] [[ 0.06658804]]
[[-2.63714623]] [[ 0.06678568]]
[[-2.61842132]] [[ 0.06796223]]
[[-2.62061906]] [[ 0.06782314]]
[[-2.62475777]] [[ 0.06756195]]
[[-2.61414838]] [[ 0.06823339]]
[[-2.61302996]] [[ 0.06830453]]
[[-2.60061455]] [[ 0.06909888]]
[[-2.60034156]] [[ 0.06911644]]
[[-2.60143447]] [[ 0.06904615]]
[[-2.59467697]] [[ 0.06948179]]
[[-2.59319568]] [[ 0.06957762]]
[[-2.57817459]] [[ 0.07055634]]
[[-2.5757184]] [[ 0.07071759]]
[[-2.57000852]] [[ 0.07109374]]
[[-2.57025886]] [[ 0.07107721]]
[[-2.57310033]] [[ 0.07088983]]
[[-2.57520413]] [[ 0.07075139]]
[[-2.5771691

[[-2.56529951]] [[ 0.07140535]]
[[-2.57152605]] [[ 0.07099359]]
[[-2.57149553]] [[ 0.0709956]]
[[-2.57508516]] [[ 0.07075921]]
[[-2.57226682]] [[ 0.07094475]]
[[-2.5678792]] [[ 0.07123449]]
[[-2.57301617]] [[ 0.07089537]]
[[-2.5696156]] [[ 0.07111969]]
[[-2.5638833]] [[ 0.07149931]]
[[-2.5590229]] [[ 0.07182265]]
[[-2.55917549]] [[ 0.07181248]]
[[-2.5713582]] [[ 0.07100466]]
[[-2.57141185]] [[ 0.07100112]]
[[-2.56659746]] [[ 0.07131933]]
[[-2.56637359]] [[ 0.07133417]]
[[-2.56160903]] [[ 0.07165045]]
[[-2.5672183]] [[ 0.07127823]]
[[-2.5604465]] [[ 0.07172781]]
[[-2.56901145]] [[ 0.07115962]]
[[-2.56797552]] [[ 0.07122812]]
[[-2.56819129]] [[ 0.07121384]]
[[-2.56934214]] [[ 0.07113776]]
[[-2.56667471]] [[ 0.07131422]]
[[-2.57854271]] [[ 0.0705322]]
[[-2.57835579]] [[ 0.07054447]]
[[-2.58126402]] [[ 0.07035401]]
[[-2.58305168]] [[ 0.07023718]]
[[-2.58102083]] [[ 0.07036992]]
[[-2.57351446]] [[ 0.07086255]]
[[-2.56510019]] [[ 0.07141857]]
[[-2.56558943]] [[ 0.07138613]]
[[-2.57567215]] [

[[-2.52554083]] [[ 0.07408696]]
[[-2.53578925]] [[ 0.073387]]
[[-2.54042768]] [[ 0.0730722]]
[[-2.53853965]] [[ 0.07320018]]
[[-2.54865599]] [[ 0.07251683]]
[[-2.54115701]] [[ 0.07302281]]
[[-2.54671907]] [[ 0.07264721]]
[[-2.54040313]] [[ 0.07307386]]
[[-2.53756809]] [[ 0.07326613]]
[[-2.54908586]] [[ 0.07248792]]
[[-2.53923011]] [[ 0.07315336]]
[[-2.5469749]] [[ 0.07262998]]
[[-2.53634]] [[ 0.07334955]]
[[-2.53271627]] [[ 0.07359624]]
[[-2.52798867]] [[ 0.07391921]]
[[-2.53324962]] [[ 0.07355988]]
[[-2.53854704]] [[ 0.07319968]]
[[-2.52966285]] [[ 0.07380469]]
[[-2.52136922]] [[ 0.07437363]]
[[-2.53138041]] [[ 0.07368737]]
[[-2.52826428]] [[ 0.07390035]]
[[-2.53905725]] [[ 0.07316508]]
[[-2.51837111]] [[ 0.07458029]]
[[-2.51546979]] [[ 0.07478078]]
[[-2.51978254]] [[ 0.07448293]]
[[-2.52071404]] [[ 0.07441875]]
[[-2.53218961]] [[ 0.07363215]]
[[-2.52811337]] [[ 0.07391068]]
[[-2.52408814]] [[ 0.07418667]]
[[-2.52825379]] [[ 0.07390107]]
[[-2.52766013]] [[ 0.07394171]]
[[-2.53758121]]

[[-2.60325623]] [[ 0.06892915]]
[[-2.60284042]] [[ 0.06895584]]
[[-2.58212233]] [[ 0.0702979]]
[[-2.5806756]] [[ 0.0703925]]
[[-2.578264]] [[ 0.07055048]]
[[-2.57878995]] [[ 0.070516]]
[[-2.5814805]] [[ 0.07033986]]
[[-2.57784104]] [[ 0.07057822]]
[[-2.58026886]] [[ 0.07041913]]
[[-2.57609749]] [[ 0.07069268]]
[[-2.57495236]] [[ 0.07076795]]
[[-2.58175302]] [[ 0.07032204]]
[[-2.57665062]] [[ 0.07065635]]
[[-2.57480741]] [[ 0.07077748]]
[[-2.57684183]] [[ 0.07064379]]
[[-2.57110333]] [[ 0.07102147]]
[[-2.5762012]] [[ 0.07068586]]
[[-2.57756567]] [[ 0.07059629]]
[[-2.57813048]] [[ 0.07055923]]
[[-2.58342671]] [[ 0.0702127]]
[[-2.57820225]] [[ 0.07055453]]
[[-2.57826996]] [[ 0.07055009]]
[[-2.57642555]] [[ 0.07067113]]
[[-2.57678223]] [[ 0.07064771]]
[[-2.58075714]] [[ 0.07038717]]
[[-2.57635474]] [[ 0.07067578]]
[[-2.57347178]] [[ 0.07086537]]
[[-2.56191516]] [[ 0.07163008]]
[[-2.55692101]] [[ 0.0719629]]
[[-2.56024694]] [[ 0.0717411]]
[[-2.56050277]] [[ 0.07172406]]
[[-2.56015682]] [[ 0

[array([142,   1], dtype=int32), array([142,   1], dtype=int32), array([142], dtype=int32)]
(142, 1) (142,)
loss 0.00503085 new_prob and rewards:  [(array([ 0.94107199], dtype=float32), 0.5151667), (array([ 0.94098932], dtype=float32), 0.5102694), (array([ 0.94040304], dtype=float32), 0.50532264), (array([ 0.94012439], dtype=float32), 0.50032586), (array([ 0.93928301], dtype=float32), 0.49527866), (array([ 0.9394294], dtype=float32), 0.49018046), (array([ 0.93964368], dtype=float32), 0.48503077), (array([ 0.93931729], dtype=float32), 0.47982907), (array([ 0.93952185], dtype=float32), 0.4745748), (array([ 0.93843603], dtype=float32), 0.46926749), (array([ 0.93818814], dtype=float32), 0.46390656), (array([ 0.93855727], dtype=float32), 0.45849147), (array([ 0.93829882], dtype=float32), 0.45302168), (array([ 0.93840653], dtype=float32), 0.44749665), (array([ 0.93760455], dtype=float32), 0.44191581), (array([ 0.93784529], dtype=float32), 0.43627858), (array([ 0.93806666], dtype=float32), 0.

[array([142,   1], dtype=int32), array([142,   1], dtype=int32), array([142], dtype=int32)]
(142, 1) (142,)
loss 0.00493666 new_prob and rewards:  [(array([ 0.94220048], dtype=float32), 0.5151667), (array([ 0.94211888], dtype=float32), 0.5102694), (array([ 0.94153953], dtype=float32), 0.50532264), (array([ 0.94126451], dtype=float32), 0.50032586), (array([ 0.94043332], dtype=float32), 0.49527866), (array([ 0.94057786], dtype=float32), 0.49018046), (array([ 0.94078982], dtype=float32), 0.48503077), (array([ 0.94046706], dtype=float32), 0.47982907), (array([ 0.94066894], dtype=float32), 0.4745748), (array([ 0.93959701], dtype=float32), 0.46926749), (array([ 0.93935281], dtype=float32), 0.46390656), (array([ 0.93971735], dtype=float32), 0.45849147), (array([ 0.93946153], dtype=float32), 0.45302168), (array([ 0.9395678], dtype=float32), 0.44749665), (array([ 0.9387756], dtype=float32), 0.44191581), (array([ 0.93901306], dtype=float32), 0.43627858), (array([ 0.93923199], dtype=float32), 0.4

[array([536,   1], dtype=int32), array([536,   1], dtype=int32), array([536], dtype=int32)]
(536, 1) (536,)
loss -0.0816322 new_prob and rewards:  [(array([ 0.93126023], dtype=float32), 1.0883375), (array([ 0.93103915], dtype=float32), 1.0892298), (array([ 0.93009192], dtype=float32), 1.0901312), (array([ 0.92989409], dtype=float32), 1.0910416), (array([ 0.92849702], dtype=float32), 1.0919613), (array([ 0.92870283], dtype=float32), 1.0928901), (array([ 0.9289313], dtype=float32), 1.0938284), (array([ 0.92860514], dtype=float32), 1.0947762), (array([ 0.92893273], dtype=float32), 1.0957335), (array([ 0.92778152], dtype=float32), 1.0967005), (array([ 0.92801464], dtype=float32), 1.0976772), (array([ 0.92864585], dtype=float32), 1.0986639), (array([ 0.92780197], dtype=float32), 1.0996605), (array([ 0.92789459], dtype=float32), 1.1006672), (array([ 0.92709172], dtype=float32), 1.101684), (array([ 0.92709184], dtype=float32), 1.1027111), (array([ 0.92738974], dtype=float32), 1.1037487), (arr

(536, 1) (536,)
loss -0.0896716 new_prob and rewards:  [(array([ 0.92436165], dtype=float32), 1.0883375), (array([ 0.9241249], dtype=float32), 1.0892298), (array([ 0.92312086], dtype=float32), 1.0901312), (array([ 0.9229123], dtype=float32), 1.0910416), (array([ 0.92143041], dtype=float32), 1.0919613), (array([ 0.92164898], dtype=float32), 1.0928901), (array([ 0.92188841], dtype=float32), 1.0938284), (array([ 0.92154574], dtype=float32), 1.0947762), (array([ 0.92189407], dtype=float32), 1.0957335), (array([ 0.92067444], dtype=float32), 1.0967005), (array([ 0.92092174], dtype=float32), 1.0976772), (array([ 0.92159092], dtype=float32), 1.0986639), (array([ 0.92069584], dtype=float32), 1.0996605), (array([ 0.92079574], dtype=float32), 1.1006672), (array([ 0.91994774], dtype=float32), 1.101684), (array([ 0.91994584], dtype=float32), 1.1027111), (array([ 0.92026132], dtype=float32), 1.1037487), (array([ 0.92008859], dtype=float32), 1.1047966), (array([ 0.9202708], dtype=float32), 1.1058551)

loss -0.0989265 new_prob and rewards:  [(array([ 0.91643757], dtype=float32), 1.0883375), (array([ 0.91618377], dtype=float32), 1.0892298), (array([ 0.91511905], dtype=float32), 1.0901312), (array([ 0.91489917], dtype=float32), 1.0910416), (array([ 0.91332686], dtype=float32), 1.0919613), (array([ 0.91355914], dtype=float32), 1.0928901), (array([ 0.91380978], dtype=float32), 1.0938284), (array([ 0.91344988], dtype=float32), 1.0947762), (array([ 0.91382051], dtype=float32), 1.0957335), (array([ 0.91252792], dtype=float32), 1.0967005), (array([ 0.91279042], dtype=float32), 1.0976772), (array([ 0.91350019], dtype=float32), 1.0986639), (array([ 0.91255021], dtype=float32), 1.0996605), (array([ 0.9126581], dtype=float32), 1.1006672), (array([ 0.91176254], dtype=float32), 1.101684), (array([ 0.91175818], dtype=float32), 1.1027111), (array([ 0.91209239], dtype=float32), 1.1037487), (array([ 0.91191471], dtype=float32), 1.1047966), (array([ 0.91211104], dtype=float32), 1.1058551), (array([ 0.9

(88, 1) (88,)
loss 0.0325306 new_prob and rewards:  [(array([ 0.91687703], dtype=float32), 0.16575822), (array([ 0.91664958], dtype=float32), 0.15733154), (array([ 0.91581929], dtype=float32), 0.14881974), (array([ 0.91578329], dtype=float32), 0.14022195), (array([ 0.91481149], dtype=float32), 0.13153733), (array([ 0.91522378], dtype=float32), 0.12276498), (array([ 0.91560757], dtype=float32), 0.11390402), (array([ 0.91473085], dtype=float32), 0.10495356), (array([ 0.91486895], dtype=float32), 0.095912687), (array([ 0.91370398], dtype=float32), 0.086780496), (array([ 0.91387063], dtype=float32), 0.077556051), (array([ 0.91418636], dtype=float32), 0.068238437), (array([ 0.91366947], dtype=float32), 0.058826704), (array([ 0.91386002], dtype=float32), 0.049319904), (array([ 0.91296065], dtype=float32), 0.039717074), (array([ 0.91319597], dtype=float32), 0.030017247), (array([ 0.91347289], dtype=float32), 0.020219443), (array([ 0.91310894], dtype=float32), 0.01032267), (array([ 0.91315258]

[array([142,   1], dtype=int32), array([142,   1], dtype=int32), array([142], dtype=int32)]
(142, 1) (142,)
loss 0.00648374 new_prob and rewards:  [(array([ 0.92363441], dtype=float32), 0.5151667), (array([ 0.92342037], dtype=float32), 0.5102694), (array([ 0.92246532], dtype=float32), 0.50532264), (array([ 0.92229044], dtype=float32), 0.50032586), (array([ 0.92112207], dtype=float32), 0.49527866), (array([ 0.9214083], dtype=float32), 0.49018046), (array([ 0.92179233], dtype=float32), 0.48503077), (array([ 0.92150545], dtype=float32), 0.47982907), (array([ 0.92115259], dtype=float32), 0.4745748), (array([ 0.92002225], dtype=float32), 0.46926749), (array([ 0.9204095], dtype=float32), 0.46390656), (array([ 0.92066652], dtype=float32), 0.45849147), (array([ 0.92038745], dtype=float32), 0.45302168), (array([ 0.92077518], dtype=float32), 0.44749665), (array([ 0.91988522], dtype=float32), 0.44191581), (array([ 0.91998613], dtype=float32), 0.43627858), (array([ 0.92031199], dtype=float32), 0.4

[array([142,   1], dtype=int32), array([142,   1], dtype=int32), array([142], dtype=int32)]
(142, 1) (142,)
loss 0.00635614 new_prob and rewards:  [(array([ 0.92528021], dtype=float32), 0.5151667), (array([ 0.92489499], dtype=float32), 0.5102694), (array([ 0.92456782], dtype=float32), 0.50532264), (array([ 0.92433959], dtype=float32), 0.50032586), (array([ 0.92325693], dtype=float32), 0.49527866), (array([ 0.92333633], dtype=float32), 0.49018046), (array([ 0.9237076], dtype=float32), 0.48503077), (array([ 0.92328602], dtype=float32), 0.47982907), (array([ 0.92319566], dtype=float32), 0.4745748), (array([ 0.92213649], dtype=float32), 0.46926749), (array([ 0.92225355], dtype=float32), 0.46390656), (array([ 0.92270207], dtype=float32), 0.45849147), (array([ 0.9218064], dtype=float32), 0.45302168), (array([ 0.92203242], dtype=float32), 0.44749665), (array([ 0.92120808], dtype=float32), 0.44191581), (array([ 0.92116398], dtype=float32), 0.43627858), (array([ 0.92159438], dtype=float32), 0.4

[array([142,   1], dtype=int32), array([142,   1], dtype=int32), array([142], dtype=int32)]
(142, 1) (142,)
loss 0.00622231 new_prob and rewards:  [(array([ 0.92687732], dtype=float32), 0.5151667), (array([ 0.926498], dtype=float32), 0.5102694), (array([ 0.92617363], dtype=float32), 0.50532264), (array([ 0.92594844], dtype=float32), 0.50032586), (array([ 0.92487967], dtype=float32), 0.49527866), (array([ 0.92495817], dtype=float32), 0.49018046), (array([ 0.92532486], dtype=float32), 0.48503077), (array([ 0.92490828], dtype=float32), 0.47982907), (array([ 0.92481941], dtype=float32), 0.4745748), (array([ 0.92377424], dtype=float32), 0.46926749), (array([ 0.92388994], dtype=float32), 0.46390656), (array([ 0.92433268], dtype=float32), 0.45849147), (array([ 0.92344874], dtype=float32), 0.45302168), (array([ 0.92367154], dtype=float32), 0.44749665), (array([ 0.92285752], dtype=float32), 0.44191581), (array([ 0.92281419], dtype=float32), 0.43627858), (array([ 0.92323911], dtype=float32), 0.4

INFO:tensorflow:Restoring parameters from ./models/trained_flappy
[[-2.55509257]] [[ 0.0720851]]
[[-2.55509257]] [[ 0.0720851]]
[[-2.55509329]] [[ 0.07208506]]
[[-2.55360961]] [[ 0.07218436]]
[[-2.542032]] [[ 0.07296361]]
[[-2.53966928]] [[ 0.07312359]]
[[-2.52347565]] [[ 0.07422875]]
[[-2.52632642]] [[ 0.07403309]]
[[-2.52943373]] [[ 0.07382035]]
[[-2.5235014]] [[ 0.07422698]]
[[-2.51878023]] [[ 0.07455206]]
[[-2.5057857]] [[ 0.07545357]]
[[-2.5073576]] [[ 0.075344]]
[[-2.51464224]] [[ 0.07483806]]
[[-2.51150966]] [[ 0.07505523]]
[[-2.51300812]] [[ 0.07495128]]
[[-2.50097895]] [[ 0.07578959]]
[[-2.50305915]] [[ 0.075644]]
[[-2.5065639]] [[ 0.0753993]]
[[-2.5027988]] [[ 0.07566221]]
[[-2.50485969]] [[ 0.0755182]]
[[-2.49170041]] [[ 0.07644206]]
[[-2.48745537]] [[ 0.0767423]]
[[-2.49298429]] [[ 0.07635147]]
[[-2.48265624]] [[ 0.07708302]]
[[-2.47986484]] [[ 0.07728184]]
[[-2.46747303]] [[ 0.07817014]]
[[-2.47261333]] [[ 0.07780053]]
[[-2.47402]] [[ 0.07769966]]
[[-2.4649806]] [[ 0.07834

[[-2.364012]] [[ 0.08595845]]
[[-2.35759568]] [[ 0.08646391]]
[[-2.35570192]] [[ 0.08661362]]
[[-2.35496306]] [[ 0.08667209]]
[[-2.35738587]] [[ 0.0864805]]
[[-2.35743213]] [[ 0.08647683]]
[[-2.35429668]] [[ 0.08672486]]
[[-2.35351968]] [[ 0.08678641]]
[[-2.34844422]] [[ 0.08718952]]
[[-2.36039305]] [[ 0.08624322]]
[[-2.3610723]] [[ 0.0861897]]
[[-2.35680676]] [[ 0.08652625]]
[[-2.35377192]] [[ 0.08676643]]
[[-2.35305285]] [[ 0.08682342]]
[[-2.35774016]] [[ 0.08645251]]
[[-2.35624027]] [[ 0.08657104]]
[[-2.35802293]] [[ 0.08643018]]
[[-2.35349011]] [[ 0.08678876]]
[[-2.35147166]] [[ 0.08694887]]
[[-2.35205579]] [[ 0.08690251]]
[[-2.34016585]] [[ 0.08785062]]
[[-2.35259604]] [[ 0.08685964]]
[[-2.35419679]] [[ 0.08673277]]
[[-2.3560605]] [[ 0.08658525]]
[[-2.34638238]] [[ 0.08735375]]
[[-2.34103227]] [[ 0.08778122]]
[[-2.35192299]] [[ 0.08691305]]
[[-2.34715915]] [[ 0.08729184]]
[[-2.34774876]] [[ 0.08724488]]
[[-2.35779881]] [[ 0.08644788]]
[[-2.35040283]] [[ 0.08703376]]
[[-2.34072161]

[[-2.32515383]] [[ 0.08906104]]
[[-2.3433671]] [[ 0.08759443]]
[[-2.34952044]] [[ 0.0871039]]
[[-2.36198187]] [[ 0.08611809]]
[[-2.35824251]] [[ 0.08641284]]
[[-2.3586731]] [[ 0.08637886]]
[[-2.36347008]] [[ 0.08600104]]
[[-2.36439085]] [[ 0.08592869]]
[[-2.36548662]] [[ 0.08584266]]
[[-2.35600781]] [[ 0.08658943]]
[[-2.36237383]] [[ 0.08608725]]
[[-2.3733685]] [[ 0.08522616]]
[[-2.37893128]] [[ 0.08479346]]
[[-2.38657141]] [[ 0.08420245]]
[[-2.38624454]] [[ 0.08422765]]
[[-2.38539481]] [[ 0.08429322]]
[[-2.40412927]] [[ 0.08285836]]
[[-2.40874386]] [[ 0.08250836]]
[[-2.41341019]] [[ 0.0821558]]
[[-2.4069283]] [[ 0.0826459]]
[[-2.40516615]] [[ 0.0827796]]
[[-2.42211676]] [[ 0.08150166]]
[[-2.42735386]] [[ 0.08111047]]
[[-2.43292189]] [[ 0.08069644]]
[[-2.42652416]] [[ 0.08117232]]
[[-2.43851733]] [[ 0.08028232]]
[[-2.44366574]] [[ 0.079903]]
[[-2.44742203]] [[ 0.07962728]]
[[-2.45530391]] [[ 0.07905155]]
[[-2.45102715]] [[ 0.07936347]]
[[-2.45239902]] [[ 0.07926329]]
[[-2.45903039]] [[

[[-2.3688066]] [[ 0.08558249]]
[[-2.36998034]] [[ 0.08549068]]
[[-2.36687469]] [[ 0.08573379]]
[[-2.35955882]] [[ 0.08630898]]
[[-2.35507464]] [[ 0.08666325]]
[[-2.35969543]] [[ 0.0862982]]
[[-2.35659122]] [[ 0.08654329]]
[[-2.35207796]] [[ 0.08690074]]
[[-2.34761763]] [[ 0.08725532]]
[[-2.35191607]] [[ 0.08691359]]
[[-2.34870934]] [[ 0.08716842]]
[[-2.35441184]] [[ 0.08671574]]
[[-2.35949683]] [[ 0.08631387]]
[[-2.35401368]] [[ 0.08674727]]
[[-2.35395336]] [[ 0.08675205]]
[[-2.35413313]] [[ 0.08673781]]
[[-2.35596347]] [[ 0.08659294]]
[[-2.35849524]] [[ 0.08639289]]
[[-2.35236812]] [[ 0.08687773]]
[[-2.34887934]] [[ 0.08715489]]
[[-2.3518672]] [[ 0.08691747]]
[[-2.3444171]] [[ 0.08751056]]
[[-2.34373426]] [[ 0.08756509]]
[[-2.34700918]] [[ 0.08730379]]
[[-2.34808922]] [[ 0.08721777]]
[[-2.35076022]] [[ 0.08700536]]
[[-2.34074664]] [[ 0.08780409]]
[[-2.34263706]] [[ 0.0876528]]
[[-2.34716892]] [[ 0.08729107]]
[[-2.34694266]] [[ 0.08730909]]
[[-2.34743118]] [[ 0.08727017]]
[[-2.35645008

[[-2.33833575]] [[ 0.08799739]]
[[-2.34625125]] [[ 0.0873642]]
[[-2.35304546]] [[ 0.08682401]]
[[-2.34738278]] [[ 0.08727403]]
[[-2.35370016]] [[ 0.08677211]]
[[-2.35707307]] [[ 0.08650521]]
[[-2.3598609]] [[ 0.08628516]]
[[-2.36528563]] [[ 0.08585843]]
[[-2.3564291]] [[ 0.08655611]]
[[-2.36476254]] [[ 0.08589949]]
[[-2.37616634]] [[ 0.08500828]]
[[-2.38395452]] [[ 0.08440446]]
[[-2.38593173]] [[ 0.08425178]]
[[-2.38311982]] [[ 0.08446898]]
[[-2.38577557]] [[ 0.08426383]]
[[-2.40313411]] [[ 0.08293401]]
[[-2.40676689]] [[ 0.08265814]]
[[-2.40635061]] [[ 0.08268971]]
[[-2.40276694]] [[ 0.08296195]]
[[-2.40309381]] [[ 0.08293708]]
[[-2.41822934]] [[ 0.08179314]]
[[-2.42271757]] [[ 0.08145669]]
[[-2.43192983]] [[ 0.08077007]]
[[-2.42949843]] [[ 0.08095077]]
[[-2.43464589]] [[ 0.08056864]]
[[-2.44043756]] [[ 0.08014065]]
[[-2.44491148]] [[ 0.07981146]]
[[-2.45106125]] [[ 0.07936098]]
[[-2.44621563]] [[ 0.07971574]]
[[-2.44602132]] [[ 0.07972999]]
[[-2.45233631]] [[ 0.07926787]]
[[-2.455408

[array([272,   1], dtype=int32), array([272,   1], dtype=int32), array([272], dtype=int32)]
(272, 1) (272,)
loss -0.0680248 new_prob and rewards:  [(array([ 0.91213846], dtype=float32), 0.95835793), (array([ 0.91211814], dtype=float32), 0.9579373), (array([ 0.91146922], dtype=float32), 0.95751244), (array([ 0.91131765], dtype=float32), 0.95708323), (array([ 0.91010755], dtype=float32), 0.95664972), (array([ 0.91014743], dtype=float32), 0.95621186), (array([ 0.91047561], dtype=float32), 0.95576954), (array([ 0.9097591], dtype=float32), 0.9553228), (array([ 0.90980059], dtype=float32), 0.95487148), (array([ 0.90880257], dtype=float32), 0.95441568), (array([ 0.90924412], dtype=float32), 0.95395523), (array([ 0.90954238], dtype=float32), 0.95349014), (array([ 0.90887982], dtype=float32), 0.95302033), (array([ 0.90916306], dtype=float32), 0.95254576), (array([ 0.90818477], dtype=float32), 0.95206642), (array([ 0.90839809], dtype=float32), 0.95158225), (array([ 0.90876591], dtype=float32), 0

(142, 1) (142,)
loss 0.00858708 new_prob and rewards:  [(array([ 0.89881617], dtype=float32), 0.5151667), (array([ 0.89873737], dtype=float32), 0.5102694), (array([ 0.89737701], dtype=float32), 0.50532264), (array([ 0.89712375], dtype=float32), 0.50032586), (array([ 0.89579642], dtype=float32), 0.49527866), (array([ 0.89601779], dtype=float32), 0.49018046), (array([ 0.89657676], dtype=float32), 0.48503077), (array([ 0.89580679], dtype=float32), 0.47982907), (array([ 0.89577031], dtype=float32), 0.4745748), (array([ 0.89467669], dtype=float32), 0.46926749), (array([ 0.89487082], dtype=float32), 0.46390656), (array([ 0.89539158], dtype=float32), 0.45849147), (array([ 0.89461088], dtype=float32), 0.45302168), (array([ 0.89463246], dtype=float32), 0.44749665), (array([ 0.89416057], dtype=float32), 0.44191581), (array([ 0.89426231], dtype=float32), 0.43627858), (array([ 0.8944062], dtype=float32), 0.43058443), (array([ 0.89377242], dtype=float32), 0.42483276), (array([ 0.89386576], dtype=fl

(142, 1) (142,)
loss 0.008381 new_prob and rewards:  [(array([ 0.90124989], dtype=float32), 0.5151667), (array([ 0.90117222], dtype=float32), 0.5102694), (array([ 0.89982986], dtype=float32), 0.50532264), (array([ 0.89957941], dtype=float32), 0.50032586), (array([ 0.89826888], dtype=float32), 0.49527866), (array([ 0.89848733], dtype=float32), 0.49018046), (array([ 0.89903939], dtype=float32), 0.48503077), (array([ 0.89827931], dtype=float32), 0.47982907), (array([ 0.89824343], dtype=float32), 0.4745748), (array([ 0.89716375], dtype=float32), 0.46926749), (array([ 0.8973555], dtype=float32), 0.46390656), (array([ 0.89786971], dtype=float32), 0.45849147), (array([ 0.89709896], dtype=float32), 0.45302168), (array([ 0.89712018], dtype=float32), 0.44749665), (array([ 0.89665228], dtype=float32), 0.44191581), (array([ 0.89675266], dtype=float32), 0.43627858), (array([ 0.89689559], dtype=float32), 0.43058443), (array([ 0.8962695], dtype=float32), 0.42483276), (array([ 0.89636105], dtype=float

loss -0.0853499 new_prob and rewards:  [(array([ 0.90282869], dtype=float32), 1.0466082), (array([ 0.90280753], dtype=float32), 1.047079), (array([ 0.90177506], dtype=float32), 1.0475545), (array([ 0.90143174], dtype=float32), 1.0480349), (array([ 0.90053922], dtype=float32), 1.0485201), (array([ 0.90039158], dtype=float32), 1.0490102), (array([ 0.90096146], dtype=float32), 1.0495052), (array([ 0.89987731], dtype=float32), 1.0500053), (array([ 0.90013468], dtype=float32), 1.0505104), (array([ 0.89902037], dtype=float32), 1.0510206), (array([ 0.89935964], dtype=float32), 1.051536), (array([ 0.89973092], dtype=float32), 1.0520566), (array([ 0.89896947], dtype=float32), 1.0525824), (array([ 0.89916766], dtype=float32), 1.0531135), (array([ 0.89790988], dtype=float32), 1.05365), (array([ 0.89841568], dtype=float32), 1.0541919), (array([ 0.89855951], dtype=float32), 1.0547394), (array([ 0.8985526], dtype=float32), 1.0552922), (array([ 0.89854401], dtype=float32), 1.0558507), (array([ 0.8978

(383, 1) (383,)
loss -0.100655 new_prob and rewards:  [(array([ 0.88535845], dtype=float32), 1.0466082), (array([ 0.88533568], dtype=float32), 1.047079), (array([ 0.88421845], dtype=float32), 1.0475545), (array([ 0.88384044], dtype=float32), 1.0480349), (array([ 0.88288081], dtype=float32), 1.0485201), (array([ 0.88271379], dtype=float32), 1.0490102), (array([ 0.88333398], dtype=float32), 1.0495052), (array([ 0.8821497], dtype=float32), 1.0500053), (array([ 0.88243186], dtype=float32), 1.0505104), (array([ 0.88122207], dtype=float32), 1.0510206), (array([ 0.88158935], dtype=float32), 1.051536), (array([ 0.88199329], dtype=float32), 1.0520566), (array([ 0.88116968], dtype=float32), 1.0525824), (array([ 0.88138783], dtype=float32), 1.0531135), (array([ 0.88002038], dtype=float32), 1.05365), (array([ 0.88057762], dtype=float32), 1.0541919), (array([ 0.88072592], dtype=float32), 1.0547394), (array([ 0.88073778], dtype=float32), 1.0552922), (array([ 0.88072872], dtype=float32), 1.0558507), 

[array([383,   1], dtype=int32), array([383,   1], dtype=int32), array([383], dtype=int32)]
(383, 1) (383,)
loss -0.119896 new_prob and rewards:  [(array([ 0.86363912], dtype=float32), 1.0466082), (array([ 0.86361468], dtype=float32), 1.047079), (array([ 0.86241442], dtype=float32), 1.0475545), (array([ 0.86199975], dtype=float32), 1.0480349), (array([ 0.8609764], dtype=float32), 1.0485201), (array([ 0.86078674], dtype=float32), 1.0490102), (array([ 0.86145878], dtype=float32), 1.0495052), (array([ 0.86017168], dtype=float32), 1.0500053), (array([ 0.86047816], dtype=float32), 1.0505104), (array([ 0.85917449], dtype=float32), 1.0510206), (array([ 0.85956949], dtype=float32), 1.051536), (array([ 0.86000633], dtype=float32), 1.0520566), (array([ 0.85912144], dtype=float32), 1.0525824), (array([ 0.85936075], dtype=float32), 1.0531135), (array([ 0.85788274], dtype=float32), 1.05365), (array([ 0.85849434], dtype=float32), 1.0541919), (array([ 0.85864437], dtype=float32), 1.0547394), (array([

loss 0.0125804 new_prob and rewards:  [(array([ 0.8522346], dtype=float32), 0.5151667), (array([ 0.85218757], dtype=float32), 0.5102694), (array([ 0.85108519], dtype=float32), 0.50532264), (array([ 0.85079253], dtype=float32), 0.50032586), (array([ 0.84941244], dtype=float32), 0.49527866), (array([ 0.84936053], dtype=float32), 0.49018046), (array([ 0.84986806], dtype=float32), 0.48503077), (array([ 0.84933472], dtype=float32), 0.47982907), (array([ 0.84920746], dtype=float32), 0.4745748), (array([ 0.84765965], dtype=float32), 0.46926749), (array([ 0.84803104], dtype=float32), 0.46390656), (array([ 0.84852791], dtype=float32), 0.45849147), (array([ 0.84784079], dtype=float32), 0.45302168), (array([ 0.84743762], dtype=float32), 0.44749665), (array([ 0.84631312], dtype=float32), 0.44191581), (array([ 0.84642094], dtype=float32), 0.43627858), (array([ 0.84685045], dtype=float32), 0.43058443), (array([ 0.84687048], dtype=float32), 0.42483276), (array([ 0.84689975], dtype=float32), 0.4190230

[array([173,   1], dtype=int32), array([173,   1], dtype=int32), array([173], dtype=int32)]
(173, 1) (173,)
loss -0.00816742 new_prob and rewards:  [(array([ 0.85740668], dtype=float32), 0.64495492), (array([ 0.8572346], dtype=float32), 0.64136863), (array([ 0.85618502], dtype=float32), 0.6377461), (array([ 0.85595691], dtype=float32), 0.63408697), (array([ 0.85438085], dtype=float32), 0.63039088), (array([ 0.85467058], dtype=float32), 0.62665743), (array([ 0.85494953], dtype=float32), 0.6228863), (array([ 0.8544063], dtype=float32), 0.61907709), (array([ 0.85389626], dtype=float32), 0.61522937), (array([ 0.85263598], dtype=float32), 0.61134279), (array([ 0.85276049], dtype=float32), 0.60741693), (array([ 0.85348523], dtype=float32), 0.60345149), (array([ 0.85326999], dtype=float32), 0.59944594), (array([ 0.85341287], dtype=float32), 0.59539992), (array([ 0.8522507], dtype=float32), 0.59131306), (array([ 0.85245728], dtype=float32), 0.58718491), (array([ 0.85278696], dtype=float32), 0.

[array([173,   1], dtype=int32), array([173,   1], dtype=int32), array([173], dtype=int32)]
(173, 1) (173,)
loss -0.00831804 new_prob and rewards:  [(array([ 0.85482597], dtype=float32), 0.64495492), (array([ 0.85465163], dtype=float32), 0.64136863), (array([ 0.85359639], dtype=float32), 0.6377461), (array([ 0.85336667], dtype=float32), 0.63408697), (array([ 0.85177898], dtype=float32), 0.63039088), (array([ 0.8520714], dtype=float32), 0.62665743), (array([ 0.85235143], dtype=float32), 0.6228863), (array([ 0.85180533], dtype=float32), 0.61907709), (array([ 0.85129017], dtype=float32), 0.61522937), (array([ 0.85002148], dtype=float32), 0.61134279), (array([ 0.85014594], dtype=float32), 0.60741693), (array([ 0.85087603], dtype=float32), 0.60345149), (array([ 0.85066223], dtype=float32), 0.59944594), (array([ 0.850806], dtype=float32), 0.59539992), (array([ 0.84963644], dtype=float32), 0.59131306), (array([ 0.8498444], dtype=float32), 0.58718491), (array([ 0.85017598], dtype=float32), 0.5

INFO:tensorflow:Restoring parameters from ./models/trained_flappy
[[-1.7589252]] [[ 0.146925]]
[[-1.75892472]] [[ 0.14692506]]
[[-1.75892496]] [[ 0.14692503]]
[[-1.75752234]] [[ 0.14710091]]
[[-1.74747849]] [[ 0.14836551]]
[[-1.7432363]] [[ 0.14890233]]
[[-1.72998881]] [[ 0.15058902]]
[[-1.73212814]] [[ 0.15031557]]
[[-1.73799443]] [[ 0.14956786]]
[[-1.73146152]] [[ 0.15040073]]
[[-1.73010421]] [[ 0.15057425]]
[[-1.72133017]] [[ 0.1516999]]
[[-1.72271705]] [[ 0.15152152]]
[[-1.72782969]] [[ 0.15086541]]
[[-1.72510982]] [[ 0.15121417]]
[[-1.72591877]] [[ 0.15111035]]
[[-1.71576118]] [[ 0.15241796]]
[[-1.71735501]] [[ 0.15221217]]
[[-1.72178817]] [[ 0.15164098]]
[[-1.7162497]] [[ 0.15235487]]
[[-1.71626115]] [[ 0.15235338]]
[[-1.71360564]] [[ 0.15269662]]
[[-1.71264267]] [[ 0.15282126]]
[[-1.71060777]] [[ 0.1530849]]
[[-1.70592737]] [[ 0.15369271]]
[[-1.70451355]] [[ 0.15387669]]
[[-1.69065261]] [[ 0.15569004]]
[[-1.69135094]] [[ 0.15559827]]
[[-1.6942606]] [[ 0.15521635]]
[[-1.68836594]

[[-1.62577415]] [[ 0.16441008]]
[[-1.62960815]] [[ 0.16388404]]
[[-1.6277225]] [[ 0.16414259]]
[[-1.6251564]] [[ 0.16449498]]
[[-1.62998652]] [[ 0.1638322]]
[[-1.62280083]] [[ 0.16481896]]
[[-1.62802243]] [[ 0.16410145]]
[[-1.62894869]] [[ 0.16397443]]
[[-1.61949992]] [[ 0.16527386]]
[[-1.62133408]] [[ 0.16502097]]
[[-1.62908077]] [[ 0.16395631]]
[[-1.62789083]] [[ 0.1641195]]
-1
Game 4 over; alive frames: 67
[[-1.63007784]] [[ 0.1638197]]
[[-1.75918937]] [[ 0.14689191]]
[[-1.75937772]] [[ 0.14686829]]
[[-1.75004101]] [[ 0.14804202]]
[[-1.74927258]] [[ 0.14813897]]
[[-1.73826385]] [[ 0.1495336]]
[[-1.7382431]] [[ 0.14953622]]
[[-1.74342632]] [[ 0.14887825]]
[[-1.73740244]] [[ 0.14964318]]
[[-1.73890448]] [[ 0.14945213]]
[[-1.72786403]] [[ 0.150861]]
[[-1.72877026]] [[ 0.15074494]]
[[-1.7314415]] [[ 0.15040329]]
[[-1.73096275]] [[ 0.15046448]]
[[-1.73298883]] [[ 0.15020567]]
[[-1.71845508]] [[ 0.15207027]]
[[-1.71987724]] [[ 0.15188698]]
[[-1.72350979]] [[ 0.15141962]]
[[-1.71907949]] [

[array([94,  1], dtype=int32), array([94,  1], dtype=int32), array([94], dtype=int32)]
(94, 1) (94,)
loss 0.0503168 new_prob and rewards:  [(array([ 0.85860991], dtype=float32), 0.21457793), (array([ 0.8586328], dtype=float32), 0.20664437), (array([ 0.85747576], dtype=float32), 0.19863068), (array([ 0.85738009], dtype=float32), 0.19053604), (array([ 0.85600632], dtype=float32), 0.18235964), (array([ 0.8560046], dtype=float32), 0.17410064), (array([ 0.85665262], dtype=float32), 0.16575822), (array([ 0.85589898], dtype=float32), 0.15733154), (array([ 0.85608727], dtype=float32), 0.14881974), (array([ 0.85470003], dtype=float32), 0.14022195), (array([ 0.85481477], dtype=float32), 0.13153733), (array([ 0.85515243], dtype=float32), 0.12276498), (array([ 0.85508746], dtype=float32), 0.11390402), (array([ 0.85534191], dtype=float32), 0.10495356), (array([ 0.85350788], dtype=float32), 0.095912687), (array([ 0.85368794], dtype=float32), 0.086780496), (array([ 0.85414875], dtype=float32), 0.0775

[array([62,  1], dtype=int32), array([62,  1], dtype=int32), array([62], dtype=int32)]
(62, 1) (62,)
loss 0.0696194 new_prob and rewards:  [(array([ 0.87824839], dtype=float32), -0.083370164), (array([ 0.87826878], dtype=float32), -0.094313294), (array([ 0.87708718], dtype=float32), -0.10536697), (array([ 0.8767668], dtype=float32), -0.11653229), (array([ 0.87528801], dtype=float32), -0.12781039), (array([ 0.87528569], dtype=float32), -0.13920242), (array([ 0.87607569], dtype=float32), -0.15070951), (array([ 0.87542903], dtype=float32), -0.16233283), (array([ 0.87534642], dtype=float32), -0.17407358), (array([ 0.87412298], dtype=float32), -0.1859329), (array([ 0.87433225], dtype=float32), -0.19791202), (array([ 0.87442088], dtype=float32), -0.21001214), (array([ 0.87402838], dtype=float32), -0.22223449), (array([ 0.87371457], dtype=float32), -0.23458029), (array([ 0.87280667], dtype=float32), -0.24705079), (array([ 0.87336349], dtype=float32), -0.25964728), (array([ 0.87379879], dtype=

[array([59,  1], dtype=int32), array([59,  1], dtype=int32), array([59], dtype=int32)]
(59, 1) (59,)
loss 0.0538807 new_prob and rewards:  [(array([ 0.90867311], dtype=float32), -0.11653229), (array([ 0.90868896], dtype=float32), -0.12781039), (array([ 0.90771627], dtype=float32), -0.13920242), (array([ 0.90744901], dtype=float32), -0.15070951), (array([ 0.90621263], dtype=float32), -0.16233283), (array([ 0.90618926], dtype=float32), -0.17407358), (array([ 0.90684938], dtype=float32), -0.1859329), (array([ 0.9063164], dtype=float32), -0.19791202), (array([ 0.90636635], dtype=float32), -0.21001214), (array([ 0.90509218], dtype=float32), -0.22223449), (array([ 0.90552193], dtype=float32), -0.23458029), (array([ 0.90592778], dtype=float32), -0.24705079), (array([ 0.90494698], dtype=float32), -0.25964728), (array([ 0.90506959], dtype=float32), -0.27237099), (array([ 0.90402931], dtype=float32), -0.28522322), (array([ 0.90415204], dtype=float32), -0.29820526), (array([ 0.90452337], dtype=fl

(67, 1) (67,)
loss 0.0369793 new_prob and rewards:  [(array([ 0.93012571], dtype=float32), -0.030274246), (array([ 0.9300648], dtype=float32), -0.040681057), (array([ 0.92922676], dtype=float32), -0.051192984), (array([ 0.92906356], dtype=float32), -0.061811097), (array([ 0.9283796], dtype=float32), -0.072536461), (array([ 0.92853409], dtype=float32), -0.083370164), (array([ 0.9288581], dtype=float32), -0.094313294), (array([ 0.92841381], dtype=float32), -0.10536697), (array([ 0.92830825], dtype=float32), -0.11653229), (array([ 0.92740095], dtype=float32), -0.12781039), (array([ 0.92748368], dtype=float32), -0.13920242), (array([ 0.92788029], dtype=float32), -0.15070951), (array([ 0.92700136], dtype=float32), -0.16233283), (array([ 0.9272911], dtype=float32), -0.17407358), (array([ 0.92654485], dtype=float32), -0.1859329), (array([ 0.92653358], dtype=float32), -0.19791202), (array([ 0.92680478], dtype=float32), -0.21001214), (array([ 0.92646021], dtype=float32), -0.22223449), (array([ 

loss 0.0311083 new_prob and rewards:  [(array([ 0.94122118], dtype=float32), -0.030274246), (array([ 0.94116747], dtype=float32), -0.040681057), (array([ 0.94041753], dtype=float32), -0.051192984), (array([ 0.94027174], dtype=float32), -0.061811097), (array([ 0.93965697], dtype=float32), -0.072536461), (array([ 0.9397952], dtype=float32), -0.083370164), (array([ 0.94008583], dtype=float32), -0.094313294), (array([ 0.93968755], dtype=float32), -0.10536697), (array([ 0.9395951], dtype=float32), -0.11653229), (array([ 0.93878525], dtype=float32), -0.12781039), (array([ 0.93885994], dtype=float32), -0.13920242), (array([ 0.93921459], dtype=float32), -0.15070951), (array([ 0.93843108], dtype=float32), -0.16233283), (array([ 0.93868762], dtype=float32), -0.17407358), (array([ 0.93801963], dtype=float32), -0.1859329), (array([ 0.93801033], dtype=float32), -0.19791202), (array([ 0.93825388], dtype=float32), -0.21001214), (array([ 0.93794268], dtype=float32), -0.22223449), (array([ 0.9380157], 

(80, 1) (80,)
loss 0.0227417 new_prob and rewards:  [(array([ 0.9488579], dtype=float32), 0.095912687), (array([ 0.94878417], dtype=float32), 0.086780496), (array([ 0.94808018], dtype=float32), 0.077556051), (array([ 0.94779056], dtype=float32), 0.068238437), (array([ 0.94688529], dtype=float32), 0.058826704), (array([ 0.94701803], dtype=float32), 0.049319904), (array([ 0.94742453], dtype=float32), 0.039717074), (array([ 0.94696766], dtype=float32), 0.030017247), (array([ 0.94688296], dtype=float32), 0.020219443), (array([ 0.94625998], dtype=float32), 0.01032267), (array([ 0.94636405], dtype=float32), 0.00032592902), (array([ 0.94671899], dtype=float32), -0.0097717885), (array([ 0.94650698], dtype=float32), -0.019971503), (array([ 0.94655424], dtype=float32), -0.030274246), (array([ 0.94582731], dtype=float32), -0.040681057), (array([ 0.94593364], dtype=float32), -0.051192984), (array([ 0.9462536], dtype=float32), -0.061811097), (array([ 0.94584543], dtype=float32), -0.072536461), (arr

INFO:tensorflow:Restoring parameters from ./models/trained_flappy
[[-2.9868598]] [[ 0.04802305]]
[[-2.98685932]] [[ 0.04802307]]
[[-2.9868598]] [[ 0.04802305]]
[[-2.98533726]] [[ 0.0480927]]
[[-2.96967649]] [[ 0.04881474]]
[[-2.96393585]] [[ 0.04908198]]
[[-2.94844055]] [[ 0.04981027]]
[[-2.95173931]] [[ 0.04965437]]
[[-2.95820713]] [[ 0.04935005]]
[[-2.9569416]] [[ 0.04940946]]
[[-2.95168686]] [[ 0.04965685]]
[[-2.93033361]] [[ 0.05067427]]
[[-2.93221235]] [[ 0.05058397]]
[[-2.94132185]] [[ 0.05014827]]
[[-2.93477058]] [[ 0.05046125]]
[[-2.93679857]] [[ 0.05036417]]
[[-2.92072487]] [[ 0.05113851]]
[[-2.92178631]] [[ 0.05108703]]
[[-2.92446661]] [[ 0.05095726]]
[[-2.92358303]] [[ 0.051]]
[[-2.91957188]] [[ 0.05119449]]
[[-2.90624285]] [[ 0.05184582]]
[[-2.9050622]] [[ 0.05190388]]
[[-2.91110182]] [[ 0.05160748]]
[[-2.9006424]] [[ 0.05212182]]
[[-2.90110826]] [[ 0.05209881]]
[[-2.88858747]] [[ 0.05272062]]
[[-2.88846874]] [[ 0.05272655]]
[[-2.89032507]] [[ 0.05263391]]
[[-2.8810792]] [[

[[-2.96512794]] [[ 0.04902637]]
[[-2.94735241]] [[ 0.0498618]]
[[-2.95102024]] [[ 0.04968832]]
[[-2.95708799]] [[ 0.04940258]]
[[-2.95282888]] [[ 0.04960298]]
[[-2.94783354]] [[ 0.049839]]
[[-2.9286828]] [[ 0.05075375]]
[[-2.9341712]] [[ 0.05048998]]
[[-2.93914175]] [[ 0.05025222]]
[[-2.9347024]] [[ 0.05046452]]
[[-2.94086933]] [[ 0.05016983]]
[[-2.92773199]] [[ 0.05079957]]
[[-2.93005943]] [[ 0.05068746]]
[[-2.93559265]] [[ 0.05042188]]
[[-2.92726326]] [[ 0.05082218]]
[[-2.92779684]] [[ 0.05079645]]
[[-2.91747713]] [[ 0.05129633]]
[[-2.92150593]] [[ 0.05110063]]
[[-2.92183733]] [[ 0.05108456]]
[[-2.90810966]] [[ 0.05175413]]
[[-2.90691566]] [[ 0.05181275]]
[[-2.88710642]] [[ 0.05279463]]
[[-2.8906064]] [[ 0.05261988]]
[[-2.89041853]] [[ 0.05262925]]
[[-2.88537359]] [[ 0.05288135]]
[[-2.88359833]] [[ 0.05297034]]
[[-2.87835312]] [[ 0.05323408]]
[[-2.88323736]] [[ 0.05298845]]
[[-2.88602114]] [[ 0.05284893]]
[[-2.87958574]] [[ 0.05317199]]
[[-2.87875605]] [[ 0.05321377]]
[[-2.87569475]]

[[-2.75824833]] [[ 0.05962251]]
[[-2.76070929]] [[ 0.05948467]]
[[-2.75090122]] [[ 0.06003578]]
[[-2.74766111]] [[ 0.06021888]]
[[-2.75231314]] [[ 0.05995615]]
[[-2.75432253]] [[ 0.05984299]]
[[-2.7579565]] [[ 0.05963887]]
[[-2.74632502]] [[ 0.06029454]]
[[-2.7451129]] [[ 0.06036325]]
[[-2.75582266]] [[ 0.05975865]]
[[-2.75736427]] [[ 0.05967208]]
[[-2.75461388]] [[ 0.05982661]]
[[-2.76076412]] [[ 0.05948161]]
[[-2.75958252]] [[ 0.05954774]]
[[-2.74248695]] [[ 0.06051237]]
[[-2.74082279]] [[ 0.06060704]]
[[-2.74103498]] [[ 0.06059496]]
[[-2.74536514]] [[ 0.06034895]]
[[-2.74662113]] [[ 0.06027776]]
[[-2.73275375]] [[ 0.06106808]]
[[-2.73219109]] [[ 0.06110035]]
[[-2.72998476]] [[ 0.06122704]]
[[-2.74448204]] [[ 0.06039904]]
[[-2.74967909]] [[ 0.06010478]]
[[-2.76020145]] [[ 0.05951309]]
[[-2.760221]] [[ 0.059512]]
[[-2.75627041]] [[ 0.0597335]]
[[-2.76710081]] [[ 0.05912809]]
[[-2.76284552]] [[ 0.05936527]]
[[-2.76275826]] [[ 0.05937015]]
[[-2.76300383]] [[ 0.05935643]]
[[-2.75964642]]

[array([142,   1], dtype=int32), array([142,   1], dtype=int32), array([142], dtype=int32)]
(142, 1) (142,)
loss 0.00407121 new_prob and rewards:  [(array([ 0.95253187], dtype=float32), 0.5151667), (array([ 0.95238119], dtype=float32), 0.5102694), (array([ 0.95153803], dtype=float32), 0.50532264), (array([ 0.9516207], dtype=float32), 0.50032586), (array([ 0.95072865], dtype=float32), 0.49527866), (array([ 0.95071727], dtype=float32), 0.49018046), (array([ 0.9511773], dtype=float32), 0.48503077), (array([ 0.95080447], dtype=float32), 0.47982907), (array([ 0.95069826], dtype=float32), 0.4745748), (array([ 0.950001], dtype=float32), 0.46926749), (array([ 0.95015401], dtype=float32), 0.46390656), (array([ 0.95048451], dtype=float32), 0.45849147), (array([ 0.95005852], dtype=float32), 0.45302168), (array([ 0.95010477], dtype=float32), 0.44749665), (array([ 0.94961816], dtype=float32), 0.44191581), (array([ 0.94962722], dtype=float32), 0.43627858), (array([ 0.94998872], dtype=float32), 0.430

[array([142,   1], dtype=int32), array([142,   1], dtype=int32), array([142], dtype=int32)]
(142, 1) (142,)
loss 0.00400886 new_prob and rewards:  [(array([ 0.95334619], dtype=float32), 0.5151667), (array([ 0.95319736], dtype=float32), 0.5102694), (array([ 0.95252085], dtype=float32), 0.50532264), (array([ 0.95246053], dtype=float32), 0.50032586), (array([ 0.95174479], dtype=float32), 0.49527866), (array([ 0.95202249], dtype=float32), 0.49018046), (array([ 0.95232958], dtype=float32), 0.48503077), (array([ 0.95190978], dtype=float32), 0.47982907), (array([ 0.95198125], dtype=float32), 0.4745748), (array([ 0.95110065], dtype=float32), 0.46926749), (array([ 0.95102286], dtype=float32), 0.46390656), (array([ 0.95136285], dtype=float32), 0.45849147), (array([ 0.95118237], dtype=float32), 0.45302168), (array([ 0.95124692], dtype=float32), 0.44749665), (array([ 0.95076245], dtype=float32), 0.44191581), (array([ 0.95074767], dtype=float32), 0.43627858), (array([ 0.95099169], dtype=float32), 0

[array([142,   1], dtype=int32), array([142,   1], dtype=int32), array([142], dtype=int32)]
(142, 1) (142,)
loss 0.00394259 new_prob and rewards:  [(array([ 0.95413971], dtype=float32), 0.5151667), (array([ 0.95399278], dtype=float32), 0.5102694), (array([ 0.95332408], dtype=float32), 0.50532264), (array([ 0.95326442], dtype=float32), 0.50032586), (array([ 0.95255679], dtype=float32), 0.49527866), (array([ 0.95283103], dtype=float32), 0.49018046), (array([ 0.9531346], dtype=float32), 0.48503077), (array([ 0.95271957], dtype=float32), 0.47982907), (array([ 0.95279026], dtype=float32), 0.4745748), (array([ 0.95192009], dtype=float32), 0.46926749), (array([ 0.9518435], dtype=float32), 0.46390656), (array([ 0.95217955], dtype=float32), 0.45849147), (array([ 0.95200068], dtype=float32), 0.45302168), (array([ 0.95206445], dtype=float32), 0.44749665), (array([ 0.95158523], dtype=float32), 0.44191581), (array([ 0.95157069], dtype=float32), 0.43627858), (array([ 0.95181203], dtype=float32), 0.4

[array([163,   1], dtype=int32), array([163,   1], dtype=int32), array([163], dtype=int32)]
(163, 1) (163,)
loss -0.000640929 new_prob and rewards:  [(array([ 0.95461524], dtype=float32), 0.60741693), (array([ 0.95446986], dtype=float32), 0.60345149), (array([ 0.95377862], dtype=float32), 0.59944594), (array([ 0.95365012], dtype=float32), 0.59539992), (array([ 0.95284587], dtype=float32), 0.59131306), (array([ 0.9530127], dtype=float32), 0.58718491), (array([ 0.95328856], dtype=float32), 0.58301508), (array([ 0.95309395], dtype=float32), 0.57880306), (array([ 0.95286798], dtype=float32), 0.5745486), (array([ 0.95198816], dtype=float32), 0.57025111), (array([ 0.95224178], dtype=float32), 0.56591016), (array([ 0.9524709], dtype=float32), 0.56152546), (array([ 0.95226508], dtype=float32), 0.55709642), (array([ 0.95254761], dtype=float32), 0.55262262), (array([ 0.95194048], dtype=float32), 0.54810369), (array([ 0.9520483], dtype=float32), 0.54353905), (array([ 0.95230424], dtype=float32), 

loss -0.000642078 new_prob and rewards:  [(array([ 0.95453119], dtype=float32), 0.60741693), (array([ 0.95438558], dtype=float32), 0.60345149), (array([ 0.95369351], dtype=float32), 0.59944594), (array([ 0.95356482], dtype=float32), 0.59539992), (array([ 0.95275962], dtype=float32), 0.59131306), (array([ 0.95292658], dtype=float32), 0.58718491), (array([ 0.95320278], dtype=float32), 0.58301508), (array([ 0.953008], dtype=float32), 0.57880306), (array([ 0.95278174], dtype=float32), 0.5745486), (array([ 0.95190072], dtype=float32), 0.57025111), (array([ 0.9521547], dtype=float32), 0.56591016), (array([ 0.95238411], dtype=float32), 0.56152546), (array([ 0.95217806], dtype=float32), 0.55709642), (array([ 0.95246094], dtype=float32), 0.55262262), (array([ 0.9518531], dtype=float32), 0.54810369), (array([ 0.95196104], dtype=float32), 0.54353905), (array([ 0.95221728], dtype=float32), 0.53892833), (array([ 0.95183069], dtype=float32), 0.53427106), (array([ 0.9518553], dtype=float32), 0.529566

(143, 1) (143,)
loss 0.00367707 new_prob and rewards:  [(array([ 0.95468956], dtype=float32), 0.520015), (array([ 0.95454431], dtype=float32), 0.5151667), (array([ 0.95389253], dtype=float32), 0.5102694), (array([ 0.95371532], dtype=float32), 0.50532264), (array([ 0.9527986], dtype=float32), 0.50032586), (array([ 0.95294434], dtype=float32), 0.49527866), (array([ 0.95325017], dtype=float32), 0.49018046), (array([ 0.9529562], dtype=float32), 0.48503077), (array([ 0.95285875], dtype=float32), 0.47982907), (array([ 0.95211601], dtype=float32), 0.4745748), (array([ 0.95239353], dtype=float32), 0.46926749), (array([ 0.95254803], dtype=float32), 0.46390656), (array([ 0.95225245], dtype=float32), 0.45849147), (array([ 0.95217317], dtype=float32), 0.45302168), (array([ 0.95160347], dtype=float32), 0.44749665), (array([ 0.95164227], dtype=float32), 0.44191581), (array([ 0.95183223], dtype=float32), 0.43627858), (array([ 0.95178056], dtype=float32), 0.43058443), (array([ 0.95176286], dtype=float

(143, 1) (143,)
loss 0.00362063 new_prob and rewards:  [(array([ 0.95540571], dtype=float32), 0.520015), (array([ 0.95526224], dtype=float32), 0.5151667), (array([ 0.9546175], dtype=float32), 0.5102694), (array([ 0.95444226], dtype=float32), 0.50532264), (array([ 0.95353556], dtype=float32), 0.50032586), (array([ 0.95367962], dtype=float32), 0.49527866), (array([ 0.95398241], dtype=float32), 0.49018046), (array([ 0.9536913], dtype=float32), 0.48503077), (array([ 0.95359498], dtype=float32), 0.47982907), (array([ 0.95286036], dtype=float32), 0.4745748), (array([ 0.95313472], dtype=float32), 0.46926749), (array([ 0.95328784], dtype=float32), 0.46390656), (array([ 0.9529953], dtype=float32), 0.45849147), (array([ 0.95291686], dtype=float32), 0.45302168), (array([ 0.95235324], dtype=float32), 0.44749665), (array([ 0.95239145], dtype=float32), 0.44191581), (array([ 0.95257956], dtype=float32), 0.43627858), (array([ 0.95252788], dtype=float32), 0.43058443), (array([ 0.9525103], dtype=float32

[array([112,   1], dtype=int32), array([112,   1], dtype=int32), array([112], dtype=int32)]
(112, 1) (112,)
loss 0.0105832 new_prob and rewards:  [(array([ 0.95773721], dtype=float32), 0.34455448), (array([ 0.95767534], dtype=float32), 0.33793381), (array([ 0.95701289], dtype=float32), 0.33124626), (array([ 0.95676881], dtype=float32), 0.32449117), (array([ 0.95610136], dtype=float32), 0.31766787), (array([ 0.95624292], dtype=float32), 0.31077561), (array([ 0.95652246], dtype=float32), 0.30381376), (array([ 0.95646411], dtype=float32), 0.29678157), (array([ 0.95624012], dtype=float32), 0.28967836), (array([ 0.9553104], dtype=float32), 0.2825034), (array([ 0.95539504], dtype=float32), 0.27525595), (array([ 0.95579326], dtype=float32), 0.26793531), (array([ 0.95550287], dtype=float32), 0.26054072), (array([ 0.95559114], dtype=float32), 0.25307143), (array([ 0.95488107], dtype=float32), 0.24552669), (array([ 0.95492828], dtype=float32), 0.23790576), (array([ 0.95504892], dtype=float32), 0

INFO:tensorflow:Restoring parameters from ./models/trained_flappy
[[-3.15360737]] [[ 0.04094937]]
[[-3.15360641]] [[ 0.04094941]]
[[-3.15360641]] [[ 0.04094941]]
[[-3.15207863]] [[ 0.04100945]]
[[-3.13546371]] [[ 0.04166789]]
[[-3.12999773]] [[ 0.0418867]]
[[-3.11364937]] [[ 0.04254773]]
[[-3.11658382]] [[ 0.04242835]]
[[-3.12304497]] [[ 0.04216662]]
[[-3.12162256]] [[ 0.04222411]]
[[-3.11645222]] [[ 0.04243369]]
[[-3.09507799]] [[ 0.04331074]]
[[-3.09625053]] [[ 0.04326218]]
[[-3.10580635]] [[ 0.04286838]]
[[-3.09998369]] [[ 0.04310793]]
[[-3.10224915]] [[ 0.04301457]]
[[-3.08755684]] [[ 0.04362345]]
[[-3.08915615]] [[ 0.04355678]]
[[-3.09284687]] [[ 0.04340328]]
[[-3.08904028]] [[ 0.0435616]]
[[-3.08676672]] [[ 0.04365642]]
[[-3.07476473]] [[ 0.04416027]]
[[-3.07429409]] [[ 0.04418014]]
[[-3.07896805]] [[ 0.04398319]]
[[-3.07053041]] [[ 0.04433935]]
[[-3.06029558]] [[ 0.04477506]]
[[-3.04573059]] [[ 0.04540215]]
[[-3.04589224]] [[ 0.04539515]]
[[-3.05332375]] [[ 0.04507419]]
[[-3.045

[[-3.09156609]] [[ 0.04345649]]
[[-3.09335709]] [[ 0.0433821]]
[[-3.07867193]] [[ 0.04399564]]
[[-3.08009958]] [[ 0.04393563]]
[[-3.08520842]] [[ 0.04372153]]
[[-3.0694108]] [[ 0.04438681]]
[[-3.07242012]] [[ 0.04425934]]
[[-3.05197954]] [[ 0.04513209]]
[[-3.05247068]] [[ 0.04511093]]
[[-3.05513573]] [[ 0.04499627]]
[[-3.04636669]] [[ 0.04537459]]
[[-3.04953146]] [[ 0.04523771]]
[[-3.04438972]] [[ 0.0454603]]
[[-3.04059267]] [[ 0.04562536]]
[[-3.04849386]] [[ 0.04528254]]
[[-3.03130198]] [[ 0.04603162]]
[[-3.03261423]] [[ 0.04597403]]
[[-3.02909756]] [[ 0.04612852]]
[[-3.03160143]] [[ 0.04601847]]
[[-3.03329611]] [[ 0.04594413]]
[[-3.02686071]] [[ 0.04622704]]
[[-3.0229001]] [[ 0.04640198]]
[[-3.00723791]] [[ 0.04709996]]
-1
Game 4 over; alive frames: 41
[[-3.00929165]] [[ 0.04700787]]
[[-3.15361357]] [[ 0.04094913]]
[[-3.15020704]] [[ 0.04108312]]
[[-3.13072443]] [[ 0.04185754]]
[[-3.1273303]] [[ 0.04199388]]
[[-3.10531807]] [[ 0.04288842]]
[[-3.10947037]] [[ 0.0427183]]
[[-3.1169982]

[array([135,   1], dtype=int32), array([135,   1], dtype=int32), array([135], dtype=int32)]
(135, 1) (135,)
loss 0.0048987 new_prob and rewards:  [(array([ 0.95996749], dtype=float32), 0.47982907), (array([ 0.95983583], dtype=float32), 0.4745748), (array([ 0.9590739], dtype=float32), 0.46926749), (array([ 0.95893955], dtype=float32), 0.46390656), (array([ 0.95805919], dtype=float32), 0.45849147), (array([ 0.9582265], dtype=float32), 0.45302168), (array([ 0.95852876], dtype=float32), 0.44749665), (array([ 0.95826107], dtype=float32), 0.44191581), (array([ 0.9581486], dtype=float32), 0.43627858), (array([ 0.9575268], dtype=float32), 0.43058443), (array([ 0.9577691], dtype=float32), 0.42483276), (array([ 0.95791], dtype=float32), 0.41902301), (array([ 0.95761931], dtype=float32), 0.41315454), (array([ 0.9575395], dtype=float32), 0.4072268), (array([ 0.95711768], dtype=float32), 0.40123922), (array([ 0.95715171], dtype=float32), 0.3951911), (array([ 0.95733339], dtype=float32), 0.38908193)

[array([61,  1], dtype=int32), array([61,  1], dtype=int32), array([61], dtype=int32)]
(61, 1) (61,)
loss 0.0219777 new_prob and rewards:  [(array([ 0.96194571], dtype=float32), -0.094313294), (array([ 0.96181893], dtype=float32), -0.10536697), (array([ 0.9612025], dtype=float32), -0.11653229), (array([ 0.96107972], dtype=float32), -0.12781039), (array([ 0.96033424], dtype=float32), -0.13920242), (array([ 0.96047884], dtype=float32), -0.15070951), (array([ 0.9607442], dtype=float32), -0.16233283), (array([ 0.96049196], dtype=float32), -0.17407358), (array([ 0.96041042], dtype=float32), -0.1859329), (array([ 0.95966762], dtype=float32), -0.19791202), (array([ 0.95994407], dtype=float32), -0.21001214), (array([ 0.96007359], dtype=float32), -0.22223449), (array([ 0.95986229], dtype=float32), -0.23458029), (array([ 0.95980221], dtype=float32), -0.24705079), (array([ 0.9592368], dtype=float32), -0.25964728), (array([ 0.95926452], dtype=float32), -0.27237099), (array([ 0.95940483], dtype=flo

(90, 1) (90,)
loss 0.0128086 new_prob and rewards:  [(array([ 0.96677738], dtype=float32), 0.18235964), (array([ 0.96666354], dtype=float32), 0.17410064), (array([ 0.96610212], dtype=float32), 0.16575822), (array([ 0.96599352], dtype=float32), 0.15733154), (array([ 0.9653731], dtype=float32), 0.14881974), (array([ 0.96551168], dtype=float32), 0.14022195), (array([ 0.96572888], dtype=float32), 0.13153733), (array([ 0.96554232], dtype=float32), 0.12276498), (array([ 0.96545798], dtype=float32), 0.11390402), (array([ 0.964656], dtype=float32), 0.10495356), (array([ 0.96490002], dtype=float32), 0.095912687), (array([ 0.96499735], dtype=float32), 0.086780496), (array([ 0.96475518], dtype=float32), 0.077556051), (array([ 0.96473718], dtype=float32), 0.068238437), (array([ 0.96423471], dtype=float32), 0.058826704), (array([ 0.96426755], dtype=float32), 0.049319904), (array([ 0.96441782], dtype=float32), 0.039717074), (array([ 0.96439302], dtype=float32), 0.030017247), (array([ 0.96442735], dt

[array([90,  1], dtype=int32), array([90,  1], dtype=int32), array([90], dtype=int32)]
(90, 1) (90,)
loss 0.0118086 new_prob and rewards:  [(array([ 0.96943581], dtype=float32), 0.18235964), (array([ 0.96932942], dtype=float32), 0.17410064), (array([ 0.96880144], dtype=float32), 0.16575822), (array([ 0.96869916], dtype=float32), 0.15733154), (array([ 0.96811533], dtype=float32), 0.14881974), (array([ 0.96824545], dtype=float32), 0.14022195), (array([ 0.96845061], dtype=float32), 0.13153733), (array([ 0.9682737], dtype=float32), 0.12276498), (array([ 0.96819496], dtype=float32), 0.11390402), (array([ 0.96744162], dtype=float32), 0.10495356), (array([ 0.96767074], dtype=float32), 0.095912687), (array([ 0.96776313], dtype=float32), 0.086780496), (array([ 0.96753496], dtype=float32), 0.077556051), (array([ 0.96751767], dtype=float32), 0.068238437), (array([ 0.96704495], dtype=float32), 0.058826704), (array([ 0.96707541], dtype=float32), 0.049319904), (array([ 0.96721715], dtype=float32), 0

(41, 1) (41,)
loss 0.0191605 new_prob and rewards:  [(array([ 0.9731859], dtype=float32), -0.33794352), (array([ 0.97309035], dtype=float32), -0.3514581), (array([ 0.9726631], dtype=float32), -0.36510921), (array([ 0.9726187], dtype=float32), -0.37889817), (array([ 0.97207433], dtype=float32), -0.39282644), (array([ 0.97218543], dtype=float32), -0.4068954), (array([ 0.9723686], dtype=float32), -0.42110646), (array([ 0.97211158], dtype=float32), -0.43546107), (array([ 0.9720704], dtype=float32), -0.44996068), (array([ 0.97157604], dtype=float32), -0.46460673), (array([ 0.97175241], dtype=float32), -0.47940075), (array([ 0.97182566], dtype=float32), -0.4943442), (array([ 0.97170883], dtype=float32), -0.50943857), (array([ 0.97170144], dtype=float32), -0.52468544), (array([ 0.97123337], dtype=float32), -0.54008627), (array([ 0.97124851], dtype=float32), -0.55564272), (array([ 0.97134924], dtype=float32), -0.5713563), (array([ 0.97133118], dtype=float32), -0.5872286), (array([ 0.97138464],

loss 0.0102997 new_prob and rewards:  [(array([ 0.97594798], dtype=float32), 0.13153733), (array([ 0.97591197], dtype=float32), 0.12276498), (array([ 0.97546548], dtype=float32), 0.11390402), (array([ 0.97531909], dtype=float32), 0.10495356), (array([ 0.97487324], dtype=float32), 0.095912687), (array([ 0.97495103], dtype=float32), 0.086780496), (array([ 0.97512919], dtype=float32), 0.077556051), (array([ 0.97508156], dtype=float32), 0.068238437), (array([ 0.97494555], dtype=float32), 0.058826704), (array([ 0.97435719], dtype=float32), 0.049319904), (array([ 0.97439516], dtype=float32), 0.039717074), (array([ 0.97465909], dtype=float32), 0.030017247), (array([ 0.97448778], dtype=float32), 0.020219443), (array([ 0.97454965], dtype=float32), 0.01032267), (array([ 0.9741348], dtype=float32), 0.00032592902), (array([ 0.97417986], dtype=float32), -0.0097717885), (array([ 0.97428674], dtype=float32), -0.019971503), (array([ 0.9741739], dtype=float32), -0.030274246), (array([ 0.9741078], dtype

INFO:tensorflow:Restoring parameters from ./models/trained_flappy
[[-3.76145029]] [[ 0.02272172]]
[[-3.76145172]] [[ 0.02272168]]
[[-3.76145172]] [[ 0.02272168]]
[[-3.75991726]] [[ 0.02275578]]
[[-3.74115324]] [[ 0.02317681]]
[[-3.73443031]] [[ 0.02332951]]
[[-3.71607971]] [[ 0.02375131]]
[[-3.71839523]] [[ 0.02369768]]
[[-3.72436619]] [[ 0.02355992]]
[[-3.72098589]] [[ 0.02363781]]
[[-3.71686697]] [[ 0.02373306]]
[[-3.69563055]] [[ 0.02423011]]
[[-3.70051622]] [[ 0.02411487]]
[[-3.70946217]] [[ 0.02390524]]
[[-3.7028513]] [[ 0.02405998]]
[[-3.70506525]] [[ 0.02400805]]
[[-3.6870141]] [[ 0.02443467]]
[[-3.68798018]] [[ 0.02441165]]
[[-3.69547653]] [[ 0.02423376]]
[[-3.68769836]] [[ 0.02441836]]
[[-3.6831007]] [[ 0.02452813]]
[[-3.66897917]] [[ 0.02486829]]
[[-3.66859293]] [[ 0.02487765]]
[[-3.66892767]] [[ 0.02486954]]
[[-3.65945196]] [[ 0.02510037]]
[[-3.65229607]] [[ 0.02527607]]
[[-3.63604259]] [[ 0.02567962]]
[[-3.64258432]] [[ 0.02551645]]
[[-3.64818144]] [[ 0.02537764]]
[[-3.6440

[[-3.47357321]] [[ 0.03007358]]
[[-3.48069668]] [[ 0.02986648]]
[[-3.4791646]] [[ 0.02991091]]
[[-3.48953867]] [[ 0.02961136]]
[[-3.46975708]] [[ 0.03018509]]
[[-3.46435404]] [[ 0.03034366]]
[[-3.4735713]] [[ 0.03007364]]
[[-3.47927999]] [[ 0.02990756]]
[[-3.49526405]] [[ 0.02944729]]
[[-3.48087549]] [[ 0.02986131]]
[[-3.48155546]] [[ 0.02984161]]
[[-3.49018955]] [[ 0.02959266]]
[[-3.48492527]] [[ 0.02974421]]
-1
Game 3 over; alive frames: 99
[[-3.49577999]] [[ 0.02943254]]
[[-3.76145554]] [[ 0.0227216]]
[[-3.75770712]] [[ 0.02280498]]
[[-3.73888397]] [[ 0.02322825]]
[[-3.73526382]] [[ 0.02331052]]
[[-3.71304798]] [[ 0.02382171]]
[[-3.71727324]] [[ 0.02372365]]
[[-3.72517824]] [[ 0.02354125]]
[[-3.71755648]] [[ 0.02371709]]
[[-3.71540165]] [[ 0.02376704]]
[[-3.69404793]] [[ 0.02426756]]
[[-3.70150423]] [[ 0.02409163]]
[[-3.70570087]] [[ 0.02399316]]
[[-3.69933462]] [[ 0.02414269]]
[[-3.69764042]] [[ 0.02418264]]
[[-3.68144894]] [[ 0.02456768]]
[[-3.68239975]] [[ 0.02454491]]
[[-3.68699

(142, 1) (142,)
loss 0.00197672 new_prob and rewards:  [(array([ 0.97753245], dtype=float32), 0.5151667), (array([ 0.97744954], dtype=float32), 0.5102694), (array([ 0.97703946], dtype=float32), 0.50532264), (array([ 0.97700518], dtype=float32), 0.50032586), (array([ 0.97667199], dtype=float32), 0.49527866), (array([ 0.97683978], dtype=float32), 0.49018046), (array([ 0.9770003], dtype=float32), 0.48503077), (array([ 0.97671831], dtype=float32), 0.47982907), (array([ 0.97674572], dtype=float32), 0.4745748), (array([ 0.9762553], dtype=float32), 0.46926749), (array([ 0.97629684], dtype=float32), 0.46390656), (array([ 0.97642016], dtype=float32), 0.45849147), (array([ 0.97622555], dtype=float32), 0.45302168), (array([ 0.97630483], dtype=float32), 0.44749665), (array([ 0.97596824], dtype=float32), 0.44191581), (array([ 0.97602159], dtype=float32), 0.43627858), (array([ 0.97616106], dtype=float32), 0.43058443), (array([ 0.97593522], dtype=float32), 0.42483276), (array([ 0.97601694], dtype=flo

[array([41,  1], dtype=int32), array([41,  1], dtype=int32), array([41], dtype=int32)]
(41, 1) (41,)
loss 0.0150277 new_prob and rewards:  [(array([ 0.97903538], dtype=float32), -0.33794352), (array([ 0.97895736], dtype=float32), -0.3514581), (array([ 0.97860008], dtype=float32), -0.36510921), (array([ 0.97855955], dtype=float32), -0.37889817), (array([ 0.97809958], dtype=float32), -0.39282644), (array([ 0.97820723], dtype=float32), -0.4068954), (array([ 0.97836131], dtype=float32), -0.42110646), (array([ 0.97815561], dtype=float32), -0.43546107), (array([ 0.9781419], dtype=float32), -0.44996068), (array([ 0.9777447], dtype=float32), -0.46460673), (array([ 0.97789514], dtype=float32), -0.47940075), (array([ 0.97794998], dtype=float32), -0.4943442), (array([ 0.97788554], dtype=float32), -0.50943857), (array([ 0.97785413], dtype=float32), -0.52468544), (array([ 0.97745919], dtype=float32), -0.54008627), (array([ 0.97746873], dtype=float32), -0.55564272), (array([ 0.9775514], dtype=float3

loss 0.00634366 new_prob and rewards:  [(array([ 0.98104817], dtype=float32), 0.25307143), (array([ 0.98097658], dtype=float32), 0.24552669), (array([ 0.98066616], dtype=float32), 0.23790576), (array([ 0.98064256], dtype=float32), 0.23020783), (array([ 0.98023647], dtype=float32), 0.22243215), (array([ 0.98031425], dtype=float32), 0.21457793), (array([ 0.98043984], dtype=float32), 0.20664437), (array([ 0.98023409], dtype=float32), 0.19863068), (array([ 0.98019177], dtype=float32), 0.19053604), (array([ 0.9798187], dtype=float32), 0.18235964), (array([ 0.97996747], dtype=float32), 0.17410064), (array([ 0.98002774], dtype=float32), 0.16575822), (array([ 0.97988021], dtype=float32), 0.15733154), (array([ 0.97986364], dtype=float32), 0.14881974), (array([ 0.97951007], dtype=float32), 0.14022195), (array([ 0.97953957], dtype=float32), 0.13153733), (array([ 0.97962236], dtype=float32), 0.12276498), (array([ 0.97962499], dtype=float32), 0.11390402), (array([ 0.97963309], dtype=float32), 0.104

(41, 1) (41,)
loss 0.0126415 new_prob and rewards:  [(array([ 0.9824596], dtype=float32), -0.33794352), (array([ 0.98239243], dtype=float32), -0.3514581), (array([ 0.98204613], dtype=float32), -0.36510921), (array([ 0.98197848], dtype=float32), -0.37889817), (array([ 0.98155987], dtype=float32), -0.39282644), (array([ 0.98163986], dtype=float32), -0.4068954), (array([ 0.98179054], dtype=float32), -0.42110646), (array([ 0.98164415], dtype=float32), -0.43546107), (array([ 0.9816041], dtype=float32), -0.44996068), (array([ 0.9811945], dtype=float32), -0.46460673), (array([ 0.98133773], dtype=float32), -0.47940075), (array([ 0.98141974], dtype=float32), -0.4943442), (array([ 0.98129582], dtype=float32), -0.50943857), (array([ 0.98126251], dtype=float32), -0.52468544), (array([ 0.98094648], dtype=float32), -0.54008627), (array([ 0.98096436], dtype=float32), -0.55564272), (array([ 0.98105538], dtype=float32), -0.5713563), (array([ 0.98103338], dtype=float32), -0.5872286), (array([ 0.98103625

[array([127,   1], dtype=int32), array([127,   1], dtype=int32), array([127], dtype=int32)]
(127, 1) (127,)
loss 0.0027018 new_prob and rewards:  [(array([ 0.98427749], dtype=float32), 0.43627858), (array([ 0.98425376], dtype=float32), 0.43058443), (array([ 0.98393697], dtype=float32), 0.42483276), (array([ 0.983823], dtype=float32), 0.41902301), (array([ 0.98350632], dtype=float32), 0.41315454), (array([ 0.98354554], dtype=float32), 0.4072268), (array([ 0.98365009], dtype=float32), 0.40123922), (array([ 0.98358858], dtype=float32), 0.3951911), (array([ 0.98351872], dtype=float32), 0.38908193), (array([ 0.98314548], dtype=float32), 0.38291106), (array([ 0.98323178], dtype=float32), 0.37667781), (array([ 0.98338932], dtype=float32), 0.37038162), (array([ 0.98327023], dtype=float32), 0.36402187), (array([ 0.98330855], dtype=float32), 0.35759783), (array([ 0.98298621], dtype=float32), 0.35110894), (array([ 0.98300385], dtype=float32), 0.34455448), (array([ 0.98313832], dtype=float32), 0.3

loss 0.002658 new_prob and rewards:  [(array([ 0.98454309], dtype=float32), 0.43627858), (array([ 0.98451978], dtype=float32), 0.43058443), (array([ 0.98420715], dtype=float32), 0.42483276), (array([ 0.98409468], dtype=float32), 0.41902301), (array([ 0.98378217], dtype=float32), 0.41315454), (array([ 0.98382086], dtype=float32), 0.4072268), (array([ 0.98392403], dtype=float32), 0.40123922), (array([ 0.98386323], dtype=float32), 0.3951911), (array([ 0.98379433], dtype=float32), 0.38908193), (array([ 0.98342609], dtype=float32), 0.38291106), (array([ 0.98351127], dtype=float32), 0.37667781), (array([ 0.98366672], dtype=float32), 0.37038162), (array([ 0.98354906], dtype=float32), 0.36402187), (array([ 0.98358685], dtype=float32), 0.35759783), (array([ 0.98326874], dtype=float32), 0.35110894), (array([ 0.98328614], dtype=float32), 0.34455448), (array([ 0.98341888], dtype=float32), 0.33793381), (array([ 0.98327923], dtype=float32), 0.33124626), (array([ 0.98319858], dtype=float32), 0.324491

[[-4.06097651]] [[ 0.01694027]]
[[-4.06470776]] [[ 0.01687824]]
[[-4.07068062]] [[ 0.01677942]]
[[-4.04859352]] [[ 0.01714772]]
[[-4.05385399]] [[ 0.01705929]]
[[-4.03215742]] [[ 0.01742694]]
[[-4.03355837]] [[ 0.01740297]]
[[-4.03514147]] [[ 0.01737592]]
[[-4.02114201]] [[ 0.01761657]]
[[-4.02395916]] [[ 0.01756788]]
[[-4.01641607]] [[ 0.01769854]]
[[-4.0119729]] [[ 0.01777595]]
[[-4.02305508]] [[ 0.01758349]]
[[-3.99838901]] [[ 0.01801469]]
[[-4.00139046]] [[ 0.01796167]]
[[-3.98981643]] [[ 0.01816696]]
[[-3.99420524]] [[ 0.01808885]]
[[-3.99795842]] [[ 0.0180223]]
[[-3.98970151]] [[ 0.01816902]]
[[-3.98834968]] [[ 0.01819315]]
[[-3.97439408]] [[ 0.01844411]]
-1
Game 2 over; alive frames: 41
[[-3.9754169]] [[ 0.0184256]]
[[-4.15985584]] [[ 0.01536989]]
[[-4.15589905]] [[ 0.01542988]]
[[-4.13862371]] [[ 0.01569453]]
[[-4.13868475]] [[ 0.01569359]]
[[-4.11329603]] [[ 0.01609064]]
[[-4.11767435]] [[ 0.01602147]]
[[-4.12328911]] [[ 0.0159332]]
[[-4.10887337]] [[ 0.01616081]]
[[-4.1080317

(41, 1) (41,)
loss 0.0105621 new_prob and rewards:  [(array([ 0.98535252], dtype=float32), -0.33794352), (array([ 0.98529488], dtype=float32), -0.3514581), (array([ 0.9850142], dtype=float32), -0.36510921), (array([ 0.98495036], dtype=float32), -0.37889817), (array([ 0.98455596], dtype=float32), -0.39282644), (array([ 0.98461431], dtype=float32), -0.4068954), (array([ 0.9847579], dtype=float32), -0.42110646), (array([ 0.98464549], dtype=float32), -0.43546107), (array([ 0.98460919], dtype=float32), -0.44996068), (array([ 0.98431396], dtype=float32), -0.46460673), (array([ 0.98442441], dtype=float32), -0.47940075), (array([ 0.98449129], dtype=float32), -0.4943442), (array([ 0.98435926], dtype=float32), -0.50943857), (array([ 0.98431385], dtype=float32), -0.52468544), (array([ 0.98407346], dtype=float32), -0.54008627), (array([ 0.98409212], dtype=float32), -0.55564272), (array([ 0.98418409], dtype=float32), -0.5713563), (array([ 0.98413873], dtype=float32), -0.5872286), (array([ 0.9841245

(41, 1) (41,)
loss 0.00945296 new_prob and rewards:  [(array([ 0.98692667], dtype=float32), -0.33794352), (array([ 0.98687434], dtype=float32), -0.3514581), (array([ 0.98664147], dtype=float32), -0.36510921), (array([ 0.9866423], dtype=float32), -0.37889817), (array([ 0.98629481], dtype=float32), -0.39282644), (array([ 0.98635787], dtype=float32), -0.4068954), (array([ 0.98643249], dtype=float32), -0.42110646), (array([ 0.98622781], dtype=float32), -0.43546107), (array([ 0.98621815], dtype=float32), -0.44996068), (array([ 0.98597312], dtype=float32), -0.46460673), (array([ 0.98608845], dtype=float32), -0.47940075), (array([ 0.98612332], dtype=float32), -0.4943442), (array([ 0.98603964], dtype=float32), -0.50943857), (array([ 0.98602641], dtype=float32), -0.52468544), (array([ 0.98580283], dtype=float32), -0.54008627), (array([ 0.98582411], dtype=float32), -0.55564272), (array([ 0.9858821], dtype=float32), -0.5713563), (array([ 0.98579943], dtype=float32), -0.5872286), (array([ 0.985831

(83, 1) (83,)
loss 0.00531848 new_prob and rewards:  [(array([ 0.9880389], dtype=float32), 0.12276498), (array([ 0.9879905], dtype=float32), 0.11390402), (array([ 0.98777324], dtype=float32), 0.10495356), (array([ 0.98777366], dtype=float32), 0.095912687), (array([ 0.98744893], dtype=float32), 0.086780496), (array([ 0.98750526], dtype=float32), 0.077556051), (array([ 0.98757881], dtype=float32), 0.068238437), (array([ 0.98739147], dtype=float32), 0.058826704), (array([ 0.98738122], dtype=float32), 0.049319904), (array([ 0.98715216], dtype=float32), 0.039717074), (array([ 0.98725981], dtype=float32), 0.030017247), (array([ 0.98729312], dtype=float32), 0.020219443), (array([ 0.9872095], dtype=float32), 0.01032267), (array([ 0.98719615], dtype=float32), 0.00032592902), (array([ 0.98698604], dtype=float32), -0.0097717885), (array([ 0.98700184], dtype=float32), -0.019971503), (array([ 0.987064], dtype=float32), -0.030274246), (array([ 0.98700178], dtype=float32), -0.040681057), (array([ 0.9

loss 0.00633498 new_prob and rewards:  [(array([ 0.98863924], dtype=float32), -0.040681057), (array([ 0.98859286], dtype=float32), -0.051192984), (array([ 0.98836845], dtype=float32), -0.061811097), (array([ 0.98834634], dtype=float32), -0.072536461), (array([ 0.9880625], dtype=float32), -0.083370164), (array([ 0.9881286], dtype=float32), -0.094313294), (array([ 0.98822498], dtype=float32), -0.10536697), (array([ 0.98809785], dtype=float32), -0.11653229), (array([ 0.98808402], dtype=float32), -0.12781039), (array([ 0.98784262], dtype=float32), -0.13920242), (array([ 0.98793167], dtype=float32), -0.15070951), (array([ 0.9879694], dtype=float32), -0.16233283), (array([ 0.98791659], dtype=float32), -0.17407358), (array([ 0.98790663], dtype=float32), -0.1859329), (array([ 0.98765534], dtype=float32), -0.19791202), (array([ 0.98770767], dtype=float32), -0.21001214), (array([ 0.98775172], dtype=float32), -0.22223449), (array([ 0.98767084], dtype=float32), -0.23458029), (array([ 0.9877215], d

[array([33,  1], dtype=int32), array([33,  1], dtype=int32), array([33], dtype=int32)]
(33, 1) (33,)
loss 0.00819223 new_prob and rewards:  [(array([ 0.98950219], dtype=float32), -0.44996068), (array([ 0.9894864], dtype=float32), -0.46460673), (array([ 0.98925287], dtype=float32), -0.47940075), (array([ 0.98917764], dtype=float32), -0.4943442), (array([ 0.98894572], dtype=float32), -0.50943857), (array([ 0.98898435], dtype=float32), -0.52468544), (array([ 0.98907804], dtype=float32), -0.54008627), (array([ 0.98904717], dtype=float32), -0.55564272), (array([ 0.98897994], dtype=float32), -0.5713563), (array([ 0.98867565], dtype=float32), -0.5872286), (array([ 0.98869896], dtype=float32), -0.60326117), (array([ 0.98883522], dtype=float32), -0.61945575), (array([ 0.98873919], dtype=float32), -0.63581389), (array([ 0.9887706], dtype=float32), -0.65233725), (array([ 0.98855174], dtype=float32), -0.66902751), (array([ 0.98857528], dtype=float32), -0.68588638), (array([ 0.98863345], dtype=floa

[[-4.48460388]] [[ 0.01115551]]
[[-4.49752665]] [[ 0.01101385]]
[[-4.48298693]] [[ 0.01117336]]
[[-4.48563051]] [[ 0.01114419]]
[[-4.48187828]] [[ 0.01118561]]
[[-4.48461008]] [[ 0.01115544]]
[[-4.49479485]] [[ 0.01104365]]
-1
Game 1 over; alive frames: 32
[[-4.48280764]] [[ 0.01117534]]
[[-4.62532187]] [[ 0.00970538]]
[[-4.62112045]] [[ 0.00974585]]
[[-4.60015678]] [[ 0.00995026]]
[[-4.59508848]] [[ 0.01000031]]
[[-4.56621838]] [[ 0.01029021]]
[[-4.57093191]] [[ 0.01024232]]
[[-4.5813241]] [[ 0.0101375]]
[[-4.57236624]] [[ 0.01022779]]
[[-4.56971502]] [[ 0.01025466]]
[[-4.55065203]] [[ 0.01044996]]
[[-4.54752016]] [[ 0.0104824]]
[[-4.55828428]] [[ 0.01037133]]
[[-4.54494762]] [[ 0.01050911]]
[[-4.54656315]] [[ 0.01049233]]
[[-4.53480625]] [[ 0.0106151]]
[[-4.53656816]] [[ 0.01059661]]
[[-4.54307175]] [[ 0.01052864]]
[[-4.53507233]] [[ 0.0106123]]
[[-4.53656197]] [[ 0.01059667]]
[[-4.5224781]] [[ 0.01074536]]
[[-4.52078438]] [[ 0.01076338]]
[[-4.52493572]] [[ 0.01071926]]
[[-4.51390839

[[-4.57640934]] [[ 0.01018694]]
[[-4.55543756]] [[ 0.01040059]]
[[-4.56278753]] [[ 0.01032521]]
[[-4.56531525]] [[ 0.01029942]]
[[-4.56024408]] [[ 0.01035124]]
[[-4.55934381]] [[ 0.01036046]]
[[-4.53814983]] [[ 0.01058004]]
[[-4.53844023]] [[ 0.010577]]
[[-4.54338264]] [[ 0.0105254]]
[[-4.54000187]] [[ 0.01056067]]
[[-4.54140615]] [[ 0.010546]]
[[-4.52028513]] [[ 0.01076869]]
[[-4.5227828]] [[ 0.01074212]]
[[-4.52972269]] [[ 0.01066862]]
[[-4.50763798]] [[ 0.01090426]]
[[-4.5117836]] [[ 0.01085963]]
[[-4.48525381]] [[ 0.01114834]]
[[-4.48574972]] [[ 0.01114287]]
[[-4.48931217]] [[ 0.01110369]]
[[-4.47523737]] [[ 0.0112593]]
[[-4.47919941]] [[ 0.01121528]]
[[-4.47136497]] [[ 0.01130249]]
[[-4.46620893]] [[ 0.01136026]]
[[-4.47733784]] [[ 0.01123594]]
[[-4.45428562]] [[ 0.01149495]]
[[-4.45680046]] [[ 0.01146641]]
[[-4.4494853]] [[ 0.01154963]]
[[-4.45235682]] [[ 0.01151689]]
[[-4.45624781]] [[ 0.01147268]]
[[-4.44638348]] [[ 0.01158509]]
[[-4.44237709]] [[ 0.01163106]]
[[-4.42281771]] [

(41, 1) (41,)
loss 0.00657056 new_prob and rewards:  [(array([ 0.99093848], dtype=float32), -0.33794352), (array([ 0.99090034], dtype=float32), -0.3514581), (array([ 0.99070948], dtype=float32), -0.36510921), (array([ 0.99069536], dtype=float32), -0.37889817), (array([ 0.99045843], dtype=float32), -0.39282644), (array([ 0.9905138], dtype=float32), -0.4068954), (array([ 0.99059492], dtype=float32), -0.42110646), (array([ 0.99048907], dtype=float32), -0.43546107), (array([ 0.99048334], dtype=float32), -0.44996068), (array([ 0.99028134], dtype=float32), -0.46460673), (array([ 0.99035251], dtype=float32), -0.47940075), (array([ 0.99037713], dtype=float32), -0.4943442), (array([ 0.99032784], dtype=float32), -0.50943857), (array([ 0.99031907], dtype=float32), -0.52468544), (array([ 0.99011153], dtype=float32), -0.54008627), (array([ 0.99011427], dtype=float32), -0.55564272), (array([ 0.99016321], dtype=float32), -0.5713563), (array([ 0.99012941], dtype=float32), -0.5872286), (array([ 0.99014

[array([64,  1], dtype=int32), array([64,  1], dtype=int32), array([64], dtype=int32)]
(64, 1) (64,)
loss 0.00495053 new_prob and rewards:  [(array([ 0.99142289], dtype=float32), -0.061811097), (array([ 0.99138653], dtype=float32), -0.072536461), (array([ 0.99120182], dtype=float32), -0.083370164), (array([ 0.99115652], dtype=float32), -0.094313294), (array([ 0.99089456], dtype=float32), -0.10536697), (array([ 0.99093777], dtype=float32), -0.11653229), (array([ 0.99103278], dtype=float32), -0.12781039), (array([ 0.99095064], dtype=float32), -0.13920242), (array([ 0.9909265], dtype=float32), -0.15070951), (array([ 0.99074978], dtype=float32), -0.16233283), (array([ 0.99072081], dtype=float32), -0.17407358), (array([ 0.9908213], dtype=float32), -0.1859329), (array([ 0.99069649], dtype=float32), -0.19791202), (array([ 0.99071157], dtype=float32), -0.21001214), (array([ 0.99059987], dtype=float32), -0.22223449), (array([ 0.99061662], dtype=float32), -0.23458029), (array([ 0.99067831], dtyp

(83, 1) (83,)
loss 0.00371873 new_prob and rewards:  [(array([ 0.99171346], dtype=float32), 0.12276498), (array([ 0.99167824], dtype=float32), 0.11390402), (array([ 0.99151683], dtype=float32), 0.10495356), (array([ 0.99150419], dtype=float32), 0.095912687), (array([ 0.99129438], dtype=float32), 0.086780496), (array([ 0.99133396], dtype=float32), 0.077556051), (array([ 0.9914012], dtype=float32), 0.068238437), (array([ 0.99129194], dtype=float32), 0.058826704), (array([ 0.99127215], dtype=float32), 0.049319904), (array([ 0.99107915], dtype=float32), 0.039717074), (array([ 0.99115455], dtype=float32), 0.030017247), (array([ 0.99118799], dtype=float32), 0.020219443), (array([ 0.99110931], dtype=float32), 0.01032267), (array([ 0.99110031], dtype=float32), 0.00032592902), (array([ 0.99091727], dtype=float32), -0.0097717885), (array([ 0.99093109], dtype=float32), -0.019971503), (array([ 0.99097568], dtype=float32), -0.030274246), (array([ 0.99097008], dtype=float32), -0.040681057), (array([

(91, 1) (91,)
loss 0.00319009 new_prob and rewards:  [(array([ 0.99201733], dtype=float32), 0.19053604), (array([ 0.99198318], dtype=float32), 0.18235964), (array([ 0.9918018], dtype=float32), 0.17410064), (array([ 0.99176508], dtype=float32), 0.16575822), (array([ 0.99155498], dtype=float32), 0.15733154), (array([ 0.99160141], dtype=float32), 0.14881974), (array([ 0.99168205], dtype=float32), 0.14022195), (array([ 0.99162072], dtype=float32), 0.13153733), (array([ 0.99160165], dtype=float32), 0.12276498), (array([ 0.99135756], dtype=float32), 0.11390402), (array([ 0.9914369], dtype=float32), 0.10495356), (array([ 0.99147081], dtype=float32), 0.095912687), (array([ 0.99137175], dtype=float32), 0.086780496), (array([ 0.9913879], dtype=float32), 0.077556051), (array([ 0.99121684], dtype=float32), 0.068238437), (array([ 0.99123031], dtype=float32), 0.058826704), (array([ 0.99125379], dtype=float32), 0.049319904), (array([ 0.99120855], dtype=float32), 0.039717074), (array([ 0.99115694], dt

[array([91,  1], dtype=int32), array([91,  1], dtype=int32), array([91], dtype=int32)]
(91, 1) (91,)
loss 0.00309238 new_prob and rewards:  [(array([ 0.99227101], dtype=float32), 0.19053604), (array([ 0.99223787], dtype=float32), 0.18235964), (array([ 0.9920612], dtype=float32), 0.17410064), (array([ 0.99202543], dtype=float32), 0.16575822), (array([ 0.99182081], dtype=float32), 0.15733154), (array([ 0.99186599], dtype=float32), 0.14881974), (array([ 0.99194461], dtype=float32), 0.14022195), (array([ 0.99188477], dtype=float32), 0.13153733), (array([ 0.99186623], dtype=float32), 0.12276498), (array([ 0.99162859], dtype=float32), 0.11390402), (array([ 0.99170578], dtype=float32), 0.10495356), (array([ 0.99173892], dtype=float32), 0.095912687), (array([ 0.99164236], dtype=float32), 0.086780496), (array([ 0.99165809], dtype=float32), 0.077556051), (array([ 0.99149144], dtype=float32), 0.068238437), (array([ 0.99150455], dtype=float32), 0.058826704), (array([ 0.9915275], dtype=float32), 0.

[array([33,  1], dtype=int32), array([33,  1], dtype=int32), array([33], dtype=int32)]
(33, 1) (33,)
loss 0.00570924 new_prob and rewards:  [(array([ 0.99268454], dtype=float32), -0.44996068), (array([ 0.99267364], dtype=float32), -0.46460673), (array([ 0.99252802], dtype=float32), -0.47940075), (array([ 0.99248701], dtype=float32), -0.4943442), (array([ 0.99232614], dtype=float32), -0.50943857), (array([ 0.99235296], dtype=float32), -0.52468544), (array([ 0.99240738], dtype=float32), -0.54008627), (array([ 0.99233603], dtype=float32), -0.55564272), (array([ 0.99228412], dtype=float32), -0.5713563), (array([ 0.99210703], dtype=float32), -0.5872286), (array([ 0.99213237], dtype=float32), -0.60326117), (array([ 0.99222428], dtype=float32), -0.61945575), (array([ 0.99214065], dtype=float32), -0.63581389), (array([ 0.99216831], dtype=float32), -0.65233725), (array([ 0.99199837], dtype=float32), -0.66902751), (array([ 0.99202013], dtype=float32), -0.68588638), (array([ 0.99204999], dtype=fl

[[-4.77057791]] [[ 0.00840425]]
[[-4.78327894]] [[ 0.00829906]]
[[-4.75501442]] [[ 0.00853495]]
[[-4.75831652]] [[ 0.00850705]]
[[-4.74451256]] [[ 0.00862428]]
[[-4.74946404]] [[ 0.00858204]]
[[-4.75399494]] [[ 0.00854358]]
[[-4.74410009]] [[ 0.0086278]]
[[-4.7424202]] [[ 0.00864218]]
[[-4.72632837]] [[ 0.00878115]]
-1
Game 4 over; alive frames: 41
[[-4.72775841]] [[ 0.00876871]]
[[-4.94114017]] [[ 0.00709574]]
[[-4.93678951]] [[ 0.00712645]]
[[-4.9137044]] [[ 0.00729167]]
[[-4.90965605]] [[ 0.00732103]]
[[-4.88498831]] [[ 0.0075025]]
[[-4.89063692]] [[ 0.00746056]]
[[-4.90054083]] [[ 0.00738757]]
[[-4.89212275]] [[ 0.00744956]]
[[-4.88883543]] [[ 0.00747391]]
[[-4.85890436]] [[ 0.00769924]]
[[-4.86725712]] [[ 0.00763569]]
[[-4.87187338]] [[ 0.00760079]]
[[-4.8617444]] [[ 0.00767757]]
[[-4.86048603]] [[ 0.00768717]]
[[-4.84036922]] [[ 0.00784215]]
[[-4.84124517]] [[ 0.00783534]]
[[-4.84743786]] [[ 0.00778734]]
[[-4.84316587]] [[ 0.00782042]]
[[-4.84390402]] [[ 0.00781469]]
[[-4.8258624

[array([41,  1], dtype=int32), array([41,  1], dtype=int32), array([41], dtype=int32)]
(41, 1) (41,)
loss 0.00485305 new_prob and rewards:  [(array([ 0.99337339], dtype=float32), -0.33794352), (array([ 0.99334443], dtype=float32), -0.3514581), (array([ 0.99318761], dtype=float32), -0.36510921), (array([ 0.99315804], dtype=float32), -0.37889817), (array([ 0.99298269], dtype=float32), -0.39282644), (array([ 0.99301964], dtype=float32), -0.4068954), (array([ 0.99308664], dtype=float32), -0.42110646), (array([ 0.99302858], dtype=float32), -0.43546107), (array([ 0.99300909], dtype=float32), -0.44996068), (array([ 0.99279785], dtype=float32), -0.46460673), (array([ 0.9928565], dtype=float32), -0.47940075), (array([ 0.99288809], dtype=float32), -0.4943442), (array([ 0.99281633], dtype=float32), -0.50943857), (array([ 0.99280989], dtype=float32), -0.52468544), (array([ 0.99267215], dtype=float32), -0.54008627), (array([ 0.99267888], dtype=float32), -0.55564272), (array([ 0.99272269], dtype=flo

[array([41,  1], dtype=int32), array([41,  1], dtype=int32), array([41], dtype=int32)]
(41, 1) (41,)
loss 0.00454623 new_prob and rewards:  [(array([ 0.99379081], dtype=float32), -0.33794352), (array([ 0.99376345], dtype=float32), -0.3514581), (array([ 0.99361473], dtype=float32), -0.36510921), (array([ 0.99358535], dtype=float32), -0.37889817), (array([ 0.99340504], dtype=float32), -0.39282644), (array([ 0.99343908], dtype=float32), -0.4068954), (array([ 0.9935059], dtype=float32), -0.42110646), (array([ 0.99343944], dtype=float32), -0.43546107), (array([ 0.99342328], dtype=float32), -0.44996068), (array([ 0.99324703], dtype=float32), -0.46460673), (array([ 0.99330765], dtype=float32), -0.47940075), (array([ 0.9933449], dtype=float32), -0.4943442), (array([ 0.99328941], dtype=float32), -0.50943857), (array([ 0.99327409], dtype=float32), -0.52468544), (array([ 0.99313676], dtype=float32), -0.54008627), (array([ 0.99314332], dtype=float32), -0.55564272), (array([ 0.99318421], dtype=floa

[array([41,  1], dtype=int32), array([41,  1], dtype=int32), array([41], dtype=int32)]
(41, 1) (41,)
loss 0.00427551 new_prob and rewards:  [(array([ 0.99416387], dtype=float32), -0.33794352), (array([ 0.99413794], dtype=float32), -0.3514581), (array([ 0.99401629], dtype=float32), -0.36510921), (array([ 0.99401623], dtype=float32), -0.37889817), (array([ 0.993837], dtype=float32), -0.39282644), (array([ 0.99386925], dtype=float32), -0.4068954), (array([ 0.9939099], dtype=float32), -0.42110646), (array([ 0.99380243], dtype=float32), -0.43546107), (array([ 0.99379867], dtype=float32), -0.44996068), (array([ 0.99367076], dtype=float32), -0.46460673), (array([ 0.99372953], dtype=float32), -0.47940075), (array([ 0.99374938], dtype=float32), -0.4943442), (array([ 0.99370354], dtype=float32), -0.50943857), (array([ 0.99369663], dtype=float32), -0.52468544), (array([ 0.99357975], dtype=float32), -0.54008627), (array([ 0.99358994], dtype=float32), -0.55564272), (array([ 0.99362123], dtype=float

[array([33,  1], dtype=int32), array([33,  1], dtype=int32), array([33], dtype=int32)]
(33, 1) (33,)
loss 0.00431492 new_prob and rewards:  [(array([ 0.99449891], dtype=float32), -0.44996068), (array([ 0.9944908], dtype=float32), -0.46460673), (array([ 0.99436814], dtype=float32), -0.47940075), (array([ 0.99435097], dtype=float32), -0.4943442), (array([ 0.99420786], dtype=float32), -0.50943857), (array([ 0.99422449], dtype=float32), -0.52468544), (array([ 0.99425805], dtype=float32), -0.54008627), (array([ 0.99417645], dtype=float32), -0.55564272), (array([ 0.99414623], dtype=float32), -0.5713563), (array([ 0.99403358], dtype=float32), -0.5872286), (array([ 0.99406195], dtype=float32), -0.60326117), (array([ 0.99413472], dtype=float32), -0.61945575), (array([ 0.9940818], dtype=float32), -0.63581389), (array([ 0.99410623], dtype=float32), -0.65233725), (array([ 0.99399006], dtype=float32), -0.66902751), (array([ 0.99400854], dtype=float32), -0.68588638), (array([ 0.99403328], dtype=floa

INFO:tensorflow:Restoring parameters from ./models/trained_flappy
[[-5.25839901]] [[ 0.00517669]]
[[-5.25839996]] [[ 0.00517669]]
[[-5.25839996]] [[ 0.00517669]]
[[-5.25691891]] [[ 0.00518432]]
[[-5.23324585]] [[ 0.00530785]]
[[-5.225914]] [[ 0.0053467]]
[[-5.20719147]] [[ 0.0054472]]
[[-5.21090412]] [[ 0.00542712]]
[[-5.21944141]] [[ 0.00538124]]
[[-5.21276474]] [[ 0.00541709]]
[[-5.20410347]] [[ 0.00546395]]
[[-5.17606735]] [[ 0.00561844]]
[[-5.17835617]] [[ 0.00560566]]
[[-5.19119453]] [[ 0.00553455]]
[[-5.18189812]] [[ 0.00558596]]
[[-5.18640995]] [[ 0.00556095]]
[[-5.16468239]] [[ 0.0056824]]
[[-5.16607285]] [[ 0.00567455]]
[[-5.16983509]] [[ 0.00565336]]
[[-5.16514397]] [[ 0.0056798]]
[[-5.16148663]] [[ 0.00570049]]
[[-5.14134979]] [[ 0.00581577]]
[[-5.14056873]] [[ 0.00582029]]
[[-5.1475668]] [[ 0.00577993]]
[[-5.13473415]] [[ 0.00585414]]
[[-5.12220383]] [[ 0.00592752]]
[[-5.10190773]] [[ 0.00604832]]
[[-5.10068607]] [[ 0.00605567]]
[[-5.11420727]] [[ 0.00597483]]
[[-5.10066032

loss 0.00375382 new_prob and rewards:  [(array([ 0.99488002], dtype=float32), -0.33794352), (array([ 0.99485695], dtype=float32), -0.3514581), (array([ 0.9947384], dtype=float32), -0.36510921), (array([ 0.9947108], dtype=float32), -0.37889817), (array([ 0.99454349], dtype=float32), -0.39282644), (array([ 0.99456948], dtype=float32), -0.4068954), (array([ 0.99463171], dtype=float32), -0.42110646), (array([ 0.99457884], dtype=float32), -0.43546107), (array([ 0.99456489], dtype=float32), -0.44996068), (array([ 0.99443847], dtype=float32), -0.46460673), (array([ 0.99448574], dtype=float32), -0.47940075), (array([ 0.99451655], dtype=float32), -0.4943442), (array([ 0.99445879], dtype=float32), -0.50943857), (array([ 0.99443924], dtype=float32), -0.52468544), (array([ 0.99433595], dtype=float32), -0.54008627), (array([ 0.99434274], dtype=float32), -0.55564272), (array([ 0.99438304], dtype=float32), -0.5713563), (array([ 0.99436063], dtype=float32), -0.5872286), (array([ 0.99435484], dtype=flo

[array([61,  1], dtype=int32), array([61,  1], dtype=int32), array([61], dtype=int32)]
(61, 1) (61,)
loss 0.0029696 new_prob and rewards:  [(array([ 0.99509579], dtype=float32), -0.094313294), (array([ 0.99507356], dtype=float32), -0.10536697), (array([ 0.99495417], dtype=float32), -0.11653229), (array([ 0.99493861], dtype=float32), -0.12781039), (array([ 0.99480999], dtype=float32), -0.13920242), (array([ 0.99485505], dtype=float32), -0.15070951), (array([ 0.99491113], dtype=float32), -0.16233283), (array([ 0.99484587], dtype=float32), -0.17407358), (array([ 0.99486333], dtype=float32), -0.1859329), (array([ 0.9946993], dtype=float32), -0.19791202), (array([ 0.99471056], dtype=float32), -0.21001214), (array([ 0.99475175], dtype=float32), -0.22223449), (array([ 0.99468744], dtype=float32), -0.23458029), (array([ 0.99470276], dtype=float32), -0.24705079), (array([ 0.99459606], dtype=float32), -0.25964728), (array([ 0.99461472], dtype=float32), -0.27237099), (array([ 0.99466085], dtype=f

(61, 1) (61,)
loss 0.00289645 new_prob and rewards:  [(array([ 0.99521995], dtype=float32), -0.094313294), (array([ 0.99519819], dtype=float32), -0.10536697), (array([ 0.99508142], dtype=float32), -0.11653229), (array([ 0.99506617], dtype=float32), -0.12781039), (array([ 0.99494028], dtype=float32), -0.13920242), (array([ 0.99498433], dtype=float32), -0.15070951), (array([ 0.99503922], dtype=float32), -0.16233283), (array([ 0.99497539], dtype=float32), -0.17407358), (array([ 0.99499249], dtype=float32), -0.1859329), (array([ 0.99483198], dtype=float32), -0.19791202), (array([ 0.99484301], dtype=float32), -0.21001214), (array([ 0.99488336], dtype=float32), -0.22223449), (array([ 0.99482036], dtype=float32), -0.23458029), (array([ 0.99483532], dtype=float32), -0.24705079), (array([ 0.99473089], dtype=float32), -0.25964728), (array([ 0.99474913], dtype=float32), -0.27237099), (array([ 0.99479431], dtype=float32), -0.28522322), (array([ 0.99471968], dtype=float32), -0.29820526), (array([ 0

[array([67,  1], dtype=int32), array([67,  1], dtype=int32), array([67], dtype=int32)]
(67, 1) (67,)
loss 0.00259991 new_prob and rewards:  [(array([ 0.9953714], dtype=float32), -0.030274246), (array([ 0.9953503], dtype=float32), -0.040681057), (array([ 0.99524379], dtype=float32), -0.051192984), (array([ 0.99522793], dtype=float32), -0.061811097), (array([ 0.9951058], dtype=float32), -0.072536461), (array([ 0.99513054], dtype=float32), -0.083370164), (array([ 0.99517685], dtype=float32), -0.094313294), (array([ 0.99510932], dtype=float32), -0.10536697), (array([ 0.99510145], dtype=float32), -0.11653229), (array([ 0.99497473], dtype=float32), -0.12781039), (array([ 0.99502045], dtype=float32), -0.13920242), (array([ 0.99504572], dtype=float32), -0.15070951), (array([ 0.99499458], dtype=float32), -0.16233283), (array([ 0.99499714], dtype=float32), -0.17407358), (array([ 0.99490368], dtype=float32), -0.1859329), (array([ 0.99491036], dtype=float32), -0.19791202), (array([ 0.99493659], dt

[array([41,  1], dtype=int32), array([41,  1], dtype=int32), array([41], dtype=int32)]
(41, 1) (41,)
loss 0.00330375 new_prob and rewards:  [(array([ 0.99552017], dtype=float32), -0.33794352), (array([ 0.99549973], dtype=float32), -0.3514581), (array([ 0.99538326], dtype=float32), -0.36510921), (array([ 0.99535716], dtype=float32), -0.37889817), (array([ 0.99519926], dtype=float32), -0.39282644), (array([ 0.99522334], dtype=float32), -0.4068954), (array([ 0.99528062], dtype=float32), -0.42110646), (array([ 0.99523503], dtype=float32), -0.43546107), (array([ 0.99521863], dtype=float32), -0.44996068), (array([ 0.99511039], dtype=float32), -0.46460673), (array([ 0.99515092], dtype=float32), -0.47940075), (array([ 0.99517989], dtype=float32), -0.4943442), (array([ 0.99512345], dtype=float32), -0.50943857), (array([ 0.99510598], dtype=float32), -0.52468544), (array([ 0.99501014], dtype=float32), -0.54008627), (array([ 0.99501473], dtype=float32), -0.55564272), (array([ 0.99505186], dtype=fl

[array([33,  1], dtype=int32), array([33,  1], dtype=int32), array([33], dtype=int32)]
(33, 1) (33,)
loss 0.00338743 new_prob and rewards:  [(array([ 0.99569184], dtype=float32), -0.44996068), (array([ 0.99568552], dtype=float32), -0.46460673), (array([ 0.99557954], dtype=float32), -0.47940075), (array([ 0.99554634], dtype=float32), -0.4943442), (array([ 0.99546003], dtype=float32), -0.50943857), (array([ 0.99547708], dtype=float32), -0.52468544), (array([ 0.99551654], dtype=float32), -0.54008627), (array([ 0.99548537], dtype=float32), -0.55564272), (array([ 0.99544561], dtype=float32), -0.5713563), (array([ 0.99531335], dtype=float32), -0.5872286), (array([ 0.99532455], dtype=float32), -0.60326117), (array([ 0.99538547], dtype=float32), -0.61945575), (array([ 0.99534082], dtype=float32), -0.63581389), (array([ 0.99536216], dtype=float32), -0.65233725), (array([ 0.99525785], dtype=float32), -0.66902751), (array([ 0.99526459], dtype=float32), -0.68588638), (array([ 0.99528301], dtype=fl

INFO:tensorflow:Restoring parameters from ./models/trained_flappy
[[-5.49369764]] [[ 0.00409576]]
[[-5.49369764]] [[ 0.00409576]]
[[-5.49369907]] [[ 0.00409576]]
[[-5.49222994]] [[ 0.00410176]]
[[-5.47053671]] [[ 0.00419133]]
[[-5.46584606]] [[ 0.00421095]]
[[-5.43809175]] [[ 0.00432895]]
[[-5.44201756]] [[ 0.00431206]]
[[-5.45005512]] [[ 0.00427769]]
[[-5.44002485]] [[ 0.00432063]]
[[-5.43370581]] [[ 0.0043479]]
[[-5.40949965]] [[ 0.00445395]]
[[-5.41296721]] [[ 0.0044386]]
[[-5.42635489]] [[ 0.00437983]]
[[-5.42014647]] [[ 0.00440699]]
[[-5.42373085]] [[ 0.00439129]]
[[-5.39955997]] [[ 0.00449824]]
[[-5.40102291]] [[ 0.0044917]]
[[-5.40535784]] [[ 0.00447235]]
[[-5.40018845]] [[ 0.00449543]]
[[-5.39608717]] [[ 0.00451382]]
[[-5.36856508]] [[ 0.00463919]]
[[-5.36670732]] [[ 0.00464778]]
[[-5.37456322]] [[ 0.00461158]]
[[-5.35849762]] [[ 0.00468591]]
[[-5.34750509]] [[ 0.00473746]]
[[-5.32324171]] [[ 0.00485325]]
[[-5.32469654]] [[ 0.00484623]]
[[-5.34102964]] [[ 0.00476809]]
[[-5.3215